[project @ 2006-01-06 16:30:17 by simonmar]

author simonmar <unknown>

Fri, 6 Jan 2006 16:30:19 +0000 (16:30 +0000)

committer simonmar <unknown>

Fri, 6 Jan 2006 16:30:19 +0000 (16:30 +0000)
author simonmar <unknown>
Fri, 6 Jan 2006 16:30:19 +0000 (16:30 +0000)
committer simonmar <unknown>
Fri, 6 Jan 2006 16:30:19 +0000 (16:30 +0000)
diff --git a/ghc/compiler/HsVersions.h b/ghc/compiler/HsVersions.h

index 1c07d2c..dd80922 100644 (file)
--- a/ghc/compiler/HsVersions.h
+++ b/ghc/compiler/HsVersions.h
@@ -90,11 +90,19 @@ name = Util.global (value) :: IORef (ty); \
  -- when compiling FastString itself
  #ifndef COMPILING_FAST_STRING
  -- 
-import qualified FastString 
+import qualified FastString as FS 
  #endif
  
-#define SLIT(x)         (FastString.mkLitString# (x#))
-#define FSLIT(x) (FastString.mkFastString# (x#))
+#define SLIT(x)         (FS.mkLitString# (x#))
+#define FSLIT(x) (FS.mkFastString# (x#))
+
+-- Useful for declaring arguments to be strict
+#define STRICT1(f) f a b c | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
+#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
+#define STRICT6(f) f a b c d e f | a `seq` b `seq` c `seq` d `seq` e `seq` f `seq` False = undefined
  
  #endif /* HsVersions.h */
  
diff --git a/ghc/compiler/Makefile b/ghc/compiler/Makefile

index 4920e16..2b5252a 100644 (file)
--- a/ghc/compiler/Makefile
+++ b/ghc/compiler/Makefile
@@ -408,7 +408,7 @@ SRC_HC_OPTS += -DGHCI -package template-haskell
  PKG_DEPENDS += template-haskell
  
  # Use threaded RTS with GHCi, so threads don't get blocked at the prompt.
-SRC_HC_OPTS += -threaded
+# SRC_HC_OPTS += -threaded
  
  ALL_DIRS += ghci
  
diff --git a/ghc/compiler/basicTypes/Id.lhs b/ghc/compiler/basicTypes/Id.lhs

index 0d15b20..10d5a28 100644 (file)
--- a/ghc/compiler/basicTypes/Id.lhs
+++ b/ghc/compiler/basicTypes/Id.lhs
@@ -9,7 +9,7 @@ module Id (
  
         -- Simple construction
         mkGlobalId, mkLocalId, mkLocalIdWithInfo, 
-       mkSysLocal, mkSysLocalUnencoded, mkUserLocal, mkVanillaGlobal,
+       mkSysLocal, mkUserLocal, mkVanillaGlobal,
         mkTemplateLocals, mkTemplateLocalsNum, mkWildId, mkTemplateLocal,
         mkWorkerId, mkExportedLocalId,
  
@@ -105,15 +105,15 @@ import qualified Demand   ( Demand )
  import DataCon         ( DataCon, isUnboxedTupleCon )
  import NewDemand       ( Demand, StrictSig, topDmd, topSig, isBottomingSig )
  import Name            ( Name, OccName, nameIsLocalOrFrom, 
-                         mkSystemVarName, mkSystemVarNameEncoded, mkInternalName,
-                         getOccName, getSrcLoc
-                       ) 
+                         mkSystemVarName, mkInternalName, getOccName,
+                         getSrcLoc ) 
  import Module          ( Module )
-import OccName         ( EncodedFS, mkWorkerOcc )
+import OccName         ( mkWorkerOcc )
  import Maybes          ( orElse )
  import SrcLoc          ( SrcLoc )
  import Outputable
  import Unique          ( Unique, mkBuiltinUnique )
+import FastString      ( FastString )
  import StaticFlags     ( opt_NoStateHack )
  
  -- infixl so you can say (id `set` a `set` b)
@@ -162,15 +162,10 @@ mkLocalId name ty = mkLocalIdWithInfo name ty vanillaIdInfo
  -- SysLocal: for an Id being created by the compiler out of thin air...
  -- UserLocal: an Id with a name the user might recognize...
  mkUserLocal :: OccName -> Unique -> Type -> SrcLoc -> Id
-mkSysLocal  :: EncodedFS  -> Unique -> Type -> Id
+mkSysLocal  :: FastString  -> Unique -> Type -> Id
  mkVanillaGlobal :: Name -> Type -> IdInfo -> Id
  
--- for SysLocal, we assume the base name is already encoded, to avoid
--- re-encoding the same string over and over again.
-mkSysLocal fs uniq ty = mkLocalId (mkSystemVarNameEncoded uniq fs) ty
-
--- version to use when the faststring needs to be encoded
-mkSysLocalUnencoded fs uniq ty = mkLocalId (mkSystemVarName uniq fs)  ty
+mkSysLocal fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty
  
  mkUserLocal occ uniq ty loc = mkLocalId (mkInternalName    uniq occ loc) ty
  mkVanillaGlobal            = mkGlobalId VanillaGlobal
diff --git a/ghc/compiler/basicTypes/Literal.lhs b/ghc/compiler/basicTypes/Literal.lhs

index 9aab6ee..e83ea9d 100644 (file)
--- a/ghc/compiler/basicTypes/Literal.lhs
+++ b/ghc/compiler/basicTypes/Literal.lhs
@@ -35,7 +35,6 @@ import FastTypes
  import FastString
  import Binary
  
-import UnicodeUtil     ( stringToUtf8 )
  import Ratio           ( numerator )
  import FastString      ( uniqueOfFS, lengthFS )
  import DATA_INT                ( Int8,  Int16,  Int32 )
@@ -95,7 +94,11 @@ data Literal
    =    ------------------
         -- First the primitive guys
      MachChar   Char             -- Char#        At least 31 bits
-  | MachStr    FastString
+
+  | MachStr    FastString      -- A string-literal: stored and emitted
+                               -- UTF-8 encoded, we'll arrange to decode it
+                               -- at runtime.  Also emitted with a '\0'
+                               -- terminator.
  
    | MachNullAddr                -- the NULL pointer, the only pointer value
                                  -- that can be represented as a Literal.
@@ -206,7 +209,7 @@ mkMachInt64  x = MachInt64 x
  mkMachWord64 x = MachWord64 x
  
  mkStringLit :: String -> Literal
-mkStringLit s = MachStr (mkFastString (stringToUtf8 s))
+mkStringLit s = MachStr (mkFastString s) -- stored UTF-8 encoded
  
  inIntRange, inWordRange :: Integer -> Bool
  inIntRange  x = x >= tARGET_MIN_INT && x <= tARGET_MAX_INT
diff --git a/ghc/compiler/basicTypes/MkId.lhs b/ghc/compiler/basicTypes/MkId.lhs

index 02d2559..9d93a67 100644 (file)
--- a/ghc/compiler/basicTypes/MkId.lhs
+++ b/ghc/compiler/basicTypes/MkId.lhs
@@ -59,7 +59,7 @@ import Class          ( Class, classTyCon, classSelIds )
  import Var             ( Id, TyVar, Var )
  import VarSet          ( isEmptyVarSet, subVarSet, varSetElems )
  import Name            ( mkFCallName, mkWiredInName, Name, BuiltInSyntax(..) )
-import OccName         ( mkOccFS, varName )
+import OccName         ( mkOccNameFS, varName )
  import PrimOp          ( PrimOp, primOpSig, primOpOcc, primOpTag )
  import ForeignCall     ( ForeignCall )
  import DataCon         ( DataCon, DataConIds(..), dataConTyVars,
@@ -847,7 +847,7 @@ another gun with which to shoot yourself in the foot.
  
  \begin{code}
  mkWiredInIdName mod fs uniq id
- = mkWiredInName mod (mkOccFS varName fs) uniq Nothing (AnId id) UserSyntax
+ = mkWiredInName mod (mkOccNameFS varName fs) uniq Nothing (AnId id) UserSyntax
  
  unsafeCoerceName = mkWiredInIdName gHC_PRIM FSLIT("unsafeCoerce#") unsafeCoerceIdKey  unsafeCoerceId
  nullAddrName     = mkWiredInIdName gHC_PRIM FSLIT("nullAddr#")    nullAddrIdKey      nullAddrId
diff --git a/ghc/compiler/basicTypes/Module.lhs b/ghc/compiler/basicTypes/Module.lhs

index 9145b35..f4e413d 100644 (file)
--- a/ghc/compiler/basicTypes/Module.lhs
+++ b/ghc/compiler/basicTypes/Module.lhs
@@ -4,7 +4,7 @@
  
  Module
  ~~~~~~~~~~
-Simply the name of a module, represented as a Z-encoded FastString.
+Simply the name of a module, represented as a FastString.
  These are Uniquable, hence we can build FiniteMaps with ModuleNames as
  the keys.
  
@@ -17,13 +17,11 @@ module Module
      , ModLocation(..)
      , addBootSuffix, addBootSuffix_maybe, addBootSuffixLocn
  
-    , moduleString             -- :: ModuleName -> EncodedString
-    , moduleUserString         -- :: ModuleName -> UserString
-    , moduleFS                 -- :: ModuleName -> EncodedFS
+    , moduleString             -- :: ModuleName -> String
+    , moduleFS                 -- :: ModuleName -> FastString
  
-    , mkModule                 -- :: UserString -> ModuleName
-    , mkModuleFS               -- :: UserFS    -> ModuleName
-    , mkSysModuleFS            -- :: EncodedFS -> ModuleName
+    , mkModule                 -- :: String -> ModuleName
+    , mkModuleFS               -- :: FastString -> ModuleName
   
      , ModuleEnv
      , elemModuleEnv, extendModuleEnv, extendModuleEnvList, plusModuleEnv_C
@@ -108,7 +106,7 @@ addBootSuffixLocn locn
  %************************************************************************
  
  \begin{code}
-newtype Module = Module EncodedFS
+newtype Module = Module FastString
         -- Haskell module names can include the quote character ',
         -- so the module names have the z-encoding applied to them
  
@@ -131,30 +129,26 @@ instance Ord Module where
  instance Outputable Module where
    ppr = pprModule
  
-
  pprModule :: Module -> SDoc
-pprModule (Module nm) = pprEncodedFS nm
+pprModule (Module nm) = 
+    getPprStyle $ \ sty ->
+    if codeStyle sty 
+       then ftext (zEncodeFS nm)
+       else ftext nm
  
-moduleFS :: Module -> EncodedFS
+moduleFS :: Module -> FastString
  moduleFS (Module mod) = mod
  
-moduleString :: Module -> EncodedString
+moduleString :: Module -> String
  moduleString (Module mod) = unpackFS mod
  
-moduleUserString :: Module -> UserString
-moduleUserString (Module mod) = decode (unpackFS mod)
-
  -- used to be called mkSrcModule
-mkModule :: UserString -> Module
-mkModule s = Module (mkFastString (encode s))
+mkModule :: String -> Module
+mkModule s = Module (mkFastString s)
  
  -- used to be called mkSrcModuleFS
-mkModuleFS :: UserFS -> Module
-mkModuleFS s = Module (encodeFS s)
-
--- used to be called mkSysModuleFS
-mkSysModuleFS :: EncodedFS -> Module
-mkSysModuleFS s = Module s 
+mkModuleFS :: FastString -> Module
+mkModuleFS s = Module s
  \end{code}
  
  %************************************************************************
diff --git a/ghc/compiler/basicTypes/Name.lhs b/ghc/compiler/basicTypes/Name.lhs

index fd8f2cf..3aeb03b 100644 (file)
--- a/ghc/compiler/basicTypes/Name.lhs
+++ b/ghc/compiler/basicTypes/Name.lhs
@@ -12,7 +12,7 @@ module Name (
         Name,                                   -- Abstract
         BuiltInSyntax(..), 
         mkInternalName, mkSystemName,
-       mkSystemVarName, mkSystemVarNameEncoded, mkSysTvName, 
+       mkSystemVarName, mkSysTvName, 
         mkFCallName, mkIPName,
         mkExternalName, mkWiredInName,
  
@@ -38,10 +38,11 @@ module Name (
  import {-# SOURCE #-} TypeRep( TyThing )
  
  import OccName         -- All of it
-import Module          ( Module )
+import Module          ( Module, moduleFS )
  import SrcLoc          ( noSrcLoc, wiredInSrcLoc, SrcLoc )
  import Unique          ( Unique, Uniquable(..), getKey, pprUnique )
  import Maybes          ( orElse, isJust )
+import FastString      ( FastString, zEncodeFS )
  import Outputable
  \end{code}
  
@@ -215,21 +216,16 @@ mkSystemName :: Unique -> OccName -> Name
  mkSystemName uniq occ = Name { n_uniq = uniq, n_sort = System, 
                                n_occ = occ, n_loc = noSrcLoc }
  
-mkSystemVarName :: Unique -> UserFS -> Name
-mkSystemVarName uniq fs = mkSystemName uniq (mkVarOcc fs)
+mkSystemVarName :: Unique -> FastString -> Name
+mkSystemVarName uniq fs = mkSystemName uniq (mkVarOccFS fs)
  
--- Use this version when the string is already encoded.  Avoids duplicating
--- the string each time a new name is created.
-mkSystemVarNameEncoded :: Unique -> EncodedFS -> Name
-mkSystemVarNameEncoded uniq fs = mkSystemName uniq (mkSysOccFS varName fs) 
+mkSysTvName :: Unique -> FastString -> Name
+mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs) 
  
-mkSysTvName :: Unique -> EncodedFS -> Name
-mkSysTvName uniq fs = mkSystemName uniq (mkSysOccFS tvName fs) 
-
-mkFCallName :: Unique -> EncodedString -> Name
+mkFCallName :: Unique -> String -> Name
         -- The encoded string completely describes the ccall
  mkFCallName uniq str =  Name { n_uniq = uniq, n_sort = Internal, 
-                              n_occ = mkFCallOcc str, n_loc = noSrcLoc }
+                              n_occ = mkVarOcc str, n_loc = noSrcLoc }
  
  mkIPName :: Unique -> OccName -> Name
  mkIPName uniq occ
@@ -317,13 +313,13 @@ pprName (Name {n_sort = sort, n_uniq = uniq, n_occ = occ})
        Internal               -> pprInternal sty uniq occ
  
  pprExternal sty uniq mod occ is_wired is_builtin
-  | codeStyle sty        = ppr mod <> char '_' <> ppr_occ_name occ
+  | codeStyle sty        = ppr_z_module mod <> char '_' <> ppr_z_occ_name occ
         -- In code style, always qualify
         -- ToDo: maybe we could print all wired-in things unqualified
         --       in code style, to reduce symbol table bloat?
    | debugStyle sty       = ppr mod <> dot <> ppr_occ_name occ
                            <> braces (hsep [if is_wired then ptext SLIT("(w)") else empty,
-                                           text (briefOccNameFlavour occ), 
+                                           pprNameSpaceBrief (occNameSpace occ), 
                                             pprUnique uniq])
    | BuiltInSyntax <- is_builtin  = ppr_occ_name occ
         -- never qualify builtin syntax
@@ -332,7 +328,7 @@ pprExternal sty uniq mod occ is_wired is_builtin
  
  pprInternal sty uniq occ
    | codeStyle sty  = pprUnique uniq
-  | debugStyle sty = ppr_occ_name occ <> braces (hsep [text (briefOccNameFlavour occ), 
+  | debugStyle sty = ppr_occ_name occ <> braces (hsep [pprNameSpaceBrief (occNameSpace occ), 
                                                        pprUnique uniq])
    | dumpStyle sty  = ppr_occ_name occ <> char '_' <> pprUnique uniq
                         -- For debug dumps, we're not necessarily dumping
@@ -343,15 +339,21 @@ pprInternal sty uniq occ
  pprSystem sty uniq occ
    | codeStyle sty  = pprUnique uniq
    | debugStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq
-                    <> braces (text (briefOccNameFlavour occ))
+                    <> braces (pprNameSpaceBrief (occNameSpace occ))
    | otherwise     = ppr_occ_name occ <> char '_' <> pprUnique uniq
                                 -- If the tidy phase hasn't run, the OccName
                                 -- is unlikely to be informative (like 's'),
                                 -- so print the unique
  
-ppr_occ_name occ = pprEncodedFS (occNameFS occ)
+ppr_occ_name occ = ftext (occNameFS occ)
         -- Don't use pprOccName; instead, just print the string of the OccName; 
         -- we print the namespace in the debug stuff above
+
+-- In code style, we Z-encode the strings.  The results of Z-encoding each FastString are
+-- cached behind the scenes in the FastString implementation.
+ppr_z_occ_name occ = ftext (zEncodeFS (occNameFS occ))
+ppr_z_module   mod = ftext (zEncodeFS (moduleFS mod))
+
  \end{code}
  
  %************************************************************************
diff --git a/ghc/compiler/basicTypes/OccName.lhs b/ghc/compiler/basicTypes/OccName.lhs

index 756d6a9..bd6d3f7 100644 (file)
--- a/ghc/compiler/basicTypes/OccName.lhs
+++ b/ghc/compiler/basicTypes/OccName.lhs
@@ -7,62 +7,66 @@
  
  \begin{code}
  module OccName (
-       -- The NameSpace type; abstact
+       -- * The NameSpace type; abstact
         NameSpace, tcName, clsName, tcClsName, dataName, varName, 
-       tvName, srcDataName, nameSpaceString, 
+       tvName, srcDataName,
  
-       -- The OccName type
+       -- ** Printing
+       pprNameSpace, pprNonVarNameSpace, pprNameSpaceBrief,
+
+       -- * The OccName type
         OccName,        -- Abstract, instance of Outputable
         pprOccName, 
  
+       -- ** Construction      
+       mkOccName, mkOccNameFS, 
+       mkVarOcc, mkVarOccFS,
+       mkTyVarOcc,
+       mkDFunOcc,
+       mkTupleOcc, 
+       setOccNameSpace,
+
+       -- ** Derived OccNames
+       mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc,
+       mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc, 
+       mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2,
+       mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc,
+       mkSuperDictSelOcc, mkLocalOcc, mkMethodOcc,
+
+       -- ** Deconstruction
+       occNameFS, occNameString, occNameSpace, 
+
+       isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
+       parenSymOcc, reportIfUnused, isTcClsName, isVarName,
+
+       isTupleOcc_maybe,
+
         -- The OccEnv type
         OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
         lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
         occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
  
-
         -- The OccSet type
-       OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
+       OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, 
+       extendOccSetList,
         unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts, 
         foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
  
-       mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
-       mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
-       mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
-       mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
-       mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
-       mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
-       mkDataConWrapperOcc, mkDataConWorkerOcc,
-       
-       isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
-       parenSymOcc, reportIfUnused, isTcClsName, isVarName,
-
-       occNameFS, occNameString, occNameUserString, occNameSpace, 
-       occNameFlavour, briefOccNameFlavour,
-       setOccNameSpace,
-
-       mkTupleOcc, isTupleOcc_maybe,
-
         -- Tidying up
         TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
  
-       -- Encoding
-       EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
-
         -- The basic form of names
         isLexCon, isLexVar, isLexId, isLexSym,
         isLexConId, isLexConSym, isLexVarId, isLexVarSym,
-       isLowerISO, isUpperISO,
         startsVarSym, startsVarId, startsConSym, startsConId
      ) where
  
  #include "HsVersions.h"
  
-import Char    ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
-import Util    ( thenCmp )
-import Unique  ( Unique, mkUnique, Uniquable(..) )
-import BasicTypes ( Boxity(..), Arity )
-import StaticFlags ( opt_PprStyle_Debug )
+import Util            ( thenCmp )
+import Unique          ( Unique, mkUnique, Uniquable(..) )
+import BasicTypes      ( Boxity(..), Arity )
+import StaticFlags     ( opt_PprStyle_Debug )
  import UniqFM
  import UniqSet
  import FastString
@@ -70,34 +74,16 @@ import Outputable
  import Binary
  
  import GLAEXTS
-\end{code}
-
-We hold both module names and identifier names in a 'Z-encoded' form
-that makes them acceptable both as a C identifier and as a Haskell
-(prefix) identifier. 
-
-They can always be decoded again when printing error messages
-or anything else for the user, but it does make sense for it
-to be represented here in encoded form, so that when generating
-code the encoding operation is not performed on each occurrence.
  
-These type synonyms help documentation.
-
-\begin{code}
-type UserFS    = FastString    -- As the user typed it
-type EncodedFS = FastString    -- Encoded form
+import Data.Char       ( isUpper, isLower, ord )
  
-type UserString = String       -- As the user typed it
-type EncodedString = String    -- Encoded form
+-- Unicode TODO: put isSymbol in libcompat
+#if __GLASGOW_HASKELL__ > 604
+import Data.Char       ( isSymbol )
+#else
+isSymbol = const False
+#endif
  
-
-pprEncodedFS :: EncodedFS -> SDoc
-pprEncodedFS fs
-  = getPprStyle        $ \ sty ->
-    if userStyle sty || dumpStyle sty
-       -- ftext (decodeFS fs) would needlessly pack the string again
-       then text (decode (unpackFS fs))
-        else ftext fs
  \end{code}
  
  %************************************************************************
@@ -155,12 +141,20 @@ isVarName TvName  = True
  isVarName VarName = True
  isVarName other   = False
  
+pprNameSpace :: NameSpace -> SDoc
+pprNameSpace DataName  = ptext SLIT("data constructor")
+pprNameSpace VarName   = ptext SLIT("variable")
+pprNameSpace TvName    = ptext SLIT("type variable")
+pprNameSpace TcClsName = ptext SLIT("type constructor or class")
+
+pprNonVarNameSpace :: NameSpace -> SDoc
+pprNonVarNameSpace VarName = empty
+pprNonVarNameSpace ns = pprNameSpace ns
  
-nameSpaceString :: NameSpace -> String
-nameSpaceString DataName  = "data constructor"
-nameSpaceString VarName   = "variable"
-nameSpaceString TvName    = "type variable"
-nameSpaceString TcClsName = "type constructor or class"
+pprNameSpaceBrief DataName  = char 'd'
+pprNameSpaceBrief VarName   = char 'v'
+pprNameSpaceBrief TvName    = ptext SLIT("tv")
+pprNameSpaceBrief TcClsName = ptext SLIT("tc")
  \end{code}
  
  
@@ -173,7 +167,7 @@ nameSpaceString TcClsName = "type constructor or class"
  \begin{code}
  data OccName = OccName 
      { occNameSpace  :: !NameSpace
-    , occNameFS     :: !EncodedFS
+    , occNameFS     :: !FastString
      }
  \end{code}
  
@@ -201,9 +195,11 @@ instance Outputable OccName where
  pprOccName :: OccName -> SDoc
  pprOccName (OccName sp occ) 
    = getPprStyle $ \ sty ->
-    pprEncodedFS occ <> if debugStyle sty then
-                          braces (text (briefNameSpaceFlavour sp))
-                       else empty
+    if codeStyle sty 
+       then ftext (zEncodeFS occ)
+       else ftext occ <> if debugStyle sty 
+                           then braces (pprNameSpaceBrief sp)
+                           else empty
  \end{code}
  
  
@@ -211,54 +207,24 @@ pprOccName (OccName sp occ)
  %*                                                                     *
  \subsection{Construction}
  %*                                                                     *
-%*****p*******************************************************************
-
-*Sys* things do no encoding; the caller should ensure that the thing is
-already encoded
-
-\begin{code}
-mkSysOcc :: NameSpace -> EncodedString -> OccName
-mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
-                     OccName occ_sp (mkFastString str)
-
-mkSysOccFS :: NameSpace -> EncodedFS -> OccName
-mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
-                      OccName occ_sp fs
-
-mkFCallOcc :: EncodedString -> OccName
--- This version of mkSysOcc doesn't check that the string is already encoded,
--- because it will be something like "{__ccall f dyn Int# -> Int#}" 
--- This encodes a lot into something that then parses like an Id.
--- But then alreadyEncoded complains about the braces!
-mkFCallOcc str = OccName varName (mkFastString str)
-
--- Kind constructors get a special function.  Uniquely, they are not encoded,
--- so that they have names like '*'.  This means that *even in interface files*
--- we'll get kinds like (* -> (* -> *)).  We can't use mkSysOcc because it
--- has an ASSERT that doesn't hold.
-mkKindOccFS :: NameSpace -> EncodedFS -> OccName
-mkKindOccFS occ_sp fs = OccName occ_sp fs
-\end{code}
-
-*Source-code* things are encoded.
+%************************************************************************
  
  \begin{code}
-mkOccFS :: NameSpace -> UserFS -> OccName
-mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
-
  mkOccName :: NameSpace -> String -> OccName
-mkOccName ns s = mkSysOcc ns (encode s)
+mkOccName occ_sp str = OccName occ_sp (mkFastString str)
  
-mkVarOcc :: UserFS -> OccName
-mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
+mkOccNameFS :: NameSpace -> FastString -> OccName
+mkOccNameFS occ_sp fs = OccName occ_sp fs
  
-mkTyVarOcc :: UserFS -> OccName
-mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
+mkVarOcc :: String -> OccName
+mkVarOcc s = mkOccName varName s
  
-mkVarOccEncoded :: EncodedFS -> OccName
-mkVarOccEncoded fs = mkSysOccFS varName fs
-\end{code}
+mkVarOccFS :: FastString -> OccName
+mkVarOccFS fs = mkOccNameFS varName fs
  
+mkTyVarOcc :: FastString -> OccName
+mkTyVarOcc fs = mkOccNameFS tvName fs
+\end{code}
  
  
  %************************************************************************
@@ -355,34 +321,13 @@ intersectsOccSet s1 s2 = not (isEmptyOccSet (s1 `intersectOccSet` s2))
  %*                                                                     *
  %************************************************************************
  
-\begin{code} 
-occNameString :: OccName -> EncodedString
+\begin{code}
+occNameString :: OccName -> String
  occNameString (OccName _ s) = unpackFS s
  
-occNameUserString :: OccName -> UserString
-occNameUserString occ = decode (occNameString occ)
-
  setOccNameSpace :: NameSpace -> OccName -> OccName
  setOccNameSpace sp (OccName _ occ) = OccName sp occ
  
--- occNameFlavour is used only to generate good error messages
-occNameFlavour :: OccName -> SDoc
-occNameFlavour (OccName DataName _)  = ptext SLIT("data constructor")
-occNameFlavour (OccName TvName _)    = ptext SLIT("type variable")
-occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
-occNameFlavour (OccName VarName s)   = empty
-
--- briefOccNameFlavour is used in debug-printing of names
-briefOccNameFlavour :: OccName -> String
-briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
-
-briefNameSpaceFlavour DataName  = "d"
-briefNameSpaceFlavour VarName   = "v"
-briefNameSpaceFlavour TvName    = "tv"
-briefNameSpaceFlavour TcClsName = "tc"
-\end{code}
-
-\begin{code}
  isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
  
  isVarOcc (OccName VarName _) = True
@@ -400,19 +345,19 @@ isValOcc other                  = False
  
  -- Data constructor operator (starts with ':', or '[]')
  -- Pretty inefficient!
-isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
-isDataSymOcc (OccName VarName s)  = isLexConSym (decodeFS s)
+isDataSymOcc (OccName DataName s) = isLexConSym s
+isDataSymOcc (OccName VarName s)  = isLexConSym s
  isDataSymOcc other               = False
  
  isDataOcc (OccName DataName _) = True
-isDataOcc (OccName VarName s)  = isLexCon (decodeFS s)
+isDataOcc (OccName VarName s)  = isLexCon s
  isDataOcc other                       = False
  
  -- Any operator (data constructor or variable)
  -- Pretty inefficient!
-isSymOcc (OccName DataName s)  = isLexConSym (decodeFS s)
-isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
-isSymOcc (OccName VarName s)   = isLexSym (decodeFS s)
+isSymOcc (OccName DataName s)  = isLexConSym s
+isSymOcc (OccName TcClsName s) = isLexConSym s
+isSymOcc (OccName VarName s)   = isLexSym s
  isSymOcc other                = False
  
  parenSymOcc :: OccName -> SDoc -> SDoc
@@ -426,13 +371,12 @@ parenSymOcc occ doc | isSymOcc occ = parens doc
  reportIfUnused :: OccName -> Bool
    -- Haskell 98 encourages compilers to suppress warnings about
    -- unused names in a pattern if they start with "_".
-reportIfUnused occ = case occNameUserString occ of
+reportIfUnused occ = case occNameString occ of
                         ('_' : _) -> False
-                       zz_other  -> True
+                       _other    -> True
  \end{code}
  
  
-
  %************************************************************************
  %*                                                                     *
  \subsection{Making system names}
@@ -466,16 +410,17 @@ NB: The string must already be encoded!
  \begin{code}
  mk_deriv :: NameSpace 
          -> String              -- Distinguishes one sort of derived name from another
-        -> EncodedString       -- Must be already encoded!!  We don't want to encode it a 
-                               -- second time because encoding isn't idempotent
+        -> String
          -> OccName
  
-mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
+mk_deriv occ_sp sys_prefix str = mkOccName occ_sp (sys_prefix ++ str)
  \end{code}
  
  \begin{code}
-mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
-          mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
+mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc,
+       mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc, 
+       mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2,
+       mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc
     :: OccName -> OccName
  
  -- These derived variables have a prefix that no Haskell value could have
@@ -503,7 +448,6 @@ mkDataCOcc = mk_simple_deriv varName  "$c"
  
  mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
  
-
  -- Data constructor workers are made by setting the name space
  -- of the data constructor OccName (which should be a DataName)
  -- to VarName
@@ -528,7 +472,7 @@ mkLocalOcc uniq occ
  
  
  \begin{code}
-mkDFunOcc :: EncodedString     -- Typically the class and type glommed together e.g. "OrdMaybe"
+mkDFunOcc :: String            -- Typically the class and type glommed together e.g. "OrdMaybe"
                                 -- Only used in debug mode, for extra clarity
           -> Bool               -- True <=> hs-boot instance dfun
           -> Int                -- Unique index
@@ -609,248 +553,37 @@ tidyOccName in_scope occ@(OccName occ_sp fs)
         Just n  ->      -- Already used: make a new guess, 
                         -- change the guess base, and try again
                    tidyOccName  (extendOccEnv in_scope occ (n+1))
-                               (mkSysOcc occ_sp (unpackFS fs ++ show n))
+                               (mkOccName occ_sp (unpackFS fs ++ show n))
  \end{code}
  
-
-%************************************************************************
-%*                                                                     *
-\subsection{The 'Z' encoding}
-%*                                                                     *
-%************************************************************************
-
-This is the main name-encoding and decoding function.  It encodes any
-string into a string that is acceptable as a C name.  This is the name
-by which things are known right through the compiler.
-
-The basic encoding scheme is this.  
-
-* Tuples (,,,) are coded as Z3T
-
-* Alphabetic characters (upper and lower) and digits
-       all translate to themselves; 
-       except 'Z', which translates to 'ZZ'
-       and    'z', which translates to 'zz'
-  We need both so that we can preserve the variable/tycon distinction
-
-* Most other printable characters translate to 'zx' or 'Zx' for some
-       alphabetic character x
-
-* The others translate as 'znnnU' where 'nnn' is the decimal number
-        of the character
-
-       Before          After
-       --------------------------
-       Trak            Trak
-       foo_wib         foozuwib
-       >               zg
-       >1              zg1
-       foo#            foozh
-       foo##           foozhzh
-       foo##1          foozhzh1
-       fooZ            fooZZ   
-       :+              ZCzp
-       ()              Z0T     0-tuple
-       (,,,,)          Z5T     5-tuple  
-       (# #)           Z1H     unboxed 1-tuple (note the space)
-       (#,,,,#)        Z5H     unboxed 5-tuple
-               (NB: There is no Z1T nor Z0H.)
-
-\begin{code}
--- alreadyEncoded is used in ASSERTs to check for encoded
--- strings.  It isn't fail-safe, of course, because, say 'zh' might
--- be encoded or not.
-alreadyEncoded :: String -> Bool
-alreadyEncoded s = all ok s
-                where
-                  ok ' ' = True
-                       -- This is a bit of a lie; if we really wanted spaces
-                       -- in names we'd have to encode them.  But we do put
-                       -- spaces in ccall "occurrences", and we don't want to
-                       -- reject them here
-                  ok ch  = isAlphaNum ch
-
-alreadyEncodedFS :: FastString -> Bool
-alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
-
-encode :: UserString -> EncodedString
-encode cs = case maybe_tuple cs of
-               Just n  -> n            -- Tuples go to Z2T etc
-               Nothing -> go cs
-         where
-               go []     = []
-               go (c:cs) = encode_ch c ++ go cs
-
-encodeFS :: UserFS -> EncodedFS
-encodeFS fast_str  | all unencodedChar str = fast_str
-                  | otherwise             = mkFastString (encode str)
-                  where
-                    str = unpackFS fast_str
-
-unencodedChar :: Char -> Bool  -- True for chars that don't need encoding
-unencodedChar 'Z' = False
-unencodedChar 'z' = False
-unencodedChar c   =  c >= 'a' && c <= 'z'
-                 || c >= 'A' && c <= 'Z'
-                 || c >= '0' && c <= '9'
-
-encode_ch :: Char -> EncodedString
-encode_ch c | unencodedChar c = [c]    -- Common case first
-
--- Constructors
-encode_ch '('  = "ZL"  -- Needed for things like (,), and (->)
-encode_ch ')'  = "ZR"  -- For symmetry with (
-encode_ch '['  = "ZM"
-encode_ch ']'  = "ZN"
-encode_ch ':'  = "ZC"
-encode_ch 'Z'  = "ZZ"
-
--- Variables
-encode_ch 'z'  = "zz"
-encode_ch '&'  = "za"
-encode_ch '|'  = "zb"
-encode_ch '^'  = "zc"
-encode_ch '$'  = "zd"
-encode_ch '='  = "ze"
-encode_ch '>'  = "zg"
-encode_ch '#'  = "zh"
-encode_ch '.'  = "zi"
-encode_ch '<'  = "zl"
-encode_ch '-'  = "zm"
-encode_ch '!'  = "zn"
-encode_ch '+'  = "zp"
-encode_ch '\'' = "zq"
-encode_ch '\\' = "zr"
-encode_ch '/'  = "zs"
-encode_ch '*'  = "zt"
-encode_ch '_'  = "zu"
-encode_ch '%'  = "zv"
-encode_ch c    = 'z' : shows (ord c) "U"
-\end{code}
-
-Decode is used for user printing.
-
-\begin{code}
-decodeFS :: FastString -> FastString
-decodeFS fs = mkFastString (decode (unpackFS fs))
-
-decode :: EncodedString -> UserString
-decode [] = []
-decode ('Z' : d : rest) | isDigit d = decode_tuple   d rest
-                       | otherwise = decode_upper   d : decode rest
-decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
-                       | otherwise = decode_lower   d : decode rest
-decode (c   : rest) = c : decode rest
-
-decode_upper, decode_lower :: Char -> Char
-
-decode_upper 'L' = '('
-decode_upper 'R' = ')'
-decode_upper 'M' = '['
-decode_upper 'N' = ']'
-decode_upper 'C' = ':'
-decode_upper 'Z' = 'Z'
-decode_upper ch  = pprTrace "decode_upper" (char ch) ch
-               
-decode_lower 'z' = 'z'
-decode_lower 'a' = '&'
-decode_lower 'b' = '|'
-decode_lower 'c' = '^'
-decode_lower 'd' = '$'
-decode_lower 'e' = '='
-decode_lower 'g' = '>'
-decode_lower 'h' = '#'
-decode_lower 'i' = '.'
-decode_lower 'l' = '<'
-decode_lower 'm' = '-'
-decode_lower 'n' = '!'
-decode_lower 'p' = '+'
-decode_lower 'q' = '\''
-decode_lower 'r' = '\\'
-decode_lower 's' = '/'
-decode_lower 't' = '*'
-decode_lower 'u' = '_'
-decode_lower 'v' = '%'
-decode_lower ch  = pprTrace "decode_lower" (char ch) ch
-
--- Characters not having a specific code are coded as z224U
-decode_num_esc d rest
-  = go (digitToInt d) rest
-  where
-    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
-    go n ('U' : rest)           = chr n : decode rest
-    go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
-
-decode_tuple :: Char -> EncodedString -> UserString
-decode_tuple d rest
-  = go (digitToInt d) rest
-  where
-       -- NB. recurse back to decode after decoding the tuple, because
-       -- the tuple might be embedded in a longer name.
-    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
-    go 0 ('T':rest)    = "()" ++ decode rest
-    go n ('T':rest)    = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
-    go 1 ('H':rest)    = "(# #)" ++ decode rest
-    go n ('H':rest)    = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
-    go n other = pprPanic "decode_tuple" (ppr n <+> text other)
-\end{code}
-
-
  %************************************************************************
  %*                                                                     *
                 Stuff for dealing with tuples
  %*                                                                     *
  %************************************************************************
  
-Tuples are encoded as
-       Z3T or Z3H
-for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts 
-       Z<digit>
-
-* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
-  There are no unboxed 0-tuples.  
-
-* "()" is the tycon for a boxed 0-tuple.
-  There are no boxed 1-tuples.
-
-
-\begin{code}
-maybe_tuple :: UserString -> Maybe EncodedString
-
-maybe_tuple "(# #)" = Just("Z1H")
-maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
-                                (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
-                                other               -> Nothing
-maybe_tuple "()" = Just("Z0T")
-maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
-                                (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
-                                other         -> Nothing
-maybe_tuple other           = Nothing
-
-count_commas :: Int -> String -> (Int, String)
-count_commas n (',' : cs) = count_commas (n+1) cs
-count_commas n cs        = (n,cs)
-\end{code}
-
  \begin{code}
  mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
-mkTupleOcc ns bx ar
-  = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
+mkTupleOcc ns bx ar = OccName ns (mkFastString str)
    where
-    bx_char = case bx of
-               Boxed   -> "T"
-               Unboxed -> "H"
+       -- no need to cache these, the caching is done in the caller
+       -- (TysWiredIn.mk_tuple)
+    str = case bx of
+               Boxed   -> '(' : commas ++ ")"
+               Unboxed -> '(' : '#' : commas ++ "#)"
+
+    commas = take (ar-1) (repeat ',')
  
  isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
  -- Tuples are special, because there are so many of them!
  isTupleOcc_maybe (OccName ns fs)
    = case unpackFS fs of
-       ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
-       other                    -> Nothing
+       '(':'#':',':rest -> Just (ns, Unboxed, 2 + count_commas rest)
+       '(':',':rest     -> Just (ns, Boxed,   2 + count_commas rest)
+       _other           -> Nothing
    where
-    decode_tup n "H"     = (ns, Unboxed, n)
-    decode_tup n "T"     = (ns, Boxed, n)
-    decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
+    count_commas (',':rest) = 1 + count_commas rest
+    count_commas _          = 0
  \end{code}
  
  %************************************************************************
@@ -875,37 +608,31 @@ isLexSym cs = isLexConSym cs || isLexVarSym cs
  -------------
  
  isLexConId cs                          -- Prefix type or data constructors
-  | nullFastString cs = False          --      e.g. "Foo", "[]", "(,)" 
+  | nullFS cs        = False           --      e.g. "Foo", "[]", "(,)" 
    | cs == FSLIT("[]") = True
    | otherwise        = startsConId (headFS cs)
  
  isLexVarId cs                          -- Ordinary prefix identifiers
-  | nullFastString cs = False          --      e.g. "x", "_x"
+  | nullFS cs        = False           --      e.g. "x", "_x"
    | otherwise         = startsVarId (headFS cs)
  
  isLexConSym cs                         -- Infix type or data constructors
-  | nullFastString cs = False          --      e.g. ":-:", ":", "->"
+  | nullFS cs        = False           --      e.g. ":-:", ":", "->"
    | cs == FSLIT("->") = True
    | otherwise        = startsConSym (headFS cs)
  
  isLexVarSym cs                         -- Infix identifiers
-  | nullFastString cs = False          --      e.g. "+"
+  | nullFS cs        = False           --      e.g. "+"
    | otherwise         = startsVarSym (headFS cs)
  
  -------------
  startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
-startsVarSym c = isSymbolASCII c || isSymbolISO c      -- Infix Ids
+startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids
  startsConSym c = c == ':'                              -- Infix data constructors
-startsVarId c  = isLower c || isLowerISO c || c == '_' -- Ordinary Ids
-startsConId c  = isUpper c || isUpperISO c || c == '(' -- Ordinary type constructors and data constructors
-
+startsVarId c  = isLower c || c == '_' -- Ordinary Ids
+startsConId c  = isUpper c || c == '(' -- Ordinary type constructors and data constructors
  
  isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
-isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
-isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
-       --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
-isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
-       --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
  \end{code}
  
  %************************************************************************
diff --git a/ghc/compiler/basicTypes/RdrName.lhs b/ghc/compiler/basicTypes/RdrName.lhs

index 900717e..030aa1f 100644 (file)
--- a/ghc/compiler/basicTypes/RdrName.lhs
+++ b/ghc/compiler/basicTypes/RdrName.lhs
@@ -40,25 +40,17 @@ module RdrName (
  
  #include "HsVersions.h"
  
-import OccName ( NameSpace, varName,
-                 OccName, UserFS, 
-                 setOccNameSpace,
-                 mkOccFS, occNameFlavour,
-                 isDataOcc, isTvOcc, isTcOcc,
-                 OccEnv, emptyOccEnv, extendOccEnvList, lookupOccEnv, 
-                 elemOccEnv, plusOccEnv_C, extendOccEnv_C, foldOccEnv,
-                 occEnvElts
-               )
+import OccName
  import Module   ( Module, mkModuleFS )
  import Name    ( Name, NamedThing(getName), nameModule, nameParent_maybe,
                   nameOccName, isExternalName, nameSrcLoc )
  import Maybes  ( mapCatMaybes )
  import SrcLoc  ( isGoodSrcLoc, SrcSpan )
+import FastString ( FastString )
  import Outputable
  import Util    ( thenCmp )
  \end{code}
  
-
  %************************************************************************
  %*                                                                     *
  \subsection{The main data type}
@@ -147,14 +139,14 @@ mkDerivedRdrName parent mk_occ
  ---------------
         -- These two are used when parsing source files
         -- They do encode the module and occurrence names
-mkUnqual :: NameSpace -> UserFS -> RdrName
-mkUnqual sp n = Unqual (mkOccFS sp n)
+mkUnqual :: NameSpace -> FastString -> RdrName
+mkUnqual sp n = Unqual (mkOccNameFS sp n)
  
-mkVarUnqual :: UserFS -> RdrName
-mkVarUnqual n = Unqual (mkOccFS varName n)
+mkVarUnqual :: FastString -> RdrName
+mkVarUnqual n = Unqual (mkVarOccFS n)
  
-mkQual :: NameSpace -> (UserFS, UserFS) -> RdrName
-mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccFS sp n)
+mkQual :: NameSpace -> (FastString, FastString) -> RdrName
+mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccNameFS sp n)
  
  getRdrName :: NamedThing thing => thing -> RdrName
  getRdrName name = nameRdrName (getName name)
@@ -213,7 +205,7 @@ instance Outputable RdrName where
      ppr (Qual mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
      ppr (Orig mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
  
-ppr_name_space occ = ifPprDebug (parens (occNameFlavour occ))
+ppr_name_space occ = ifPprDebug (parens (pprNonVarNameSpace (occNameSpace occ)))
  
  instance OutputableBndr RdrName where
      pprBndr _ n 
diff --git a/ghc/compiler/cmm/CLabel.hs b/ghc/compiler/cmm/CLabel.hs

index 296ad91..fb13589 100644 (file)
--- a/ghc/compiler/cmm/CLabel.hs
+++ b/ghc/compiler/cmm/CLabel.hs
@@ -740,10 +740,10 @@ pprCLbl (CC_Label cc)             = ppr cc
  pprCLbl (CCS_Label ccs)        = ppr ccs
  
  pprCLbl (ModuleInitLabel mod way _)    
-   = ptext SLIT("__stginit_") <> ftext (moduleFS mod)
+   = ptext SLIT("__stginit_") <> ppr mod
         <> char '_' <> text way
  pprCLbl (PlainModuleInitLabel mod _)   
-   = ptext SLIT("__stginit_") <> ftext (moduleFS mod)
+   = ptext SLIT("__stginit_") <> ppr mod
  
  ppIdFlavor :: IdLabelInfo -> SDoc
  ppIdFlavor x = pp_cSEP <>
diff --git a/ghc/compiler/cmm/Cmm.hs b/ghc/compiler/cmm/Cmm.hs

index 304ddb2..13961c1 100644 (file)
--- a/ghc/compiler/cmm/Cmm.hs
+++ b/ghc/compiler/cmm/Cmm.hs
@@ -30,6 +30,7 @@ import CLabel         ( CLabel )
  import ForeignCall     ( CCallConv )
  import Unique          ( Unique, Uniquable(..) )
  import FastString      ( FastString )
+import DATA_WORD       ( Word8 )
  
  -----------------------------------------------------------------------------
  --             Cmm, CmmTop, CmmBasicBlock
@@ -251,9 +252,8 @@ data CmmStatic
         -- align to next N-byte boundary (N must be a power of 2).
    | CmmDataLabel CLabel
         -- label the current position in this section.
-  | CmmString String
+  | CmmString [Word8]
         -- string of 8-bit values only, not zero terminated.
-       -- ToDo: might be more honest to use [Word8] here?
  
  -----------------------------------------------------------------------------
  --             Global STG registers
diff --git a/ghc/compiler/cmm/CmmLex.x b/ghc/compiler/cmm/CmmLex.x

index 8515b3e..c2efd17 100644 (file)
--- a/ghc/compiler/cmm/CmmLex.x
+++ b/ghc/compiler/cmm/CmmLex.x
@@ -227,10 +227,10 @@ tok_decimal span buf len
    = return (L span (CmmT_Int  $! parseInteger buf len 10 octDecDigit))
  
  tok_octal span buf len 
-  = return (L span (CmmT_Int  $! parseInteger (stepOn buf) (len-1) 8 octDecDigit))
+  = return (L span (CmmT_Int  $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
  
  tok_hexadecimal span buf len 
-  = return (L span (CmmT_Int  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
+  = return (L span (CmmT_Int  $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
  
  tok_float str = CmmT_Float $! readRational str
  
diff --git a/ghc/compiler/cmm/CmmParse.y b/ghc/compiler/cmm/CmmParse.y

index 45f411b..cfb2a9d 100644 (file)
--- a/ghc/compiler/cmm/CmmParse.y
+++ b/ghc/compiler/cmm/CmmParse.y
@@ -48,6 +48,7 @@ import Constants      ( wORD_SIZE )
  import Outputable
  
  import Monad           ( when )
+import Data.Char       ( ord )
  
  #include "HsVersions.h"
  }
@@ -177,7 +178,7 @@ static      :: { ExtFCode [CmmStatic] }
                              return [CmmStaticLit (getLit e)] }
         | type ';'                      { return [CmmUninitialised
                                                         (machRepByteWidth $1)] }
-        | 'bits8' '[' ']' STRING ';'   { return [CmmString $4] }
+        | 'bits8' '[' ']' STRING ';'   { return [mkString $4] }
          | 'bits8' '[' INT ']' ';'      { return [CmmUninitialised 
                                                         (fromIntegral $3)] }
          | typenot8 '[' INT ']' ';'     { return [CmmUninitialised 
@@ -427,6 +428,9 @@ section "rodata" = ReadOnlyData
  section "bss"   = UninitialisedData
  section s       = OtherSection s
  
+mkString :: String -> CmmStatic
+mkString s = CmmString (map (fromIntegral.ord) s)
+
  -- mkMachOp infers the type of the MachOp from the type of its first
  -- argument.  We assume that this is correct: for MachOps that don't have
  -- symmetrical args (e.g. shift ops), the first arg determines the type of
diff --git a/ghc/compiler/cmm/PprC.hs b/ghc/compiler/cmm/PprC.hs

index 7427f50..9fece36 100644 (file)
--- a/ghc/compiler/cmm/PprC.hs
+++ b/ghc/compiler/cmm/PprC.hs
@@ -45,6 +45,7 @@ import Data.Bits        ( shiftR )
  import Char             ( ord, chr )
  import IO               ( Handle )
  import DATA_BITS
+import Data.Word       ( Word8 )
  
  #ifdef DEBUG
  import PprCmm          () -- instances only
@@ -881,25 +882,21 @@ machRepSignedCType r | r == wordRep = ptext SLIT("I_")
  -- ---------------------------------------------------------------------
  -- print strings as valid C strings
  
--- Assumes it contains only characters '\0'..'\xFF'!
-pprFSInCStyle :: FastString -> SDoc
-pprFSInCStyle fs = pprStringInCStyle (unpackFS fs)
-
-pprStringInCStyle :: String -> SDoc
+pprStringInCStyle :: [Word8] -> SDoc
  pprStringInCStyle s = doubleQuotes (text (concatMap charToC s))
  
-charToC :: Char -> String
-charToC '\"' = "\\\""
-charToC '\'' = "\\\'"
-charToC '\\' = "\\\\"
-charToC c | c >= ' ' && c <= '~' = [c]
-          | c > '\xFF' = panic ("charToC "++show c)
-          | otherwise = ['\\',
+charToC :: Word8 -> String
+charToC w = 
+  case chr (fromIntegral w) of
+       '\"' -> "\\\""
+       '\'' -> "\\\'"
+       '\\' -> "\\\\"
+       c | c >= ' ' && c <= '~' -> [c]
+          | otherwise -> ['\\',
                           chr (ord '0' + ord c `div` 64),
                           chr (ord '0' + ord c `div` 8 `mod` 8),
                           chr (ord '0' + ord c         `mod` 8)]
  
-
  -- ---------------------------------------------------------------------------
  -- Initialising static objects with floating-point numbers.  We can't
  -- just emit the floating point number, because C will cast it to an int
diff --git a/ghc/compiler/cmm/PprCmm.hs b/ghc/compiler/cmm/PprCmm.hs

index 3c3e976..6e8367d 100644 (file)
--- a/ghc/compiler/cmm/PprCmm.hs
+++ b/ghc/compiler/cmm/PprCmm.hs
@@ -51,6 +51,7 @@ import FastString   ( mkFastString )
  import Data.List    ( intersperse, groupBy )
  import IO           ( Handle )
  import Maybe       ( isJust )
+import Data.Char    ( chr )
  
  pprCmms :: [Cmm] -> SDoc
  pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
@@ -391,7 +392,8 @@ pprStatic s = case s of
      CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
      CmmAlign i         -> nest 4 $ text "align" <+> int i
      CmmDataLabel clbl  -> pprCLabel clbl <> colon
-    CmmString s'       -> nest 4 $ text "I8[]" <+> doubleQuotes (text s')
+    CmmString s'       -> nest 4 $ text "I8[]" <+> 
+                          doubleQuotes (text (map (chr.fromIntegral) s'))
  
  -- --------------------------------------------------------------------------
  -- Registers, whether local (temps) or global
diff --git a/ghc/compiler/codeGen/CgProf.hs b/ghc/compiler/codeGen/CgProf.hs

index aaab2fc..1488e34 100644 (file)
--- a/ghc/compiler/codeGen/CgProf.hs
+++ b/ghc/compiler/codeGen/CgProf.hs
@@ -43,7 +43,7 @@ import MachOp
  import CmmUtils                ( zeroCLit, mkIntCLit, mkLblExpr )
  import CLabel          ( mkCCLabel, mkCCSLabel, mkRtsDataLabel )
  
-import Module          ( moduleUserString )
+import Module          ( moduleString )
  import Id              ( Id )
  import CostCentre
  import StgSyn          ( GenStgExpr(..), StgExpr )
@@ -292,7 +292,7 @@ emitCostCentreDecl
     -> Code
  emitCostCentreDecl cc = do 
    { label <- mkStringCLit (costCentreUserName cc)
-  ; modl  <- mkStringCLit (moduleUserString (cc_mod cc))
+  ; modl  <- mkStringCLit (moduleString (cc_mod cc))
    ; let
       lits = [ zero,    -- StgInt ccID,
               label,    -- char *label,
diff --git a/ghc/compiler/codeGen/CgUtils.hs b/ghc/compiler/codeGen/CgUtils.hs

index 68958d2..2f69927 100644 (file)
--- a/ghc/compiler/codeGen/CgUtils.hs
+++ b/ghc/compiler/codeGen/CgUtils.hs
@@ -54,11 +54,12 @@ import ListSetOps   ( assocDefault )
  import Util            ( filterOut, sortLe )
  import DynFlags                ( DynFlags(..), HscTarget(..) )
  import Packages                ( HomeModules )
-import FastString      ( LitString, FastString, unpackFS )
+import FastString      ( LitString, FastString, bytesFS )
  import Outputable
  
  import Char            ( ord )
  import DATA_BITS
+import DATA_WORD       ( Word8 )
  import Maybe           ( isNothing )
  
  -------------------------------------------------------------------------
@@ -77,7 +78,8 @@ addIdReps ids = [(idCgRep id, id) | id <- ids]
  -------------------------------------------------------------------------
  
  cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = mkStringCLit (unpackFS s)
+cgLit (MachStr s) = mkByteStringCLit (bytesFS s)
+ -- not unpackFS; we want the UTF-8 byte stream.
  cgLit other_lit   = return (mkSimpleLit other_lit)
  
  mkSimpleLit :: Literal -> CmmLit
@@ -308,10 +310,13 @@ emitRODataLits lbl lits
  mkStringCLit :: String -> FCode CmmLit
  -- Make a global definition for the string,
  -- and return its label
-mkStringCLit str 
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
    = do         { uniq <- newUnique
         ; let lbl = mkStringLitLabel uniq
-       ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString str]
+       ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
         ; return (CmmLabel lbl) }
  
  -------------------------------------------------------------------------
diff --git a/ghc/compiler/codeGen/ClosureInfo.lhs b/ghc/compiler/codeGen/ClosureInfo.lhs

index b0e9e23..a5362e6 100644 (file)
--- a/ghc/compiler/codeGen/ClosureInfo.lhs
+++ b/ghc/compiler/codeGen/ClosureInfo.lhs
@@ -69,7 +69,7 @@ import StaticFlags    ( opt_SccProfilingOn, opt_OmitBlackHoling,
  import Id              ( Id, idType, idArity, idName )
  import DataCon         ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
  import Name            ( Name, nameUnique, getOccName, getOccString )
-import OccName         ( occNameUserString )
+import OccName         ( occNameString )
  import Type            ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
  import TcType          ( tcSplitSigmaTy )
  import TyCon           ( isFunTyCon, isAbstractTyCon )
@@ -930,12 +930,12 @@ closureValDescr, closureTypeDescr :: ClosureInfo -> String
  closureValDescr (ClosureInfo {closureDescr = descr}) 
    = descr
  closureValDescr (ConInfo {closureCon = con})
-  = occNameUserString (getOccName con)
+  = occNameString (getOccName con)
  
  closureTypeDescr (ClosureInfo { closureType = ty })
    = getTyDescription ty
  closureTypeDescr (ConInfo { closureCon = data_con })
-  = occNameUserString (getOccName (dataConTyCon data_con))
+  = occNameString (getOccName (dataConTyCon data_con))
  
  getTyDescription :: Type -> String
  getTyDescription ty
diff --git a/ghc/compiler/deSugar/Check.lhs b/ghc/compiler/deSugar/Check.lhs

index 309aab2..6d7784d 100644 (file)
--- a/ghc/compiler/deSugar/Check.lhs
+++ b/ghc/compiler/deSugar/Check.lhs
@@ -18,7 +18,8 @@ import DsUtils                ( EquationInfo(..), MatchResult(..),
  import MatchLit                ( tidyLitPat, tidyNPat )
  import Id              ( Id, idType )
  import DataCon         ( DataCon, dataConTyCon, dataConOrigArgTys, dataConFieldLabels )
-import Name             ( Name, mkInternalName, getOccName, isDataSymOcc, getName, mkVarOcc )
+import Name             ( Name, mkInternalName, getOccName, isDataSymOcc,
+                         getName, mkVarOccFS )
  import TysWiredIn
  import PrelNames       ( unboundKey )
  import TyCon            ( tyConDataCons, tupleTyConBoxity, isTupleTyCon )
@@ -382,7 +383,7 @@ make_row_vars used_lits (_, EqnInfo { eqn_pats = pats})
       new_var = hash_x
  
  hash_x = mkInternalName unboundKey {- doesn't matter much -}
-                    (mkVarOcc FSLIT("#x"))
+                    (mkVarOccFS FSLIT("#x"))
                      noSrcLoc
  
  make_row_vars_for_constructor :: (EqnNo, EquationInfo) -> [WarningPat]
diff --git a/ghc/compiler/deSugar/DsForeign.lhs b/ghc/compiler/deSugar/DsForeign.lhs

index d784eb8..52956a0 100644 (file)
--- a/ghc/compiler/deSugar/DsForeign.lhs
+++ b/ghc/compiler/deSugar/DsForeign.lhs
@@ -28,9 +28,8 @@ import SMRep          ( argMachRep, typeCgRep )
  import CoreUtils       ( exprType, mkInlineMe )
  import Id              ( Id, idType, idName, mkSysLocal, setInlinePragma )
  import Literal         ( Literal(..), mkStringLit )
-import Module          ( moduleString )
+import Module          ( moduleFS )
  import Name            ( getOccString, NamedThing(..) )
-import OccName         ( encodeFS )
  import Type            ( repType, coreEqType )
  import TcType          ( Type, mkFunTys, mkForAllTys, mkTyConApp,
                           mkFunTy, tcSplitTyConApp_maybe, 
@@ -146,7 +145,7 @@ dsFImport id (CImport cconv safety header lib spec)
    = dsCImport id spec cconv safety no_hdrs       `thenDs` \(ids, h, c) ->
      returnDs (ids, h, c, if no_hdrs then Nothing else Just header)
    where
-    no_hdrs = nullFastString header
+    no_hdrs = nullFS header
  
    -- FIXME: the `lib' field is needed for .NET ILX generation when invoking
    --       routines that are external to the .NET runtime, but GHC doesn't
@@ -246,7 +245,7 @@ dsFCall fn_id fcall no_hdrs
         the_ccall_app = mkFCall ccall_uniq fcall val_args ccall_result_ty
         work_rhs      = mkLams tvs (mkLams work_arg_ids the_ccall_app)
         work_id       = setImpInline no_hdrs $  -- See comments with setImpInline
-                       mkSysLocal (encodeFS FSLIT("$wccall")) work_uniq worker_ty
+                       mkSysLocal FSLIT("$wccall") work_uniq worker_ty
  
         -- Build the wrapper
         work_app     = mkApps (mkVarApps (Var work_id) tvs) val_args
@@ -356,7 +355,7 @@ dsFExportDynamic id cconv
       getModuleDs                               `thenDs` \ mod_name -> 
       let 
          -- hack: need to get at the name of the C stub we're about to generate.
-       fe_nm      = mkFastString (moduleString mod_name ++ "_" ++ toCName fe_id)
+       fe_nm      = mkFastString (unpackFS (zEncodeFS (moduleFS mod_name)) ++ "_" ++ toCName fe_id)
       in
       newSysLocalDs arg_ty                      `thenDs` \ cback ->
       dsLookupGlobalId newStablePtrName         `thenDs` \ newStablePtrId ->
diff --git a/ghc/compiler/deSugar/DsMeta.hs b/ghc/compiler/deSugar/DsMeta.hs

index e5e079e..fcbcc78 100644 (file)
--- a/ghc/compiler/deSugar/DsMeta.hs
+++ b/ghc/compiler/deSugar/DsMeta.hs
@@ -30,16 +30,16 @@ import qualified Language.Haskell.TH as TH
  import HsSyn
  import Class (FunDep)
  import PrelNames  ( rationalTyConName, integerTyConName, negateName )
-import OccName   ( isDataOcc, isTvOcc, occNameUserString )
+import OccName   ( isDataOcc, isTvOcc, occNameString )
  -- To avoid clashes with DsMeta.varName we must make a local alias for OccName.varName
  -- we do this by removing varName from the import of OccName above, making
  -- a qualified instance of OccName and using OccNameAlias.varName where varName
  -- ws previously used in this file.
  import qualified OccName
  
-import Module    ( Module, mkModule, moduleUserString )
+import Module    ( Module, mkModule, moduleString )
  import Id         ( Id, mkLocalId )
-import OccName   ( mkOccFS )
+import OccName   ( mkOccNameFS )
  import Name       ( Name, mkExternalName, localiseName, nameOccName, nameModule, 
                     isExternalName, getSrcLoc )
  import NameEnv
@@ -911,7 +911,7 @@ globalVar name
         ; MkC uni <- coreIntLit (getKey (getUnique name))
         ; rep2 mkNameLName [occ,uni] }
    where
-      name_mod = moduleUserString (nameModule name)
+      name_mod = moduleString (nameModule name)
        name_occ = nameOccName name
        mk_varg | OccName.isDataOcc name_occ = mkNameG_dName
               | OccName.isVarOcc  name_occ = mkNameG_vName
@@ -963,7 +963,7 @@ wrapNongenSyms binds (MkC body)
              ; return (NonRec id var) }
  
  occNameLit :: Name -> DsM (Core String)
-occNameLit n = coreStringLit (occNameUserString (nameOccName n))
+occNameLit n = coreStringLit (occNameString (nameOccName n))
  
  
  -- %*********************************************************************
@@ -1390,7 +1390,7 @@ thSyn = mkModule "Language.Haskell.TH.Syntax"
  thLib = mkModule "Language.Haskell.TH.Lib"
  
  mk_known_key_name mod space str uniq 
-  = mkExternalName uniq mod (mkOccFS space str) 
+  = mkExternalName uniq mod (mkOccNameFS space str) 
                    Nothing noSrcLoc
  
  libFun = mk_known_key_name thLib OccName.varName
diff --git a/ghc/compiler/deSugar/DsUtils.lhs b/ghc/compiler/deSugar/DsUtils.lhs

index b77bb96..1465554 100644 (file)
--- a/ghc/compiler/deSugar/DsUtils.lhs
+++ b/ghc/compiler/deSugar/DsUtils.lhs
@@ -69,11 +69,12 @@ import PrelNames    ( unpackCStringName, unpackCStringUtf8Name,
                           plusIntegerName, timesIntegerName, smallIntegerDataConName, 
                           lengthPName, indexPName )
  import Outputable
-import UnicodeUtil      ( intsToUtf8 )
  import SrcLoc          ( Located(..), unLoc )
  import Util             ( isSingleton, notNull, zipEqual, sortWith )
  import ListSetOps      ( assocDefault )
  import FastString
+
+import Data.Char       ( ord )
  \end{code}
  
  
@@ -469,7 +470,7 @@ mkSmallIntegerLit small_integer_data_con i = mkConApp small_integer_data_con [mk
  mkStringExpr str = mkStringExprFS (mkFastString str)
  
  mkStringExprFS str
-  | nullFastString str
+  | nullFS str
    = returnDs (mkNilExpr charTy)
  
    | lengthFS str == 1
@@ -478,17 +479,17 @@ mkStringExprFS str
      in
      returnDs (mkConsExpr charTy the_char (mkNilExpr charTy))
  
-  | all safeChar int_chars
+  | all safeChar chars
    = dsLookupGlobalId unpackCStringName `thenDs` \ unpack_id ->
      returnDs (App (Var unpack_id) (Lit (MachStr str)))
  
    | otherwise
    = dsLookupGlobalId unpackCStringUtf8Name     `thenDs` \ unpack_id ->
-    returnDs (App (Var unpack_id) (Lit (MachStr (mkFastString (intsToUtf8 int_chars)))))
+    returnDs (App (Var unpack_id) (Lit (MachStr str)))
  
    where
-    int_chars = unpackIntFS str
-    safeChar c = c >= 1 && c <= 0xFF
+    chars = unpackFS str
+    safeChar c = ord c >= 1 && ord c <= 0xFF
  \end{code}
  
  
diff --git a/ghc/compiler/ghci/ByteCodeGen.lhs b/ghc/compiler/ghci/ByteCodeGen.lhs

index 4d2fa73..f526ed9 100644 (file)
--- a/ghc/compiler/ghci/ByteCodeGen.lhs
+++ b/ghc/compiler/ghci/ByteCodeGen.lhs
@@ -53,7 +53,8 @@ import OrdList
  import Constants       ( wORD_SIZE )
  
  import Data.List       ( intersperse, sortBy, zip4, zip5, partition )
-import Foreign         ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8 )
+import Foreign         ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8,
+                         withForeignPtr )
  import Foreign.C       ( CInt )
  import Control.Exception       ( throwDyn )
  
@@ -1084,18 +1085,18 @@ pushAtom d p (AnnLit lit)
          pushStr s 
             = let getMallocvilleAddr
                      = case s of
-                         FastString _ l ba -> 
-                            -- sigh, a string in the heap is no good to us.
-                            -- We need a static C pointer, since the type of 
-                            -- a string literal is Addr#.  So, copy the string 
-                            -- into C land and remember the pointer so we can
-                           -- free it later.
-                            let n = I# l
-                            -- CAREFUL!  Chars are 32 bits in ghc 4.09+
-                            in  ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
+                         FastString _ n _ fp _ -> 
+                           -- we could grab the Ptr from the ForeignPtr,
+                           -- but then we have no way to control its lifetime.
+                           -- In reality it'll probably stay alive long enoungh
+                           -- by virtue of the global FastString table, but
+                           -- to be on the safe side we copy the string into
+                           -- a malloc'd area of memory.
+                                ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
                                  recordMallocBc ptr         `thenBc_`
                                  ioToBc (
-                                   do memcpy ptr ba (fromIntegral n)
+                                   withForeignPtr fp $ \p -> do
+                                     memcpy ptr p (fromIntegral n)
                                       pokeByteOff ptr n (fromIntegral (ord '\0') :: Word8)
                                        return ptr
                                     )
@@ -1110,7 +1111,7 @@ pushAtom d p other
                (pprCoreExpr (deAnnotate (undefined, other)))
  
  foreign import ccall unsafe "memcpy"
- memcpy :: Ptr a -> ByteArray# -> CInt -> IO ()
+ memcpy :: Ptr a -> Ptr b -> CInt -> IO ()
  
  
  -- -----------------------------------------------------------------------------
diff --git a/ghc/compiler/ghci/ByteCodeLink.lhs b/ghc/compiler/ghci/ByteCodeLink.lhs

index ee64b8a..875f1d6 100644 (file)
--- a/ghc/compiler/ghci/ByteCodeLink.lhs
+++ b/ghc/compiler/ghci/ByteCodeLink.lhs
@@ -21,10 +21,10 @@ import ObjLink              ( lookupSymbol )
  
  import Name            ( Name,  nameModule, nameOccName, isExternalName )
  import NameEnv
-import OccName         ( occNameString )
+import OccName         ( occNameFS )
  import PrimOp          ( PrimOp, primOpOcc )
-import Module          ( moduleString  )
-import FastString      ( FastString(..), unpackFS )
+import Module          ( moduleFS )
+import FastString      ( FastString(..), unpackFS, zEncodeFS )
  import Outputable
  import Panic            ( GhcException(..) )
  
@@ -256,12 +256,12 @@ linkFail who what
  -- HACKS!!!  ToDo: cleaner
  nameToCLabel :: Name -> String{-suffix-} -> String
  nameToCLabel n suffix
-   = moduleString (nameModule n)
-     ++ '_':occNameString (nameOccName n) ++ '_':suffix
+   = unpackFS (zEncodeFS (moduleFS (nameModule n)))
+     ++ '_': unpackFS (zEncodeFS (occNameFS (nameOccName n))) ++ '_':suffix
  
  primopToCLabel :: PrimOp -> String{-suffix-} -> String
  primopToCLabel primop suffix
-   = let str = "GHCziPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
+   = let str = "GHCziPrimopWrappers_" ++ unpackFS (zEncodeFS (occNameFS (primOpOcc primop))) ++ '_':suffix
       in --trace ("primopToCLabel: " ++ str)
          str
  \end{code}
diff --git a/ghc/compiler/ghci/InteractiveUI.hs b/ghc/compiler/ghci/InteractiveUI.hs

index 0bf37dc..8fee9ba 100644 (file)
--- a/ghc/compiler/ghci/InteractiveUI.hs
+++ b/ghc/compiler/ghci/InteractiveUI.hs
@@ -25,10 +25,10 @@ import PprTyThing
  import Outputable
  
  -- for createtags (should these come via GHC?)
-import Module( moduleUserString )
-import Name( nameSrcLoc, nameModule, nameOccName )
-import OccName( pprOccName )
-import SrcLoc( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol )
+import Module          ( moduleString )
+import Name            ( nameSrcLoc, nameModule, nameOccName )
+import OccName         ( pprOccName )
+import SrcLoc          ( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol )
  
  -- Other random utilities
  import Digraph         ( flattenSCCs )
@@ -813,7 +813,7 @@ createTagsFile session tagskind tagFile = do
          is_interpreted <- GHC.moduleIsInterpreted session m
          -- should we just skip these?
          when (not is_interpreted) $
-          throwDyn (CmdLineError ("module '" ++ moduleUserString m ++ "' is not interpreted"))
+          throwDyn (CmdLineError ("module '" ++ moduleString m ++ "' is not interpreted"))
  
          mbModInfo <- GHC.getModuleInfo session m
          let unqual 
diff --git a/ghc/compiler/hsSyn/Convert.lhs b/ghc/compiler/hsSyn/Convert.lhs

index 96623bb..9dddd29 100644 (file)
--- a/ghc/compiler/hsSyn/Convert.lhs
+++ b/ghc/compiler/hsSyn/Convert.lhs
@@ -20,7 +20,8 @@ import qualified Name ( Name, mkInternalName, getName )
  import Module   ( Module, mkModule )
  import RdrHsSyn        ( mkClassDecl, mkTyData )
  import qualified OccName
-import OccName ( startsVarId, startsVarSym, startsConId, startsConSym )
+import OccName ( startsVarId, startsVarSym, startsConId, startsConSym,
+                 pprNameSpace )
  import SrcLoc  ( Located(..), SrcSpan )
  import Type    ( Type )
  import TysWiredIn ( unitTyCon, tupleTyCon, tupleCon, trueDataCon, nilDataCon, consDataCon )
@@ -550,7 +551,7 @@ okOcc ns str@(c:_)
  
  badOcc :: OccName.NameSpace -> String -> SDoc
  badOcc ctxt_ns occ 
-  = ptext SLIT("Illegal") <+> text (OccName.nameSpaceString ctxt_ns) 
+  = ptext SLIT("Illegal") <+> pprNameSpace ctxt_ns
         <+> ptext SLIT("name:") <+> quotes (text occ)
  
  thRdrName :: OccName.NameSpace -> String -> TH.NameFlavour -> RdrName
@@ -605,7 +606,7 @@ mk_uniq_occ ns occ uniq
  
  -- The packing and unpacking is rather turgid :-(
  mk_occ :: OccName.NameSpace -> String -> OccName.OccName
-mk_occ ns occ = OccName.mkOccFS ns (mkFastString occ)
+mk_occ ns occ = OccName.mkOccNameFS ns (mkFastString occ)
  
  mk_ghc_ns :: TH.NameSpace -> OccName.NameSpace
  mk_ghc_ns TH.DataName  = OccName.dataName
diff --git a/ghc/compiler/hsSyn/HsDecls.lhs b/ghc/compiler/hsSyn/HsDecls.lhs

index c977496..5253d11 100644 (file)
--- a/ghc/compiler/hsSyn/HsDecls.lhs
+++ b/ghc/compiler/hsSyn/HsDecls.lhs
@@ -721,8 +721,8 @@ instance Outputable ForeignImport where
          ptext SLIT("dynamic")
        pprCEntity _      _   (CWrapper) = ptext SLIT("wrapper")
        --
-      pprLib lib | nullFastString lib = empty
-                | otherwise          = char '[' <> ppr lib <> char ']'
+      pprLib lib | nullFS lib = empty
+                | otherwise  = char '[' <> ppr lib <> char ']'
  
  instance Outputable ForeignExport where
    ppr (CExport  (CExportStatic lbl cconv)) = 
diff --git a/ghc/compiler/hsSyn/HsUtils.lhs b/ghc/compiler/hsSyn/HsUtils.lhs

index 612e57a..f8efa6c 100644 (file)
--- a/ghc/compiler/hsSyn/HsUtils.lhs
+++ b/ghc/compiler/hsSyn/HsUtils.lhs
@@ -27,7 +27,7 @@ import RdrName                ( RdrName, getRdrName, mkRdrUnqual )
  import Var             ( Id )
  import Type            ( Type )
  import DataCon         ( DataCon, dataConWrapId, dataConSourceArity )
-import OccName         ( mkVarOcc )
+import OccName         ( mkVarOccFS )
  import Name            ( Name )
  import BasicTypes      ( RecFlag(..) )
  import SrcLoc
@@ -136,7 +136,7 @@ mkHsOpApp e1 op e2 = OpApp e1 (noLoc (HsVar op)) (error "mkOpApp:fixity") e2
  
  mkHsSplice e = HsSplice unqualSplice e
  
-unqualSplice = mkRdrUnqual (mkVarOcc FSLIT("splice"))
+unqualSplice = mkRdrUnqual (mkVarOccFS FSLIT("splice"))
                 -- A name (uniquified later) to
                 -- identify the splice
  
diff --git a/ghc/compiler/iface/LoadIface.lhs b/ghc/compiler/iface/LoadIface.lhs

index 356cf22..8c496f7 100644 (file)
--- a/ghc/compiler/iface/LoadIface.lhs
+++ b/ghc/compiler/iface/LoadIface.lhs
@@ -45,7 +45,7 @@ import NameEnv
  import MkId            ( seqId )
  import Module          ( Module, ModLocation(ml_hi_file), emptyModuleEnv, 
                           addBootSuffix_maybe,
-                         extendModuleEnv, lookupModuleEnv, moduleUserString
+                         extendModuleEnv, lookupModuleEnv, moduleString
                         )
  import OccName         ( OccName, mkOccEnv, lookupOccEnv, mkClassTyConOcc, mkClassDataConOcc,
                           mkSuperDictSelOcc, mkDataConWrapperOcc, mkDataConWorkerOcc )
@@ -312,7 +312,7 @@ loadDecl ignore_prags mod (_version, decl)
         -- imported name, to fix the module correctly in the cache
      mk_new_bndr mod mb_parent occ 
         = newGlobalBinder mod occ mb_parent 
-                         (importedSrcLoc (moduleUserString mod))
+                         (importedSrcLoc (moduleString mod))
  
      doc = ptext SLIT("Declaration for") <+> ppr (ifName decl)
  
diff --git a/ghc/compiler/iface/MkIface.lhs b/ghc/compiler/iface/MkIface.lhs

index 5be56bf..2f15ee3 100644 (file)
--- a/ghc/compiler/iface/MkIface.lhs
+++ b/ghc/compiler/iface/MkIface.lhs
@@ -214,7 +214,7 @@ import OccName              ( OccName, OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv,
                           isEmptyOccSet, intersectOccSet, intersectsOccSet,
                           occNameFS, isTcOcc )
  import Module          ( Module, moduleFS,
-                         ModLocation(..), mkSysModuleFS, moduleUserString,
+                         ModLocation(..), mkModuleFS, moduleString,
                           ModuleEnv, emptyModuleEnv, lookupModuleEnv,
                           extendModuleEnv_C
                         )
@@ -726,7 +726,7 @@ mkIfaceExports :: NameSet -> [(Module, [GenAvailInfo OccName])]
    -- Group by module and sort by occurrence
    -- This keeps the list in canonical order
  mkIfaceExports exports 
-  = [ (mkSysModuleFS fs, eltsFM avails)
+  = [ (mkModuleFS fs, eltsFM avails)
      | (fs, avails) <- fmToList groupFM
      ]
    where
@@ -768,7 +768,7 @@ checkOldIface :: HscEnv
  
  checkOldIface hsc_env mod_summary source_unchanged maybe_iface
    = do { showPass (hsc_dflags hsc_env) 
-                  ("Checking old interface for " ++ moduleUserString (ms_mod mod_summary)) ;
+                  ("Checking old interface for " ++ moduleString (ms_mod mod_summary)) ;
  
         ; initIfaceCheck hsc_env $
           check_old_iface mod_summary source_unchanged maybe_iface
diff --git a/ghc/compiler/main/DriverMkDepend.hs b/ghc/compiler/main/DriverMkDepend.hs

index fe2d8f3..80d906c 100644 (file)
--- a/ghc/compiler/main/DriverMkDepend.hs
+++ b/ghc/compiler/main/DriverMkDepend.hs
@@ -20,7 +20,7 @@ import HscTypes               ( HscEnv, IsBootInterface, msObjFilePath, msHsFilePath )
  import Packages                ( PackageIdH(..) )
  import SysTools                ( newTempName )
  import qualified SysTools
-import Module          ( Module, ModLocation(..), mkModule, moduleUserString,
+import Module          ( Module, ModLocation(..), mkModule, 
                           addBootSuffix_maybe )
  import Digraph         ( SCC(..) )
  import Finder          ( findModule, FindResult(..) )
diff --git a/ghc/compiler/main/DriverPipeline.hs b/ghc/compiler/main/DriverPipeline.hs

index 85099e8..171cecf 100644 (file)
--- a/ghc/compiler/main/DriverPipeline.hs
+++ b/ghc/compiler/main/DriverPipeline.hs
@@ -1396,9 +1396,9 @@ getOptionsFromSource file
                        | otherwise -> return []
  
  getOptionsFromStringBuffer :: StringBuffer -> FilePath -> [(Int,String)]
-getOptionsFromStringBuffer buffer@(StringBuffer _ len# _) fn = 
+getOptionsFromStringBuffer buffer@(StringBuffer _ len _) fn = 
    let 
-       ls = lines (lexemeToString buffer (I# len#))  -- lazy, so it's ok
+       ls = lines (lexemeToString buffer len)  -- lazy, so it's ok
    in
    look 1 ls
    where
diff --git a/ghc/compiler/main/Finder.lhs b/ghc/compiler/main/Finder.lhs

index 81dedb8..fbde40f 100644 (file)
--- a/ghc/compiler/main/Finder.lhs
+++ b/ghc/compiler/main/Finder.lhs
@@ -267,7 +267,7 @@ searchPathExts paths mod exts
         return result
  
    where
-    basename = dots_to_slashes (moduleUserString mod)
+    basename = dots_to_slashes (moduleString mod)
  
      to_search :: [(FilePath, IO FinderCacheEntry)]
      to_search = [ (file, fn path basename)
@@ -347,7 +347,7 @@ mkHomeModLocation2 :: DynFlags
                    -> String    -- Suffix
                    -> IO ModLocation
  mkHomeModLocation2 dflags mod src_basename ext = do
-   let mod_basename = dots_to_slashes (moduleUserString mod)
+   let mod_basename = dots_to_slashes (moduleString mod)
  
     obj_fn  <- mkObjPath  dflags src_basename mod_basename
     hi_fn   <- mkHiPath   dflags src_basename mod_basename
@@ -420,7 +420,7 @@ mkStubPaths dflags mod location
    = let
                 stubdir = stubDir dflags
  
-               mod_basename = dots_to_slashes (moduleUserString mod)
+               mod_basename = dots_to_slashes (moduleString mod)
                 src_basename = basenameOf (expectJust "mkStubPaths" 
                                                 (ml_hs_file location))
  
diff --git a/ghc/compiler/main/GHC.hs b/ghc/compiler/main/GHC.hs

index 77cd9d4..7e0ec2f 100644 (file)
--- a/ghc/compiler/main/GHC.hs
+++ b/ghc/compiler/main/GHC.hs
@@ -628,7 +628,7 @@ load2 s@(Session ref) how_much mod_graph = do
               when (ghci_mode == BatchCompile && isJust ofile && not do_linking) $
                 debugTraceMsg dflags 1 (text ("Warning: output was redirected with -o, " ++
                                               "but no output will be generated\n" ++
-                                             "because there is no " ++ moduleUserString main_mod ++ " module."))
+                                             "because there is no " ++ moduleString main_mod ++ " module."))
  
               -- link everything together
                linkresult <- link ghci_mode dflags do_linking (hsc_HPT hsc_env1)
diff --git a/ghc/compiler/main/HscTypes.lhs b/ghc/compiler/main/HscTypes.lhs

index 20e84ab..48041c0 100644 (file)
--- a/ghc/compiler/main/HscTypes.lhs
+++ b/ghc/compiler/main/HscTypes.lhs
@@ -996,7 +996,7 @@ showModMsg use_object mod_summary
                     char ')'])
   where 
      mod     = ms_mod mod_summary 
-    mod_str = moduleUserString mod ++ hscSourceString (ms_hsc_src mod_summary)
+    mod_str = moduleString mod ++ hscSourceString (ms_hsc_src mod_summary)
  \end{code}
  
  
diff --git a/ghc/compiler/nativeGen/PprMach.hs b/ghc/compiler/nativeGen/PprMach.hs

index 69d6573..4392ae7 100644 (file)
--- a/ghc/compiler/nativeGen/PprMach.hs
+++ b/ghc/compiler/nativeGen/PprMach.hs
@@ -697,17 +697,11 @@ pprLabel :: CLabel -> Doc
  pprLabel lbl = pprGloblDecl lbl $$ (pprCLabel_asm lbl <> char ':')
  
  
--- Assume we want to backslash-convert the string
  pprASCII str
-  = vcat (map do1 (str ++ [chr 0]))
+  = vcat (map do1 str) $$ do1 0
      where
-       do1 :: Char -> Doc
-       do1 c = ptext SLIT("\t.byte\t0x") <> hshow (ord c)
-
-       hshow :: Int -> Doc
-       hshow n | n >= 0 && n <= 255
-               = char (tab !! (n `div` 16)) <> char (tab !! (n `mod` 16))
-       tab = "0123456789ABCDEF"
+       do1 :: Word8 -> Doc
+       do1 w = ptext SLIT("\t.byte\t") <> int (fromIntegral w)
  
  pprAlign bytes =
         IF_ARCH_alpha(ptextSLIT(".align ") <> int pow2,
diff --git a/ghc/compiler/ndpFlatten/FlattenMonad.hs b/ghc/compiler/ndpFlatten/FlattenMonad.hs

index 43e804c..4540508 100644 (file)
--- a/ghc/compiler/ndpFlatten/FlattenMonad.hs
+++ b/ghc/compiler/ndpFlatten/FlattenMonad.hs
@@ -67,7 +67,6 @@ import Monad      (mplus)
  import Panic        (panic)
  import Outputable   (Outputable(ppr), pprPanic)
  import UniqSupply   (UniqSupply, splitUniqSupply, uniqFromSupply)
-import OccName     (UserFS)
  import Var          (Var, idType)
  import Id          (Id, mkSysLocal)
  import Name        (Name)
@@ -86,6 +85,7 @@ import PrimOp     ( PrimOp(..) )
  import PrelInfo            ( primOpId )
  import CoreSyn      (Expr(..), Bind(..), CoreBndr, CoreExpr, CoreBind, mkApps)
  import CoreUtils    (exprType)
+import FastString   (FastString)
  
  -- friends
  import NDPCoreUtils (parrElemTy)
@@ -176,7 +176,7 @@ runFlatten hsc_env eps us m
  -- generate a new local variable whose name is based on the given lexeme and
  -- whose type is as specified in the second argument (EXPORTED)
  --
-newVar           :: UserFS -> Type -> Flatten Var
+newVar           :: FastString -> Type -> Flatten Var
  newVar lexeme ty  = Flatten $ \state ->
    let
      (us1, us2) = splitUniqSupply (us state)
@@ -187,7 +187,7 @@ newVar lexeme ty  = Flatten $ \state ->
  -- generate a non-recursive binding using a new binder whose name is derived
  -- from the given lexeme (EXPORTED)
  --
-mkBind          :: UserFS -> CoreExpr -> Flatten (CoreBndr, CoreBind)
+mkBind          :: FastString -> CoreExpr -> Flatten (CoreBndr, CoreBind)
  mkBind lexeme e  =
    do
      v <- newVar lexeme (exprType e)
diff --git a/ghc/compiler/parser/Ctype.lhs b/ghc/compiler/parser/Ctype.lhs

index dfdb94a..dbe4e9f 100644 (file)
--- a/ghc/compiler/parser/Ctype.lhs
+++ b/ghc/compiler/parser/Ctype.lhs
@@ -9,6 +9,7 @@ module Ctype
         , is_lower      -- Char# -> Bool
         , is_upper      -- Char# -> Bool
         , is_digit      -- Char# -> Bool
+       , is_alphanum   -- Char# -> Bool
  
         , is_hexdigit, is_octdigit
         , hexDigit, octDecDigit
@@ -50,6 +51,7 @@ is_space  = is_ctype cSpace
  is_lower  = is_ctype cLower
  is_upper  = is_ctype cUpper
  is_digit  = is_ctype cDigit
+is_alphanum = is_ctype (cLower+cUpper+cDigit)
  \end{code}
  
  Utils
@@ -241,99 +243,99 @@ charType c = case c of
     '\158' -> 0                         -- \236
     '\159' -> 0                         -- \237
     '\160' -> cSpace                    --
-   '\161' -> cAny + cSymbol            -- ¡
-   '\162' -> cAny + cSymbol            -- ¢
-   '\163' -> cAny + cSymbol            -- £
-   '\164' -> cAny + cSymbol            -- ¤
-   '\165' -> cAny + cSymbol            -- ¥
-   '\166' -> cAny + cSymbol            -- ¦
-   '\167' -> cAny + cSymbol            -- §
-   '\168' -> cAny + cSymbol            -- ¨
-   '\169' -> cAny + cSymbol            -- ©
-   '\170' -> cAny + cSymbol            -- ª
-   '\171' -> cAny + cSymbol            -- «
-   '\172' -> cAny + cSymbol            -- ¬
-   '\173' -> cAny + cSymbol            -- 
-   '\174' -> cAny + cSymbol            -- ®
-   '\175' -> cAny + cSymbol            -- ¯
-   '\176' -> cAny + cSymbol            -- °
-   '\177' -> cAny + cSymbol            -- ±
-   '\178' -> cAny + cSymbol            -- ²
-   '\179' -> cAny + cSymbol            -- ³
-   '\180' -> cAny + cSymbol            -- ´
-   '\181' -> cAny + cSymbol            -- µ
-   '\182' -> cAny + cSymbol            -- ¶
-   '\183' -> cAny + cSymbol            -- ·
-   '\184' -> cAny + cSymbol            -- ¸
-   '\185' -> cAny + cSymbol            -- ¹
-   '\186' -> cAny + cSymbol            -- º
-   '\187' -> cAny + cSymbol            -- »
-   '\188' -> cAny + cSymbol            -- ¼
-   '\189' -> cAny + cSymbol            -- ½
-   '\190' -> cAny + cSymbol            -- ¾
-   '\191' -> cAny + cSymbol            -- ¿
-   '\192' -> cAny + cIdent  + cUpper   -- À
-   '\193' -> cAny + cIdent  + cUpper   -- Á
-   '\194' -> cAny + cIdent  + cUpper   -- Â
-   '\195' -> cAny + cIdent  + cUpper   -- Ã
-   '\196' -> cAny + cIdent  + cUpper   -- Ä
-   '\197' -> cAny + cIdent  + cUpper   -- Å
-   '\198' -> cAny + cIdent  + cUpper   -- Æ
-   '\199' -> cAny + cIdent  + cUpper   -- Ç
-   '\200' -> cAny + cIdent  + cUpper   -- È
-   '\201' -> cAny + cIdent  + cUpper   -- É
-   '\202' -> cAny + cIdent  + cUpper   -- Ê
-   '\203' -> cAny + cIdent  + cUpper   -- Ë
-   '\204' -> cAny + cIdent  + cUpper   -- Ì
-   '\205' -> cAny + cIdent  + cUpper   -- Í
-   '\206' -> cAny + cIdent  + cUpper   -- Î
-   '\207' -> cAny + cIdent  + cUpper   -- Ï
-   '\208' -> cAny + cIdent  + cUpper   -- Ð
-   '\209' -> cAny + cIdent  + cUpper   -- Ñ
-   '\210' -> cAny + cIdent  + cUpper   -- Ò
-   '\211' -> cAny + cIdent  + cUpper   -- Ó
-   '\212' -> cAny + cIdent  + cUpper   -- Ô
-   '\213' -> cAny + cIdent  + cUpper   -- Õ
-   '\214' -> cAny + cIdent  + cUpper   -- Ö
-   '\215' -> cAny + cSymbol + cLower   -- ×
-   '\216' -> cAny + cIdent  + cUpper   -- Ø
-   '\217' -> cAny + cIdent  + cUpper   -- Ù
-   '\218' -> cAny + cIdent  + cUpper   -- Ú
-   '\219' -> cAny + cIdent  + cUpper   -- Û
-   '\220' -> cAny + cIdent  + cUpper   -- Ü
-   '\221' -> cAny + cIdent  + cUpper   -- Ý
-   '\222' -> cAny + cIdent  + cUpper   -- Þ
-   '\223' -> cAny + cIdent             -- ß
-   '\224' -> cAny + cIdent  + cLower   -- à
-   '\225' -> cAny + cIdent  + cLower   -- á
-   '\226' -> cAny + cIdent  + cLower   -- â
-   '\227' -> cAny + cIdent  + cLower   -- ã
-   '\228' -> cAny + cIdent  + cLower   -- ä
-   '\229' -> cAny + cIdent  + cLower   -- å
-   '\230' -> cAny + cIdent  + cLower   -- æ
-   '\231' -> cAny + cIdent  + cLower   -- ç
-   '\232' -> cAny + cIdent  + cLower   -- è
-   '\233' -> cAny + cIdent  + cLower   -- é
-   '\234' -> cAny + cIdent  + cLower   -- ê
-   '\235' -> cAny + cIdent  + cLower   -- ë
-   '\236' -> cAny + cIdent  + cLower   -- ì
-   '\237' -> cAny + cIdent  + cLower   -- í
-   '\238' -> cAny + cIdent  + cLower   -- î
-   '\239' -> cAny + cIdent  + cLower   -- ï
-   '\240' -> cAny + cIdent  + cLower   -- ð
-   '\241' -> cAny + cIdent  + cLower   -- ñ
-   '\242' -> cAny + cIdent  + cLower   -- ò
-   '\243' -> cAny + cIdent  + cLower   -- ó
-   '\244' -> cAny + cIdent  + cLower   -- ô
-   '\245' -> cAny + cIdent  + cLower   -- õ
-   '\246' -> cAny + cIdent  + cLower   -- ö
-   '\247' -> cAny + cSymbol            -- ÷
-   '\248' -> cAny + cIdent             -- ø
-   '\249' -> cAny + cIdent  + cLower   -- ù
-   '\250' -> cAny + cIdent  + cLower   -- ú
-   '\251' -> cAny + cIdent  + cLower   -- û
-   '\252' -> cAny + cIdent  + cLower   -- ü
-   '\253' -> cAny + cIdent  + cLower   -- ý
-   '\254' -> cAny + cIdent  + cLower   -- þ
-   '\255' -> cAny + cIdent  + cLower   -- ÿ
+   '\161' -> cAny + cSymbol            -- ¡
+   '\162' -> cAny + cSymbol            -- ¢
+   '\163' -> cAny + cSymbol            -- £
+   '\164' -> cAny + cSymbol            -- ¤
+   '\165' -> cAny + cSymbol            -- ¥
+   '\166' -> cAny + cSymbol            -- ¦
+   '\167' -> cAny + cSymbol            -- §
+   '\168' -> cAny + cSymbol            -- ¨
+   '\169' -> cAny + cSymbol            -- ©
+   '\170' -> cAny + cSymbol            -- ª
+   '\171' -> cAny + cSymbol            -- «
+   '\172' -> cAny + cSymbol            -- ¬
+   '\173' -> cAny + cSymbol            -- 
+   '\174' -> cAny + cSymbol            -- ®
+   '\175' -> cAny + cSymbol            -- ¯
+   '\176' -> cAny + cSymbol            -- °
+   '\177' -> cAny + cSymbol            -- ±
+   '\178' -> cAny + cSymbol            -- ²
+   '\179' -> cAny + cSymbol            -- ³
+   '\180' -> cAny + cSymbol            -- ´
+   '\181' -> cAny + cSymbol            -- µ
+   '\182' -> cAny + cSymbol            -- ¶
+   '\183' -> cAny + cSymbol            -- ·
+   '\184' -> cAny + cSymbol            -- ¸
+   '\185' -> cAny + cSymbol            -- ¹
+   '\186' -> cAny + cSymbol            -- º
+   '\187' -> cAny + cSymbol            -- »
+   '\188' -> cAny + cSymbol            -- ¼
+   '\189' -> cAny + cSymbol            -- ½
+   '\190' -> cAny + cSymbol            -- ¾
+   '\191' -> cAny + cSymbol            -- ¿
+   '\192' -> cAny + cIdent  + cUpper   -- À
+   '\193' -> cAny + cIdent  + cUpper   -- Á
+   '\194' -> cAny + cIdent  + cUpper   -- Â
+   '\195' -> cAny + cIdent  + cUpper   -- Ã
+   '\196' -> cAny + cIdent  + cUpper   -- Ä
+   '\197' -> cAny + cIdent  + cUpper   -- Å
+   '\198' -> cAny + cIdent  + cUpper   -- Æ
+   '\199' -> cAny + cIdent  + cUpper   -- Ç
+   '\200' -> cAny + cIdent  + cUpper   -- È
+   '\201' -> cAny + cIdent  + cUpper   -- É
+   '\202' -> cAny + cIdent  + cUpper   -- Ê
+   '\203' -> cAny + cIdent  + cUpper   -- Ë
+   '\204' -> cAny + cIdent  + cUpper   -- Ì
+   '\205' -> cAny + cIdent  + cUpper   -- Í
+   '\206' -> cAny + cIdent  + cUpper   -- Î
+   '\207' -> cAny + cIdent  + cUpper   -- Ï
+   '\208' -> cAny + cIdent  + cUpper   -- Ð
+   '\209' -> cAny + cIdent  + cUpper   -- Ñ
+   '\210' -> cAny + cIdent  + cUpper   -- Ò
+   '\211' -> cAny + cIdent  + cUpper   -- Ó
+   '\212' -> cAny + cIdent  + cUpper   -- Ô
+   '\213' -> cAny + cIdent  + cUpper   -- Õ
+   '\214' -> cAny + cIdent  + cUpper   -- Ö
+   '\215' -> cAny + cSymbol + cLower   -- ×
+   '\216' -> cAny + cIdent  + cUpper   -- Ø
+   '\217' -> cAny + cIdent  + cUpper   -- Ù
+   '\218' -> cAny + cIdent  + cUpper   -- Ú
+   '\219' -> cAny + cIdent  + cUpper   -- Û
+   '\220' -> cAny + cIdent  + cUpper   -- Ü
+   '\221' -> cAny + cIdent  + cUpper   -- Ý
+   '\222' -> cAny + cIdent  + cUpper   -- Þ
+   '\223' -> cAny + cIdent             -- ß
+   '\224' -> cAny + cIdent  + cLower   -- à
+   '\225' -> cAny + cIdent  + cLower   -- á
+   '\226' -> cAny + cIdent  + cLower   -- â
+   '\227' -> cAny + cIdent  + cLower   -- ã
+   '\228' -> cAny + cIdent  + cLower   -- ä
+   '\229' -> cAny + cIdent  + cLower   -- å
+   '\230' -> cAny + cIdent  + cLower   -- æ
+   '\231' -> cAny + cIdent  + cLower   -- ç
+   '\232' -> cAny + cIdent  + cLower   -- è
+   '\233' -> cAny + cIdent  + cLower   -- é
+   '\234' -> cAny + cIdent  + cLower   -- ê
+   '\235' -> cAny + cIdent  + cLower   -- ë
+   '\236' -> cAny + cIdent  + cLower   -- ì
+   '\237' -> cAny + cIdent  + cLower   -- í
+   '\238' -> cAny + cIdent  + cLower   -- î
+   '\239' -> cAny + cIdent  + cLower   -- ï
+   '\240' -> cAny + cIdent  + cLower   -- ð
+   '\241' -> cAny + cIdent  + cLower   -- ñ
+   '\242' -> cAny + cIdent  + cLower   -- ò
+   '\243' -> cAny + cIdent  + cLower   -- ó
+   '\244' -> cAny + cIdent  + cLower   -- ô
+   '\245' -> cAny + cIdent  + cLower   -- õ
+   '\246' -> cAny + cIdent  + cLower   -- ö
+   '\247' -> cAny + cSymbol            -- ÷
+   '\248' -> cAny + cIdent             -- ø
+   '\249' -> cAny + cIdent  + cLower   -- ù
+   '\250' -> cAny + cIdent  + cLower   -- ú
+   '\251' -> cAny + cIdent  + cLower   -- û
+   '\252' -> cAny + cIdent  + cLower   -- ü
+   '\253' -> cAny + cIdent  + cLower   -- ý
+   '\254' -> cAny + cIdent  + cLower   -- þ
+   '\255' -> cAny + cIdent  + cLower   -- ÿ
  \end{code}
diff --git a/ghc/compiler/parser/Lexer.x b/ghc/compiler/parser/Lexer.x

index eb00e90..38908a0 100644 (file)
--- a/ghc/compiler/parser/Lexer.x
+++ b/ghc/compiler/parser/Lexer.x
@@ -1,5 +1,5 @@
  -----------------------------------------------------------------------------
--- (c) The University of Glasgow, 2003
+-- (c) The University of Glasgow, 2006
  --
  -- GHC's lexer.
  --
@@ -43,35 +43,38 @@ import Ctype
  import Util            ( maybePrefixMatch, readRational )
  
  import DATA_BITS
-import Char
+import Data.Char
  import Ratio
  --import TRACE
  }
  
-$whitechar   = [\ \t\n\r\f\v\xa0]
+$unispace    = \x05
+$whitechar   = [\ \t\n\r\f\v\xa0 $unispace]
  $white_no_nl = $whitechar # \n
  
  $ascdigit  = 0-9
-$unidigit  = \x01
+$unidigit  = \x03
+$decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  $digit     = [$ascdigit $unidigit]
  
  $special   = [\(\)\,\;\[\]\`\{\}]
  $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
-$unisymbol = \x02
+$unisymbol = \x04
  $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  
-$unilarge  = \x03
+$unilarge  = \x01
  $asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
  $large     = [$asclarge $unilarge]
  
-$unismall  = \x04
+$unismall  = \x02
  $ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
  $small     = [$ascsmall $unismall \_]
  
-$graphic   = [$small $large $symbol $digit $special \:\"\']
+$unigraphic = \x06
+$graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
  
  $octit    = 0-7
-$hexit     = [$digit A-F a-f]
+$hexit     = [$decdigit A-F a-f]
  $symchar   = [$symbol \:]
  $nl        = [\n\r]
  $idchar    = [$small $large $digit \']
@@ -82,7 +85,7 @@ $idchar    = [$small $large $digit \']
  @varsym    = $symbol $symchar*
  @consym    = \: $symchar*
  
-@decimal     = $digit+
+@decimal     = $decdigit+
  @octal       = $octit+
  @hexadecimal = $hexit+
  @exponent    = [eE] [\-\+]? @decimal
@@ -154,13 +157,13 @@ $white_no_nl+                             ;
  
  -- single-line line pragmas, of the form
  --    # <line> "<file>" <extra-stuff> \n
-<line_prag1> $digit+                   { setLine line_prag1a }
+<line_prag1> $decdigit+                        { setLine line_prag1a }
  <line_prag1a> \" [$graphic \ ]* \"     { setFile line_prag1b }
  <line_prag1b> .*                       { pop }
  
  -- Haskell-style line pragmas, of the form
  --    {-# LINE <line> "<file>" #-}
-<line_prag2> $digit+                   { setLine line_prag2a }
+<line_prag2> $decdigit+                        { setLine line_prag2a }
  <line_prag2a> \" [$graphic \ ]* \"     { setFile line_prag2b }
  <line_prag2b> "#-}"|"-}"               { pop }
     -- NOTE: accept -} at the end of a LINE pragma, for compatibility
@@ -554,6 +557,13 @@ reservedSymsFM = listToUFM $
         ,(">-", ITrarrowtail,   bit arrowsBit)
         ,("-<<",        ITLarrowtail,   bit arrowsBit)
         ,(">>-",        ITRarrowtail,   bit arrowsBit)
+
+#if __GLASGOW_HASKELL__ >= 605
+       ,("∀",        ITforall,       bit tvBit)
+       ,("→",   ITrarrow,    0)
+       ,("←",   ITlarrow,    0)
+       ,("⋯",        ITdotdot,       0)
+#endif
         ]
  
  -- -----------------------------------------------------------------------------
@@ -670,23 +680,29 @@ splitQualName :: StringBuffer -> Int -> (FastString,FastString)
  -- takes a StringBuffer and a length, and returns the module name
  -- and identifier parts of a qualified name.  Splits at the *last* dot,
  -- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf 0 0
+splitQualName orig_buf len = split orig_buf orig_buf
    where
-    split buf dot_off n
-       | n == len                = done dot_off
-       | lookAhead buf n == '.'  = split2 buf n (n+1)
-       | otherwise               = split buf dot_off (n+1)     
+    split buf dot_buf
+       | orig_buf `byteDiff` buf >= len  = done dot_buf
+       | c == '.'                        = found_dot buf'
+       | otherwise                       = split buf' dot_buf
+      where
+       (c,buf') = nextChar buf
    
      -- careful, we might get names like M....
      -- so, if the character after the dot is not upper-case, this is
      -- the end of the qualifier part.
-    split2 buf dot_off n
-       | isUpper (lookAhead buf n) = split buf dot_off (n+1)
-       | otherwise                 = done dot_off
-
-    done dot_off =
-       (lexemeToFastString orig_buf dot_off, 
-        lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
+    found_dot buf -- buf points after the '.'
+       | isUpper c    = split buf' buf
+       | otherwise    = done buf
+      where
+       (c,buf') = nextChar buf
+
+    done dot_buf =
+       (lexemeToFastString orig_buf (qual_size - 1),
+        lexemeToFastString dot_buf (len - qual_size))
+      where
+       qual_size = orig_buf `byteDiff` dot_buf
  
  varid span buf len = 
    case lookupUFM reservedWordsFM fs of
@@ -726,19 +742,19 @@ tok_decimal span buf len
    = return (L span (ITinteger  $! parseInteger buf len 10 octDecDigit))
  
  tok_octal span buf len 
-  = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 8 octDecDigit))
+  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit))
  
  tok_hexadecimal span buf len 
-  = return (L span (ITinteger  $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
+  = return (L span (ITinteger  $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
  
  prim_decimal span buf len 
    = return (L span (ITprimint  $! parseInteger buf (len-1) 10 octDecDigit))
  
  prim_octal span buf len 
-  = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 8 octDecDigit))
+  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit))
  
  prim_hexadecimal span buf len 
-  = return (L span (ITprimint  $! parseInteger (stepOnBy 2 buf) (len-3) 16 hexDigit))
+  = return (L span (ITprimint  $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit))
  
  tok_float        str = ITrational   $! readRational str
  prim_float       str = ITprimfloat  $! readRational str
@@ -839,7 +855,7 @@ lex_string_tok span buf len = do
  lex_string :: String -> P Token
  lex_string s = do
    i <- getInput
-  case alexGetChar i of
+  case alexGetChar' i of
      Nothing -> lit_error
  
      Just ('"',i)  -> do
@@ -848,14 +864,15 @@ lex_string s = do
         if glaexts
           then do
             i <- getInput
-           case alexGetChar i of
+           case alexGetChar' i of
               Just ('#',i) -> do
                    setInput i
                    if any (> '\xFF') s
                      then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
-                    else let s' = mkFastStringNarrow (reverse s) in
-                        -- always a narrow string/byte array
+                    else let s' = mkZFastString (reverse s) in
                          return (ITprimstring s')
+                       -- mkZFastString is a hack to avoid encoding the
+                       -- string in UTF-8.  We just want the exact bytes.
               _other ->
                 return (ITstring (mkFastString (reverse s)))
           else
@@ -866,11 +883,11 @@ lex_string s = do
                 setInput i; lex_string s
         | Just (c,i) <- next, is_space c -> do 
                 setInput i; lex_stringgap s
-       where next = alexGetChar i
+       where next = alexGetChar' i
  
-    Just _ -> do
-       c <- lex_char
-       lex_string (c:s)
+    Just (c, i) -> do
+       c' <- lex_char c i
+       lex_string (c':s)
  
  lex_stringgap s = do
    c <- getCharOrFail
@@ -890,7 +907,7 @@ lex_char_tok :: Action
  lex_char_tok span buf len = do -- We've seen '
     i1 <- getInput      -- Look ahead to first character
     let loc = srcSpanStart span
-   case alexGetChar i1 of
+   case alexGetChar' i1 of
         Nothing -> lit_error 
  
         Just ('\'', i2@(AI end2 _ _)) -> do     -- We've seen ''
@@ -905,14 +922,15 @@ lex_char_tok span buf len = do    -- We've seen '
                   lit_ch <- lex_escape
                   mc <- getCharOrFail   -- Trailing quote
                   if mc == '\'' then finish_char_tok loc lit_ch
-                               else lit_error 
+                               else do setInput i2; lit_error 
  
-        Just (c, i2@(AI end2 _ _)) | not (is_any c) -> lit_error
-                             | otherwise      ->
+        Just (c, i2@(AI end2 _ _)) 
+               | not (isAny c) -> lit_error
+               | otherwise ->
  
                 -- We've seen 'x, where x is a valid character
                 --  (i.e. not newline etc) but not a quote or backslash
-          case alexGetChar i2 of       -- Look ahead one more character
+          case alexGetChar' i2 of      -- Look ahead one more character
                 Nothing -> lit_error
                 Just ('\'', i3) -> do   -- We've seen 'x'
                         setInput i3 
@@ -922,7 +940,7 @@ lex_char_tok span buf len = do      -- We've seen '
                         th_exts <- extension thEnabled  
                         let (AI end _ _) = i1
                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
-                                  else lit_error
+                                  else do setInput i2; lit_error
  
  finish_char_tok :: SrcLoc -> Char -> P (Located Token)
  finish_char_tok loc ch -- We've already seen the closing quote
@@ -930,7 +948,7 @@ finish_char_tok loc ch      -- We've already seen the closing quote
    = do glaexts <- extension glaExtsEnabled
         i@(AI end _ _) <- getInput
         if glaexts then do
-               case alexGetChar i of
+               case alexGetChar' i of
                         Just ('#',i@(AI end _ _)) -> do
                                 setInput i
                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
@@ -939,14 +957,16 @@ finish_char_tok loc ch    -- We've already seen the closing quote
                 else do
                    return (L (mkSrcSpan loc end) (ITchar ch))
  
-lex_char :: P Char
-lex_char = do
-  mc <- getCharOrFail
-  case mc of
-      '\\' -> lex_escape
-      c | is_any c -> return c
+lex_char :: Char -> AlexInput -> P Char
+lex_char c inp = do
+  case c of
+      '\\' -> do setInput inp; lex_escape
+      c | isAny c -> do setInput inp; return c
        _other -> lit_error
  
+isAny c | c > '\xff' = isPrint c
+       | otherwise  = is_any c
+
  lex_escape :: P Char
  lex_escape = do
    c <- getCharOrFail
@@ -972,11 +992,11 @@ lex_escape = do
  
         c1 ->  do
            i <- getInput
-          case alexGetChar i of
+          case alexGetChar' i of
             Nothing -> lit_error
             Just (c2,i2) -> 
-              case alexGetChar i2 of
-               Nothing -> lit_error
+              case alexGetChar' i2 of
+               Nothing -> do setInput i2; lit_error
                 Just (c3,i3) -> 
                    let str = [c1,c2,c3] in
                    case [ (c,rest) | (p,c) <- silly_escape_chars,
@@ -991,22 +1011,22 @@ lex_escape = do
  
  readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
  readNum is_digit base conv = do
+  i <- getInput
    c <- getCharOrFail
    if is_digit c 
         then readNum2 is_digit base conv (conv c)
-       else lit_error
+       else do setInput i; lit_error
  
  readNum2 is_digit base conv i = do
    input <- getInput
    read i input
    where read i input = do
-         case alexGetChar input of
+         case alexGetChar' input of
             Just (c,input') | is_digit c -> do
                 read (i*base + conv c) input'
             _other -> do
-               setInput input
                 if i >= 0 && i <= 0x10FFFF
-                  then return (chr i)
+                  then do setInput input; return (chr i)
                    else lit_error
  
  silly_escape_chars = [
@@ -1046,12 +1066,16 @@ silly_escape_chars = [
         ("DEL", '\DEL')
         ]
  
+-- before calling lit_error, ensure that the current input is pointing to
+-- the position of the error in the buffer.  This is so that we can report
+-- a correct location to the user, but also so we can detect UTF-8 decoding
+-- errors if they occur.
  lit_error = lexError "lexical error in string/character literal"
  
  getCharOrFail :: P Char
  getCharOrFail =  do
    i <- getInput
-  case alexGetChar i of
+  case alexGetChar' i of
         Nothing -> lexError "unexpected end-of-file in string/character literal"
         Just (c,i)  -> do setInput i; return c
  
@@ -1134,21 +1158,74 @@ setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } ()
  data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
  
  alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ s) = prevChar s '\n'
+alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
  
  alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
  alexGetChar (AI loc ofs s) 
    | atEnd s   = Nothing
-  | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` Just (c, (AI loc' ofs' s'))
-  where c    = currentChar s
-        loc' = advanceSrcLoc loc c
-        ofs' = advanceOffs c ofs
-       s'   = stepOn s
+  | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` 
+               Just (adj_c, (AI loc' ofs' s'))
+  where (c,s') = nextChar s
+        loc'   = advanceSrcLoc loc c
+        ofs'   = advanceOffs c ofs
+
+       non_graphic     = '\x0'
+       upper           = '\x1'
+       lower           = '\x2'
+       digit           = '\x3'
+       symbol          = '\x4'
+       space           = '\x5'
+       other_graphic   = '\x6'
+
+       adj_c 
+#if __GLASGOW_HASKELL__ < 605
+         = c  -- no Unicode support
+#else
+         | c <= '\x04' = non_graphic
+         | c <= '\xff' = c
+         | otherwise = 
+               case generalCategory c of
+                 UppercaseLetter       -> upper
+                 LowercaseLetter       -> lower
+                 TitlecaseLetter       -> upper
+                 ModifierLetter        -> other_graphic
+                 OtherLetter           -> other_graphic
+                 NonSpacingMark        -> other_graphic
+                 SpacingCombiningMark  -> other_graphic
+                 EnclosingMark         -> other_graphic
+                 DecimalNumber         -> digit
+                 LetterNumber          -> other_graphic
+                 OtherNumber           -> other_graphic
+                 ConnectorPunctuation  -> other_graphic
+                 DashPunctuation       -> other_graphic
+                 OpenPunctuation       -> other_graphic
+                 ClosePunctuation      -> other_graphic
+                 InitialQuote          -> other_graphic
+                 FinalQuote            -> other_graphic
+                 OtherPunctuation      -> other_graphic
+                 MathSymbol            -> symbol
+                 CurrencySymbol        -> symbol
+                 ModifierSymbol        -> symbol
+                 OtherSymbol           -> symbol
+                 Space                 -> space
+                 _other                -> non_graphic
+#endif
+
+-- This version does not squash unicode characters, it is used when
+-- lexing strings.
+alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar' (AI loc ofs s) 
+  | atEnd s   = Nothing
+  | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` 
+               Just (c, (AI loc' ofs' s'))
+  where (c,s') = nextChar s
+        loc'   = advanceSrcLoc loc c
+        ofs'   = advanceOffs c ofs
  
-       advanceOffs :: Char -> Int -> Int
-       advanceOffs '\n' offs = 0
-       advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
-       advanceOffs _    offs = offs + 1
+advanceOffs :: Char -> Int -> Int
+advanceOffs '\n' offs = 0
+advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
+advanceOffs _    offs = offs + 1
  
  getInput :: P AlexInput
  getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
@@ -1255,7 +1332,7 @@ srcParseErr buf len
              else hcat [ptext SLIT("parse error on input "),
                         char '`', text token, char '\'']
      ]
-  where token = lexemeToString (stepOnBy (-len) buf) len
+  where token = lexemeToString (offsetBytes (-len) buf) len
  
  -- Report a parse failure, giving the span of the previous token as
  -- the location of the error.  This is the entry point for errors
@@ -1266,14 +1343,12 @@ srcParseFail = P $ \PState{ buffer = buf, last_len = len,
      PFailed last_loc (srcParseErr buf len)
  
  -- A lexical error is reported at a particular position in the source file,
--- not over a token range.  TODO: this is slightly wrong, because we record
--- the error at the character position following the one which caused the
--- error.  We should somehow back up by one character.
+-- not over a token range.
  lexError :: String -> P a
  lexError str = do
    loc <- getSrcLoc
-  i@(AI end _ _) <- getInput
-  failLocMsgP loc end str
+  i@(AI end _ buf) <- getInput
+  reportLexError loc end buf False str
  
  -- -----------------------------------------------------------------------------
  -- This is the top-level function: called from the parser each time a
@@ -1282,7 +1357,7 @@ lexError str = do
  lexer :: (Located Token -> P a) -> P a
  lexer cont = do
    tok@(L _ tok__) <- lexToken
-  -- trace ("token: " ++ show tok__) $ do
+  --trace ("token: " ++ show tok__) $ do
    cont tok
  
  lexToken :: P (Located Token)
@@ -1294,13 +1369,24 @@ lexToken = do
      AlexEOF -> do let span = mkSrcSpan loc1 loc1
                   setLastToken span 0
                   return (L span ITeof)
-    AlexError (AI loc2 _ _) -> do failLocMsgP loc1 loc2 "lexical error"
+    AlexError (AI loc2 _ buf) -> do 
+       reportLexError loc1 loc2 buf True "lexical error"
      AlexSkip inp2 _ -> do
         setInput inp2
         lexToken
      AlexToken inp2@(AI end _ buf2) len t -> do
         setInput inp2
         let span = mkSrcSpan loc1 end
-       span `seq` setLastToken span len
-       t span buf len
+       let bytes = byteDiff buf buf2
+       span `seq` setLastToken span bytes
+       t span buf bytes
+
+reportLexError loc1 loc2 buf is_prev str = 
+  let 
+       c | is_prev   = prevChar buf '\0'
+         | otherwise = fst (nextChar buf)
+  in
+  if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
+    then failLocMsgP loc2 loc2 "UTF-8 decoding error"
+    else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
  }
diff --git a/ghc/compiler/parser/Parser.y.pp b/ghc/compiler/parser/Parser.y.pp

index 844cc86..b4acb89 100644 (file)
--- a/ghc/compiler/parser/Parser.y.pp
+++ b/ghc/compiler/parser/Parser.y.pp
@@ -25,7 +25,7 @@ import Type           ( funTyCon )
  import ForeignCall     ( Safety(..), CExportSpec(..), CLabelString,
                           CCallConv(..), CCallTarget(..), defaultCCallConv
                         )
-import OccName         ( UserFS, varName, dataName, tcClsName, tvName )
+import OccName         ( varName, dataName, tcClsName, tvName )
  import DataCon         ( DataCon, dataConName )
  import SrcLoc          ( Located(..), unLoc, getLoc, noLoc, combineSrcSpans,
                           SrcSpan, combineLocs, srcLocFile, 
@@ -1469,7 +1469,7 @@ varsym_no_minus :: { Located RdrName } -- varsym not including '-'
  -- These special_ids are treated as keywords in various places, 
  -- but as ordinary ids elsewhere.   'special_id' collects all these
  -- except 'unsafe' and 'forall' whose treatment differs depending on context
-special_id :: { Located UserFS }
+special_id :: { Located FastString }
  special_id
         : 'as'                  { L1 FSLIT("as") }
         | 'qualified'           { L1 FSLIT("qualified") }
@@ -1480,7 +1480,7 @@ special_id
         | 'stdcall'             { L1 FSLIT("stdcall") }
         | 'ccall'               { L1 FSLIT("ccall") }
  
-special_sym :: { Located UserFS }
+special_sym :: { Located FastString }
  special_sym : '!'      { L1 FSLIT("!") }
             | '.'       { L1 FSLIT(".") }
             | '*'       { L1 FSLIT("*") }
diff --git a/ghc/compiler/parser/ParserCore.y b/ghc/compiler/parser/ParserCore.y

index d8fceeb..3210583 100644 (file)
--- a/ghc/compiler/parser/ParserCore.y
+++ b/ghc/compiler/parser/ParserCore.y
@@ -72,7 +72,7 @@ module        :: { HsExtCore RdrName }
           : '%module' modid tdefs vdefgs { HsExtCore $2 $3 $4 }
  
  modid  :: { Module }
-       : CNAME                  { mkSysModuleFS (mkFastString $1) }
+       : CNAME                  { mkModuleFS (mkFastString $1) }
  
  -------------------------------------------------------------
  --     Type and newtype declarations are in HsSyn syntax
@@ -262,25 +262,25 @@ lit       :: { Literal }
         | '(' STRING '::' aty ')'       { MachStr (mkFastString $2) }
  
  tv_occ :: { OccName }
-       : NAME  { mkSysOcc tvName $1 }
+       : NAME  { mkOccName tvName $1 }
  
  var_occ        :: { OccName }
-       : NAME  { mkSysOcc varName $1 }
+       : NAME  { mkVarOcc $1 }
  
  
  -- Type constructor
  q_tc_name      :: { IfaceExtName }
-        : modid '.' CNAME      { ExtPkg $1 (mkSysOcc tcName $3) }
+        : modid '.' CNAME      { ExtPkg $1 (mkOccName tcName $3) }
  
  -- Data constructor in a pattern or data type declaration; use the dataName, 
  -- because that's what we expect in Core case patterns
  d_pat_occ :: { OccName }
-        : CNAME      { mkSysOcc dataName $1 }
+        : CNAME      { mkOccName dataName $1 }
  
  -- Data constructor occurrence in an expression;
  -- use the varName because that's the worker Id
  d_occ :: { OccName }
-       : CNAME { mkSysOcc varName $1 }
+       : CNAME { mkVarOcc $1 }
  
  {
  
diff --git a/ghc/compiler/parser/RdrHsSyn.lhs b/ghc/compiler/parser/RdrHsSyn.lhs

index 2d18d6d..6ff15e7 100644 (file)
--- a/ghc/compiler/parser/RdrHsSyn.lhs
+++ b/ghc/compiler/parser/RdrHsSyn.lhs
@@ -59,7 +59,7 @@ import TysWiredIn     ( unitTyCon )
  import ForeignCall     ( CCallConv, Safety, CCallTarget(..), CExportSpec(..),
                           DNCallSpec(..), DNKind(..), CLabelString )
  import OccName         ( srcDataName, varName, isDataOcc, isTcOcc, 
-                         occNameUserString )
+                         occNameString )
  import SrcLoc
  import OrdList         ( OrdList, fromOL )
  import Bag             ( Bag, emptyBag, snocBag, consBag, foldrBag )
@@ -800,8 +800,8 @@ mkExport :: CallConv
  mkExport (CCall  cconv) (L loc entity, v, ty) = return $ 
    ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False)
    where
-    entity' | nullFastString entity = mkExtName (unLoc v)
-           | otherwise             = entity
+    entity' | nullFS entity = mkExtName (unLoc v)
+           | otherwise     = entity
  mkExport DNCall (L loc entity, v, ty) =
    parseError (getLoc v){-TODO: not quite right-}
         "Foreign export is not yet supported for .NET"
@@ -811,10 +811,9 @@ mkExport DNCall (L loc entity, v, ty) =
  -- of the Haskell name is then performed, so if you foreign export (++),
  -- it's external name will be "++". Too bad; it's important because we don't
  -- want z-encoding (e.g. names with z's in them shouldn't be doubled)
--- (This is why we use occNameUserString.)
  --
  mkExtName :: RdrName -> CLabelString
-mkExtName rdrNm = mkFastString (occNameUserString (rdrNameOcc rdrNm))
+mkExtName rdrNm = mkFastString (occNameString (rdrNameOcc rdrNm))
  \end{code}
  
  
diff --git a/ghc/compiler/prelude/PrelNames.lhs b/ghc/compiler/prelude/PrelNames.lhs

index 0d99121..eb26d34 100644 (file)
--- a/ghc/compiler/prelude/PrelNames.lhs
+++ b/ghc/compiler/prelude/PrelNames.lhs
@@ -50,9 +50,8 @@ module PrelNames (
  #include "HsVersions.h"
  
  import Module    ( Module, mkModule )
-import OccName   ( dataName, tcName, clsName, varName, mkOccFS
-                 )
-                 
+import OccName   ( dataName, tcName, clsName, varName, mkOccNameFS,
+                   mkVarOccFS )
  import RdrName   ( RdrName, nameRdrName, mkOrig, rdrNameOcc, mkUnqual )
  import Unique    ( Unique, Uniquable(..), hasKey,
                     mkPreludeMiscIdUnique, mkPreludeDataConUnique,
@@ -75,7 +74,7 @@ import FastString
  This *local* name is used by the interactive stuff
  
  \begin{code}
-itName uniq = mkInternalName uniq (mkOccFS varName FSLIT("it")) noSrcLoc
+itName uniq = mkInternalName uniq (mkOccNameFS varName FSLIT("it")) noSrcLoc
  \end{code}
  
  \begin{code}
@@ -415,10 +414,10 @@ inrDataCon_RDR     = dataQual_RDR pREL_BASE FSLIT("Inr")
  genUnitDataCon_RDR = dataQual_RDR pREL_BASE FSLIT("Unit")
  
  ----------------------
-varQual_RDR  mod str = mkOrig mod (mkOccFS varName str)
-tcQual_RDR   mod str = mkOrig mod (mkOccFS tcName str)
-clsQual_RDR  mod str = mkOrig mod (mkOccFS clsName str)
-dataQual_RDR mod str = mkOrig mod (mkOccFS dataName str)
+varQual_RDR  mod str = mkOrig mod (mkOccNameFS varName str)
+tcQual_RDR   mod str = mkOrig mod (mkOccNameFS tcName str)
+clsQual_RDR  mod str = mkOrig mod (mkOccNameFS clsName str)
+dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str)
  \end{code}
  
  %************************************************************************
@@ -656,17 +655,17 @@ tcQual   = mk_known_key_name tcName
  clsQual  = mk_known_key_name clsName
  
  mk_known_key_name space mod str uniq 
-  = mkExternalName uniq mod (mkOccFS space str) 
+  = mkExternalName uniq mod (mkOccNameFS space str) 
                    Nothing noSrcLoc
  
  conName :: Name -> FastString -> Unique -> Name
  conName tycon occ uniq
-  = mkExternalName uniq (nameModule tycon) (mkOccFS dataName occ) 
+  = mkExternalName uniq (nameModule tycon) (mkOccNameFS dataName occ) 
                    (Just tycon) noSrcLoc
  
  methName :: Name -> FastString -> Unique -> Name
  methName cls occ uniq
-  = mkExternalName uniq (nameModule cls) (mkOccFS varName occ) 
+  = mkExternalName uniq (nameModule cls) (mkVarOccFS occ) 
                    (Just cls) noSrcLoc
  \end{code}
  
diff --git a/ghc/compiler/prelude/PrelRules.lhs b/ghc/compiler/prelude/PrelRules.lhs

index e0b2347..9cdddc9 100644 (file)
--- a/ghc/compiler/prelude/PrelRules.lhs
+++ b/ghc/compiler/prelude/PrelRules.lhs
@@ -36,7 +36,7 @@ import TyCon          ( tyConDataCons_maybe, isEnumerationTyCon, isNewTyCon )
  import DataCon         ( dataConTag, dataConTyCon, dataConWorkId, fIRST_TAG )
  import CoreUtils       ( cheapEqExpr, exprIsConApp_maybe )
  import Type            ( tyConAppTyCon, coreEqType )
-import OccName         ( occNameUserString)
+import OccName         ( occNameFS )
  import PrelNames       ( unpackCStringFoldrName, unpackCStringFoldrIdKey, hasKey,
                           eqStringName, unpackCStringIdKey )
  import Maybes          ( orElse )
@@ -58,7 +58,7 @@ import DATA_WORD      ( Word64 )
  primOpRules :: PrimOp -> Name -> [CoreRule]
  primOpRules op op_name = primop_rule op
    where
-    rule_name = mkFastString (occNameUserString (primOpOcc op))
+    rule_name = occNameFS (primOpOcc op)
      rule_name_case = rule_name `appendFS` FSLIT("->case")
  
         -- A useful shorthand
diff --git a/ghc/compiler/prelude/PrimOp.lhs b/ghc/compiler/prelude/PrimOp.lhs

index e99eb9d..a650352 100644 (file)
--- a/ghc/compiler/prelude/PrimOp.lhs
+++ b/ghc/compiler/prelude/PrimOp.lhs
@@ -22,7 +22,7 @@ import TysWiredIn
  
  import NewDemand
  import Var             ( TyVar )
-import OccName         ( OccName, pprOccName, mkVarOcc )
+import OccName         ( OccName, pprOccName, mkVarOccFS )
  import TyCon           ( TyCon, isPrimTyCon, tyConPrimRep, PrimRep(..) )
  import Type            ( Type, mkForAllTys, mkFunTy, mkFunTys, tyConAppTyCon,
                           typePrimRep )
@@ -113,10 +113,10 @@ data PrimOpInfo
                 [Type] 
                 Type 
  
-mkDyadic str  ty = Dyadic  (mkVarOcc str) ty
-mkMonadic str ty = Monadic (mkVarOcc str) ty
-mkCompare str ty = Compare (mkVarOcc str) ty
-mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOcc str) tvs tys ty
+mkDyadic str  ty = Dyadic  (mkVarOccFS str) ty
+mkMonadic str ty = Monadic (mkVarOccFS str) ty
+mkCompare str ty = Compare (mkVarOccFS str) ty
+mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOccFS str) tvs tys ty
  \end{code}
  
  %************************************************************************
diff --git a/ghc/compiler/prelude/TysPrim.lhs b/ghc/compiler/prelude/TysPrim.lhs

index 7d397d6..2f6168b 100644 (file)
--- a/ghc/compiler/prelude/TysPrim.lhs
+++ b/ghc/compiler/prelude/TysPrim.lhs
@@ -46,7 +46,7 @@ module TysPrim(
  
  import Var             ( TyVar, mkTyVar )
  import Name            ( Name, BuiltInSyntax(..), mkInternalName, mkWiredInName )
-import OccName         ( mkOccFS, tcName, mkTyVarOcc )
+import OccName         ( mkOccNameFS, tcName, mkTyVarOcc )
  import TyCon           ( TyCon, ArgVrcs, mkPrimTyCon, mkLiftedPrimTyCon,
                           PrimRep(..) )
  import Type            ( mkTyConApp, mkTyConTy, mkTyVarTys, mkTyVarTy,
@@ -100,7 +100,7 @@ primTyCons
  
  mkPrimTc :: FastString -> Unique -> TyCon -> Name
  mkPrimTc fs uniq tycon
-  = mkWiredInName gHC_PRIM (mkOccFS tcName fs) 
+  = mkWiredInName gHC_PRIM (mkOccNameFS tcName fs) 
                   uniq
                   Nothing               -- No parent object
                   (ATyCon tycon)        -- Relevant TyCon
diff --git a/ghc/compiler/prelude/TysWiredIn.lhs b/ghc/compiler/prelude/TysWiredIn.lhs

index e7dea60..ceb4df5 100644 (file)
--- a/ghc/compiler/prelude/TysWiredIn.lhs
+++ b/ghc/compiler/prelude/TysWiredIn.lhs
@@ -62,16 +62,18 @@ import Module               ( Module )
  import RdrName         ( nameRdrName )
  import Name            ( Name, BuiltInSyntax(..), nameUnique, nameOccName, 
                           nameModule, mkWiredInName )
-import OccName         ( mkOccFS, tcName, dataName, mkTupleOcc, mkDataConWorkerOcc )
+import OccName         ( mkOccNameFS, tcName, dataName, mkTupleOcc,
+                         mkDataConWorkerOcc )
  import DataCon         ( DataCon, mkDataCon, dataConWorkId, dataConSourceArity )
  import Var             ( TyVar, tyVarKind )
  import TyCon           ( TyCon, AlgTyConRhs(DataTyCon), tyConDataCons,
-                         mkTupleTyCon, mkAlgTyCon, tyConName
-                       )
+                         mkTupleTyCon, mkAlgTyCon, tyConName )
  
-import BasicTypes      ( Arity, RecFlag(..), Boxity(..), isBoxed, StrictnessMark(..) )
+import BasicTypes      ( Arity, RecFlag(..), Boxity(..), isBoxed,
+                         StrictnessMark(..) )
  
-import Type            ( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys, TyThing(..) )
+import Type            ( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys,
+                         TyThing(..) )
  import Kind            ( mkArrowKinds, liftedTypeKind, ubxTupleKind )
  import Unique          ( incrUnique, mkTupleTyConUnique,
                           mkTupleDataConUnique, mkPArrDataConUnique )
@@ -114,14 +116,14 @@ wiredInTyCons = [ unitTyCon       -- Not treated like other tuples, because
  \begin{code}
  mkWiredInTyConName :: BuiltInSyntax -> Module -> FastString -> Unique -> TyCon -> Name
  mkWiredInTyConName built_in mod fs uniq tycon
-  = mkWiredInName mod (mkOccFS tcName fs) uniq
+  = mkWiredInName mod (mkOccNameFS tcName fs) uniq
                   Nothing               -- No parent object
                   (ATyCon tycon)        -- Relevant TyCon
                   built_in
  
  mkWiredInDataConName :: BuiltInSyntax -> Module -> FastString -> Unique -> DataCon -> Name -> Name
  mkWiredInDataConName built_in mod fs uniq datacon parent
-  = mkWiredInName mod (mkOccFS dataName fs) uniq
+  = mkWiredInName mod (mkOccNameFS dataName fs) uniq
                   (Just parent)         -- Name of parent TyCon
                   (ADataCon datacon)    -- Relevant DataCon
                   built_in
@@ -535,7 +537,7 @@ mkPArrFakeCon arity  = data_con
         tyvar     = head alphaTyVars
         tyvarTys  = replicate arity $ mkTyVarTy tyvar
          nameStr   = mkFastString ("MkPArr" ++ show arity)
-       name      = mkWiredInName pREL_PARR (mkOccFS dataName nameStr) uniq
+       name      = mkWiredInName pREL_PARR (mkOccNameFS dataName nameStr) uniq
                                   Nothing (ADataCon data_con) UserSyntax
         uniq      = mkPArrDataConUnique arity
  
diff --git a/ghc/compiler/profiling/CostCentre.lhs b/ghc/compiler/profiling/CostCentre.lhs

index 3616ccb..f4a6ba9 100644 (file)
--- a/ghc/compiler/profiling/CostCentre.lhs
+++ b/ghc/compiler/profiling/CostCentre.lhs
@@ -32,9 +32,7 @@ module CostCentre (
  #include "HsVersions.h"
  
  import Var             ( Id )
-import Name            ( UserFS, EncodedFS, encodeFS, decode,
-                         getOccName, occNameFS
-                       )
+import Name            ( getOccName, occNameFS )
  import Module          ( Module )
  import Outputable      
  import FastTypes
@@ -120,7 +118,7 @@ data CostCentre
                 cc_mod  :: Module       -- Name of module defining this CC.
      }
  
-type CcName = EncodedFS
+type CcName = FastString
  
  data IsDupdCC
    = OriginalCC -- This says how the CC is *used*.  Saying that
@@ -200,9 +198,9 @@ maybeSingletonCCS _                 = Nothing
  Building cost centres
  
  \begin{code}
-mkUserCC :: UserFS -> Module -> CostCentre
+mkUserCC :: FastString -> Module -> CostCentre
  mkUserCC cc_name mod
-  = NormalCC { cc_name = encodeFS cc_name, cc_mod =  mod,
+  = NormalCC { cc_name = cc_name, cc_mod =  mod,
                cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
      }
  
@@ -370,5 +368,5 @@ ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m, cc_is_caf = is_caf})
  costCentreUserName (NoCostCentre)  = "NO_CC"
  costCentreUserName (AllCafsCC {})  = "CAF"
  costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf})
-  =  case is_caf of { CafCC -> "CAF:";   _ -> "" } ++ decode (unpackFS name)
+  =  case is_caf of { CafCC -> "CAF:";   _ -> "" } ++ unpackFS name
  \end{code}
diff --git a/ghc/compiler/rename/RnEnv.lhs b/ghc/compiler/rename/RnEnv.lhs

index 99d6a34..2be3bfd 100644 (file)
--- a/ghc/compiler/rename/RnEnv.lhs
+++ b/ghc/compiler/rename/RnEnv.lhs
@@ -50,7 +50,8 @@ import TcRnMonad
  import Name            ( Name, nameIsLocalOrFrom, mkInternalName, isWiredInName,
                           nameSrcLoc, nameOccName, nameModule, nameParent, isExternalName )
  import NameSet
-import OccName         ( tcName, isDataOcc, occNameFlavour, reportIfUnused )
+import OccName         ( tcName, isDataOcc, pprNonVarNameSpace, occNameSpace,
+                         reportIfUnused )
  import Module          ( Module )
  import PrelNames       ( mkUnboundName, rOOT_MAIN, iNTERACTIVE, consDataConKey, hasKey )
  import UniqSupply
@@ -747,7 +748,8 @@ warnUnusedName :: (Name, Maybe Provenance) -> RnM ()
  warnUnusedName (name, prov)
    = addWarnAt loc $
      sep [msg <> colon, 
-        nest 2 $ occNameFlavour (nameOccName name) <+> quotes (ppr name)]
+        nest 2 $ pprNonVarNameSpace (occNameSpace (nameOccName name))
+                       <+> quotes (ppr name)]
         -- TODO should be a proper span
    where
      (loc,msg) = case prov of
@@ -778,7 +780,8 @@ shadowedNameWarn doc shadow
  
  unknownNameErr rdr_name
    = sep [ptext SLIT("Not in scope:"), 
-        nest 2 $ occNameFlavour (rdrNameOcc rdr_name) <+> quotes (ppr rdr_name)]
+        nest 2 $ pprNonVarNameSpace (occNameSpace (rdrNameOcc rdr_name))
+                 <+> quotes (ppr rdr_name)]
  
  unknownInstBndrErr cls op
    = quotes (ppr op) <+> ptext SLIT("is not a (visible) method of class") <+> quotes (ppr cls)
diff --git a/ghc/compiler/rename/RnExpr.lhs b/ghc/compiler/rename/RnExpr.lhs

index 53a412f..95d7b83 100644 (file)
--- a/ghc/compiler/rename/RnExpr.lhs
+++ b/ghc/compiler/rename/RnExpr.lhs
@@ -37,7 +37,6 @@ import Name           ( Name, nameOccName, nameIsLocalOrFrom )
  import NameSet
  import RdrName         ( RdrName, emptyGlobalRdrEnv, extendLocalRdrEnv, lookupLocalRdrEnv )
  import LoadIface       ( loadHomeInterface )
-import UnicodeUtil     ( stringToUtf8 )
  import UniqFM          ( isNullUFM )
  import UniqSet         ( emptyUniqSet )
  import List            ( nub )
@@ -932,7 +931,7 @@ mkAssertErrorExpr
    = getSrcSpanM                        `thenM` \ sloc ->
      let
         expr = HsApp (L sloc (HsVar assertErrorName)) (L sloc (HsLit msg))
-       msg  = HsStringPrim (mkFastString (stringToUtf8 (showSDoc (ppr sloc))))
+       msg  = HsStringPrim (mkFastString (showSDoc (ppr sloc)))
      in
      returnM (expr, emptyFVs)
  \end{code}
diff --git a/ghc/compiler/rename/RnNames.lhs b/ghc/compiler/rename/RnNames.lhs

index bf6e54a..4cdb241 100644 (file)
--- a/ghc/compiler/rename/RnNames.lhs
+++ b/ghc/compiler/rename/RnNames.lhs
@@ -26,15 +26,17 @@ import TcRnMonad
  
  import FiniteMap
  import PrelNames       ( pRELUDE, isUnboundName, main_RDR_Unqual )
-import Module          ( Module, moduleUserString, unitModuleEnv, 
+import Module          ( Module, moduleString, unitModuleEnv, 
                           lookupModuleEnv, moduleEnvElts, foldModuleEnv )
  import Name            ( Name, nameSrcLoc, nameOccName, nameModule, isWiredInName,
                           nameParent, nameParent_maybe, isExternalName,
                           isBuiltInSyntax )
  import NameSet
  import NameEnv
-import OccName         ( srcDataName, isTcOcc, occNameFlavour, OccEnv, 
-                         mkOccEnv, lookupOccEnv, emptyOccEnv, extendOccEnv )
+import OccName         ( srcDataName, isTcOcc, pprNonVarNameSpace,
+                         occNameSpace,
+                         OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv,
+                         extendOccEnv )
  import HscTypes                ( GenAvailInfo(..), AvailInfo,
                           HomePackageTable, PackageIfaceTable, 
                           unQualInScope, 
@@ -683,7 +685,7 @@ reportDeprecations tcg_env
        ,        Just deprec_txt <- lookupDeprec hpt pit name
        = setSrcSpan (importSpecLoc imp_spec) $
         addWarn (sep [ptext SLIT("Deprecated use of") <+> 
-                       occNameFlavour (nameOccName name) <+> 
+                       pprNonVarNameSpace (occNameSpace (nameOccName name)) <+> 
                         quotes (ppr name),
                       (parens imp_msg) <> colon,
                       (ppr deprec_txt) ])
@@ -958,7 +960,7 @@ printMinimalImports imps
                                  (vcat (map ppr_mod_ie mod_ies)) })
     }
    where
-    mkFilename this_mod = moduleUserString this_mod ++ ".imports"
+    mkFilename this_mod = moduleString this_mod ++ ".imports"
      ppr_mod_ie (mod_name, ies) 
         | mod_name == pRELUDE 
         = empty
diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs

index 4b1c01d..f8ab29d 100644 (file)
--- a/ghc/compiler/simplCore/SetLevels.lhs
+++ b/ghc/compiler/simplCore/SetLevels.lhs
@@ -60,8 +60,8 @@ import CoreUtils      ( exprType, exprIsTrivial, exprIsCheap, mkPiTypes )
  import CoreFVs         -- all of it
  import CoreSubst       ( Subst, emptySubst, extendInScope, extendIdSubst,
                           cloneIdBndr, cloneRecIdBndrs )
-import Id              ( Id, idType, mkSysLocalUnencoded, 
-                         isOneShotLambda, zapDemandIdInfo,
+import Id              ( Id, idType, mkSysLocal, isOneShotLambda,
+                         zapDemandIdInfo,
                           idSpecialisation, idWorkerInfo, setIdInfo
                         )
  import IdInfo          ( workerExists, vanillaIdInfo, isEmptySpecInfo )
@@ -69,7 +69,7 @@ import Var            ( Var )
  import VarSet
  import VarEnv
  import Name            ( getOccName )
-import OccName         ( occNameUserString )
+import OccName         ( occNameString )
  import Type            ( isUnLiftedType, Type )
  import BasicTypes      ( TopLevelFlag(..) )
  import UniqSupply
@@ -796,9 +796,9 @@ newPolyBndrs dest_lvl env abs_vars bndrs
      in
      returnLvl (extendPolyLvlEnv dest_lvl env abs_vars (bndrs `zip` new_bndrs), new_bndrs)
    where
-    mk_poly_bndr bndr uniq = mkSysLocalUnencoded (mkFastString str) uniq poly_ty
+    mk_poly_bndr bndr uniq = mkSysLocal (mkFastString str) uniq poly_ty
                            where
-                            str     = "poly_" ++ occNameUserString (getOccName bndr)
+                            str     = "poly_" ++ occNameString (getOccName bndr)
                              poly_ty = mkPiTypes abs_vars (idType bndr)
         
  
@@ -807,7 +807,7 @@ newLvlVar :: String
           -> LvlM Id
  newLvlVar str vars body_ty     
    = getUniqueUs        `thenLvl` \ uniq ->
-    returnUs (mkSysLocalUnencoded (mkFastString str) uniq (mkPiTypes vars body_ty))
+    returnUs (mkSysLocal (mkFastString str) uniq (mkPiTypes vars body_ty))
      
  -- The deeply tiresome thing is that we have to apply the substitution
  -- to the rules inside each Id.  Grr.  But it matters.
diff --git a/ghc/compiler/simplCore/SimplMonad.lhs b/ghc/compiler/simplCore/SimplMonad.lhs

index b82562e..bc09e11 100644 (file)
--- a/ghc/compiler/simplCore/SimplMonad.lhs
+++ b/ghc/compiler/simplCore/SimplMonad.lhs
@@ -34,7 +34,6 @@ import UniqSupply     ( uniqsFromSupply, uniqFromSupply, splitUniqSupply,
                         )
  import DynFlags                ( SimplifierSwitch(..), DynFlags, DynFlag(..), dopt )
  import StaticFlags     ( opt_PprStyle_Debug, opt_HistorySize )
-import OccName         ( EncodedFS )
  import Unique          ( Unique )
  import Maybes          ( expectJust )
  import FiniteMap       ( FiniteMap, emptyFM, isEmptyFM, lookupFM, addToFM, plusFM_C, fmToList )
@@ -160,7 +159,7 @@ getDOptsSmpl :: SimplM DynFlags
  getDOptsSmpl 
     = SM (\dflags us sc -> (dflags, us, sc))
  
-newId :: EncodedFS -> Type -> SimplM Id
+newId :: FastString -> Type -> SimplM Id
  newId fs ty = getUniqueSmpl    `thenSmpl` \ uniq ->
               returnSmpl (mkSysLocal fs uniq ty)
  \end{code}
diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs

index 8859140..17a7969 100644 (file)
--- a/ghc/compiler/simplCore/Simplify.lhs
+++ b/ghc/compiler/simplCore/Simplify.lhs
@@ -28,7 +28,6 @@ import Id             ( Id, idType, idInfo, idArity, isDataConWorkId,
                         )
  import MkId            ( eRROR_ID )
  import Literal         ( mkStringLit )
-import OccName         ( encodeFS )
  import IdInfo          ( OccInfo(..), isLoopBreaker,
                           setArityInfo, zapDemandInfo,
                           setUnfoldingInfo, 
@@ -1875,7 +1874,7 @@ mkDupableAlt env case_bndr' cont alt
      )                                                  `thenSmpl` \ (final_bndrs', final_args) ->
  
         -- See comment about "$j" name above
-    newId (encodeFS FSLIT("$j")) (mkPiTypes final_bndrs' rhs_ty')      `thenSmpl` \ join_bndr ->
+    newId FSLIT("$j") (mkPiTypes final_bndrs' rhs_ty') `thenSmpl` \ join_bndr ->
         -- Notice the funky mkPiTypes.  If the contructor has existentials
         -- it's possible that the join point will be abstracted over
         -- type varaibles as well as term variables.
diff --git a/ghc/compiler/stgSyn/CoreToStg.lhs b/ghc/compiler/stgSyn/CoreToStg.lhs

index 9ae0d27..824caba 100644 (file)
--- a/ghc/compiler/stgSyn/CoreToStg.lhs
+++ b/ghc/compiler/stgSyn/CoreToStg.lhs
@@ -30,7 +30,7 @@ import VarSet
  import VarEnv
  import Maybes          ( maybeToBool )
  import Name            ( getOccName, isExternalName, nameOccName )
-import OccName         ( occNameUserString, occNameFS )
+import OccName         ( occNameString, occNameFS )
  import BasicTypes       ( Arity )
  import Packages                ( HomeModules )
  import StaticFlags     ( opt_RuntimeTypes )
@@ -688,7 +688,7 @@ coreToStgLet let_no_escape bind body
  is_join_var :: Id -> Bool
  -- A hack (used only for compiler debuggging) to tell if
  -- a variable started life as a join point ($j)
-is_join_var j = occNameUserString (getOccName j) == "$j"
+is_join_var j = occNameString (getOccName j) == "$j"
  \end{code}
  
  \begin{code}
diff --git a/ghc/compiler/typecheck/Inst.lhs b/ghc/compiler/typecheck/Inst.lhs

index 71d3e84..d6cf344 100644 (file)
--- a/ghc/compiler/typecheck/Inst.lhs
+++ b/ghc/compiler/typecheck/Inst.lhs
@@ -79,7 +79,7 @@ import DataCon        ( DataCon, dataConTyVars, dataConStupidTheta, dataConName, dataCo
  import Id      ( Id, idName, idType, mkUserLocal, mkLocalId )
  import PrelInfo        ( isNoDictClass )
  import Name    ( Name, mkMethodOcc, getOccName, getSrcLoc, nameModule,
-                 isInternalName, setNameUnique, mkSystemVarNameEncoded )
+                 isInternalName, setNameUnique, mkSystemVarName )
  import NameSet ( addOneToNameSet )
  import Literal ( inIntRange )
  import Var     ( TyVar, tyVarKind, setIdType )
@@ -398,9 +398,7 @@ newLitInst orig lit expected_ty     -- Make a LitInst
    = do         { loc <- getInstLoc orig
         ; new_uniq <- newUnique
         ; let
-               lit_nm   = mkSystemVarNameEncoded new_uniq FSLIT("lit")
-               -- The "encoded" bit means that we don't need to
-               -- z-encode the string every time we call this!
+               lit_nm   = mkSystemVarName new_uniq FSLIT("lit")
                 lit_inst = LitInst lit_nm lit expected_ty loc
         ; extendLIE lit_inst
         ; return (HsVar (instToId lit_inst)) }
diff --git a/ghc/compiler/typecheck/TcClassDcl.lhs b/ghc/compiler/typecheck/TcClassDcl.lhs

index b382af9..fbb450a 100644 (file)
--- a/ghc/compiler/typecheck/TcClassDcl.lhs
+++ b/ghc/compiler/typecheck/TcClassDcl.lhs
@@ -52,7 +52,6 @@ import RdrName                ( RdrName, mkDerivedRdrName )
  import Outputable
  import PrelNames       ( genericTyConNames )
  import DynFlags
-import UnicodeUtil     ( stringToUtf8 )
  import ErrUtils                ( dumpIfSet_dyn )
  import Util            ( count, lengthIs, isSingleton, lengthExceeds )
  import Unique          ( Uniquable(..) )
@@ -487,7 +486,7 @@ mkDefMethRhs origin clas inst_tys sel_id loc NoDefMeth
    where
      error_rhs  = noLoc $ HsLam (mkMatchGroup [mkSimpleMatch wild_pats simple_rhs])
      simple_rhs = nlHsApp (nlHsVar (getName nO_METHOD_BINDING_ERROR_ID)) 
-                      (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 error_msg))))
+                      (nlHsLit (HsStringPrim (mkFastString error_msg)))
      error_msg = showSDoc (hcat [ppr loc, text "|", ppr sel_id ])
  
         -- When the type is of form t1 -> t2 -> t3
diff --git a/ghc/compiler/typecheck/TcGenDeriv.lhs b/ghc/compiler/typecheck/TcGenDeriv.lhs

index faa32ec..94bb152 100644 (file)
--- a/ghc/compiler/typecheck/TcGenDeriv.lhs
+++ b/ghc/compiler/typecheck/TcGenDeriv.lhs
@@ -809,7 +809,7 @@ gen_Read_binds get_fixity tycon
      ident_pat s  = nlConPat ident_RDR  [nlLitPat (mkHsString s)]  -- Ident "foo"
      symbol_pat s = nlConPat symbol_RDR [nlLitPat (mkHsString s)]  -- Symbol ">>"
      
-    data_con_str con = occNameUserString (getOccName con)
+    data_con_str con = occNameString (getOccName con)
      
      read_punc c = bindLex (punc_pat c)
      read_arg a ty 
@@ -832,7 +832,7 @@ gen_Read_binds get_fixity tycon
                  | otherwise
                  = [bindLex (ident_pat lbl_str)]
                  where  
-                  lbl_str = occNameUserString (getOccName lbl) 
+                  lbl_str = occNameString (getOccName lbl) 
  \end{code}
  
  
@@ -899,7 +899,7 @@ gen_Show_binds get_fixity tycon
  
              dc_nm          = getName data_con
              dc_occ_nm      = getOccName data_con
-             con_str        = occNameUserString dc_occ_nm
+             con_str        = occNameString dc_occ_nm
              op_con_str     = wrapOpParens con_str
              backquote_str  = wrapOpBackquotes con_str
  
@@ -916,7 +916,7 @@ gen_Show_binds get_fixity tycon
                         -- it seems tidier to have them both sides.
                  where
                    occ_nm   = getOccName l
-                  nm       = wrapOpParens (occNameUserString occ_nm)
+                  nm       = wrapOpParens (occNameString occ_nm)
  
               show_args                      = zipWith show_arg bs_needed arg_tys
              (show_arg1:show_arg2:_) = show_args
@@ -1128,7 +1128,7 @@ gen_Data_binds fix_env tycon
      constr_args dc =
          [ -- nlHsIntLit (toInteger (dataConTag dc)),           -- Tag
            nlHsVar data_type_name,                              -- DataType
-          nlHsLit (mkHsString (occNameUserString dc_occ)),     -- String name
+          nlHsLit (mkHsString (occNameString dc_occ)), -- String name
             nlList  labels,                                     -- Field labels
            nlHsVar fixity]                                      -- Fixity
         where
@@ -1458,7 +1458,7 @@ mk_tc_deriv_name tycon str
    = mkDerivedRdrName tc_name mk_occ
    where
      tc_name = tyConName tycon
-    mk_occ tc_occ = mkOccFS varName (mkFastString new_str)
+    mk_occ tc_occ = mkVarOccFS (mkFastString new_str)
                   where
                     new_str = str ++ occNameString tc_occ ++ "#"
  \end{code}
diff --git a/ghc/compiler/typecheck/TcInstDcls.lhs b/ghc/compiler/typecheck/TcInstDcls.lhs

index 432d3c8..04fbafb 100644 (file)
--- a/ghc/compiler/typecheck/TcInstDcls.lhs
+++ b/ghc/compiler/typecheck/TcInstDcls.lhs
@@ -35,7 +35,6 @@ import Var            ( Id, idName, idType )
  import MkId            ( mkDictFunId, rUNTIME_ERROR_ID )
  import FunDeps         ( checkInstFDs )
  import Name            ( Name, getSrcLoc )
-import UnicodeUtil     ( stringToUtf8 )
  import Maybe           ( catMaybes )
  import SrcLoc          ( srcLocSpan, unLoc, noLoc, Located(..), srcSpanStart )
  import ListSetOps      ( minusList )
@@ -405,7 +404,7 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = binds })
                 -- Hardly beautiful, but only three extra lines.
             nlHsApp (noLoc $ TyApp (nlHsVar rUNTIME_ERROR_ID) 
                                    [idType this_dict_id])
-                 (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 msg))))
+                 (nlHsLit (HsStringPrim (mkFastString msg)))
  
           | otherwise   -- The common case
           = mkHsConApp dict_constr inst_tys' (map HsVar scs_and_meths)
diff --git a/ghc/compiler/typecheck/TcRnDriver.lhs b/ghc/compiler/typecheck/TcRnDriver.lhs

index b2e665f..7e3aae2 100644 (file)
--- a/ghc/compiler/typecheck/TcRnDriver.lhs
+++ b/ghc/compiler/typecheck/TcRnDriver.lhs
@@ -63,7 +63,7 @@ import ErrUtils               ( Messages, mkDumpDoc, showPass )
  import Id              ( Id, mkExportedLocalId, isLocalId, idName, idType )
  import Var             ( Var )
  import Module           ( Module, ModuleEnv, moduleEnvElts, elemModuleEnv )
-import OccName         ( mkVarOcc, mkOccFS, varName )
+import OccName         ( mkVarOccFS )
  import Name            ( Name, NamedThing(..), isExternalName, getSrcLoc, isWiredInName,
                           mkExternalName )
  import NameSet
@@ -734,7 +734,7 @@ checkMain
          dflags    <- getDOpts ;
          let { main_mod = mainModIs dflags ;
                main_fn  = case mainFunIs dflags of {
-                               Just fn -> mkRdrUnqual (mkVarOcc (mkFastString fn)) ;
+                               Just fn -> mkRdrUnqual (mkVarOccFS (mkFastString fn)) ;
                                 Nothing -> main_RDR_Unqual } } ;
         
          check_main ghci_mode tcg_env main_mod main_fn
@@ -776,7 +776,7 @@ check_main ghci_mode tcg_env main_mod main_fn
         -- for 'main' in the interface file!
  
         ; let { root_main_name =  mkExternalName rootMainKey rOOT_MAIN 
-                                  (mkOccFS varName FSLIT("main")) 
+                                  (mkVarOccFS FSLIT("main")) 
                                    (Just main_name) (getSrcLoc main_name)
               ; root_main_id = mkExportedLocalId root_main_name ty
               ; main_bind    = noLoc (VarBind root_main_id main_expr) }
diff --git a/ghc/compiler/typecheck/TcSplice.lhs b/ghc/compiler/typecheck/TcSplice.lhs

index 2844ab4..578c96b 100644 (file)
--- a/ghc/compiler/typecheck/TcSplice.lhs
+++ b/ghc/compiler/typecheck/TcSplice.lhs
@@ -41,7 +41,7 @@ import NameEnv                ( lookupNameEnv )
  import HscTypes                ( lookupType, ExternalPackageState(..), emptyModDetails )
  import OccName
  import Var             ( Id, TyVar, idType )
-import Module          ( moduleUserString )
+import Module          ( moduleString )
  import TcRnMonad
  import IfaceEnv                ( lookupOrig )
  import Class           ( Class, classExtraBigSig )
@@ -416,7 +416,7 @@ instance TH.Quasi (IOEnv (Env TcGblEnv TcLclEnv)) where
    qReport True msg  = addErr (text msg)
    qReport False msg = addReport (text msg)
  
-  qCurrentModule = do { m <- getModule; return (moduleUserString m) }
+  qCurrentModule = do { m <- getModule; return (moduleString m) }
    qReify v = reify v
    qRecover = recoverM
  
@@ -659,8 +659,8 @@ reifyName thing
         -- have free variables, we may need to generate NameL's for them.
    where
      name    = getName thing
-    mod     = moduleUserString (nameModule name)
-    occ_str = occNameUserString occ
+    mod     = moduleString (nameModule name)
+    occ_str = occNameString occ
      occ     = nameOccName name
      mk_varg | OccName.isDataOcc occ = TH.mkNameG_d
             | OccName.isVarOcc  occ = TH.mkNameG_v
diff --git a/ghc/compiler/types/TypeRep.lhs b/ghc/compiler/types/TypeRep.lhs

index dc53445..7bb863a 100644 (file)
--- a/ghc/compiler/types/TypeRep.lhs
+++ b/ghc/compiler/types/TypeRep.lhs
@@ -33,7 +33,7 @@ import Kind
  import Var       ( Var, Id, TyVar, tyVarKind )
  import VarSet     ( TyVarSet )
  import Name      ( Name, NamedThing(..), BuiltInSyntax(..), mkWiredInName )
-import OccName   ( mkOccFS, tcName, parenSymOcc )
+import OccName   ( mkOccNameFS, tcName, parenSymOcc )
  import BasicTypes ( IPName, tupleParens )
  import TyCon     ( TyCon, mkFunTyCon, tyConArity, tupleTyConBoxity, isTupleTyCon, isRecursiveTyCon, isNewTyCon )
  import Class     ( Class )
@@ -273,7 +273,7 @@ funTyCon = mkFunTyCon funTyConName (mkArrowKinds [argTypeKind, openTypeKind] lif
         -- a prefix way, thus:  (->) Int# Int#.  And this is unusual.
  
  funTyConName = mkWiredInName gHC_PRIM
-                       (mkOccFS tcName FSLIT("(->)"))
+                       (mkOccNameFS tcName FSLIT("(->)"))
                         funTyConKey
                         Nothing                 -- No parent object
                         (ATyCon funTyCon)       -- Relevant TyCon
diff --git a/ghc/compiler/utils/Binary.hs b/ghc/compiler/utils/Binary.hs

index 1902ff1..7b40bd2 100644 (file)
--- a/ghc/compiler/utils/Binary.hs
+++ b/ghc/compiler/utils/Binary.hs
@@ -58,26 +58,7 @@ import UniqFM
  import FastMutInt
  import PackageConfig           ( PackageId, packageIdFS, fsToPackageId )
  
-#if __GLASGOW_HASKELL__ < 503
-import DATA_IOREF
-import DATA_BITS
-import DATA_INT
-import DATA_WORD
-import Char
-import Monad
-import Exception
-import GlaExts hiding (ByteArray, newByteArray, freezeByteArray)
-import Array
-import IO
-import PrelIOBase              ( IOError(..), IOErrorType(..)
-#if __GLASGOW_HASKELL__ > 411
-                               , IOException(..)
-#endif
-                               )
-import PrelReal                        ( Ratio(..) )
-import PrelIOBase              ( IO(..) )
-import IOExts                  ( openFileEx, IOModeEx(..) )
-#else
+import Foreign
  import Data.Array.IO
  import Data.Array
  import Data.Bits
@@ -102,44 +83,12 @@ import GHC.Handle          ( openFileEx, IOModeEx(..) )
  #else
  import System.IO               ( openBinaryFile )
  #endif
-#endif
  
  #if __GLASGOW_HASKELL__ < 601
  openBinaryFile f mode = openFileEx f (BinaryMode mode)
  #endif
  
-#if __GLASGOW_HASKELL__ < 503
-type BinArray = MutableByteArray RealWorld Int
-newArray_ bounds     = stToIO (newCharArray bounds)
-unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e)
-unsafeRead  arr ix   = stToIO (readWord8Array arr ix)
-#if __GLASGOW_HASKELL__ < 411
-newByteArray#        = newCharArray#
-#endif
-hPutArray h arr sz   = hPutBufBAFull h arr sz
-hGetArray h sz       = hGetBufBAFull h sz
-
-mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception
-mkIOError t location maybe_hdl maybe_filename
-  = IOException (IOError maybe_hdl t location ""
-#if __GLASGOW_HASKELL__ > 411
-                        maybe_filename
-#endif
-               )
-
-eofErrorType = EOF
-
-#ifndef SIZEOF_HSINT
-#define SIZEOF_HSINT  INT_SIZE_IN_BYTES
-#endif
-
-#ifndef SIZEOF_HSWORD
-#define SIZEOF_HSWORD WORD_SIZE_IN_BYTES
-#endif
-
-#else
  type BinArray = IOUArray Int Word8
-#endif
  
  ---------------------------------------------------------------
  --             BinHandle
@@ -741,13 +690,17 @@ constructDictionary j fm = array (0,j-1) (eltsUFM fm)
  --             Reading and writing FastStrings
  ---------------------------------------------------------
  
-putFS bh (FastString id l ba) = do
-  put_ bh (I# l)
-  putByteArray bh ba l
-putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s)
-       -- Note: the length of the FastString is *not* the same as
-       -- the size of the ByteArray: the latter is rounded up to a
-       -- multiple of the word size.
+putFS bh (FastString id l _ buf _) = do
+  put_ bh l
+  withForeignPtr buf $ \ptr -> 
+    let 
+       go n | n == l    = return ()
+            | otherwise = do
+               b <- peekElemOff ptr n
+               putByte bh b
+               go (n+1)
+   in 
+   go 0
    
  {- -- possible faster version, not quite there yet:
  getFS bh@BinMem{} = do
@@ -757,16 +710,24 @@ getFS bh@BinMem{} = do
    return $! (mkFastSubStringBA# arr off l)
  -}
  getFS bh = do
-  (I# l) <- get bh
-  (BA ba) <- getByteArray bh (I# l)
-  return $! (mkFastSubStringBA# ba 0# l)
+  l <- get bh
+  fp <- mallocForeignPtrBytes l
+  withForeignPtr fp $ \ptr -> do
+  let 
+       go n | n == l = mkFastStringForeignPtr ptr fp l
+            | otherwise = do
+               b <- getByte bh
+               pokeElemOff ptr n b
+               go (n+1)
+  --
+  go 0
  
  instance Binary PackageId where
    put_ bh pid = put_ bh (packageIdFS pid)
    get bh = do { fs <- get bh; return (fsToPackageId fs) }
  
  instance Binary FastString where
-  put_ bh f@(FastString id l ba) =
+  put_ bh f@(FastString id l _ fp _) =
      case getUserData bh of { 
         UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do
      out <- readIORef out_r
diff --git a/ghc/compiler/utils/BufWrite.hs b/ghc/compiler/utils/BufWrite.hs

index 6d00e46..b15089e 100644 (file)
--- a/ghc/compiler/utils/BufWrite.hs
+++ b/ghc/compiler/utils/BufWrite.hs
@@ -31,17 +31,11 @@ import Char         ( ord )
  import Foreign
  import IO
  
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase      ( IO(..) )
-import IOExts          ( hPutBufFull )
-#else
  import GHC.IOBase      ( IO(..) )
  import System.IO       ( hPutBuf )
-#endif
-
-import GLAEXTS         ( touch#, byteArrayContents#, Int(..), Int#, Addr# )
+import GHC.Ptr         ( Ptr(..) )
  
-import PrimPacked      ( Ptr(..) )
+import GLAEXTS         ( Int(..), Int#, Addr# )
  
  -- -----------------------------------------------------------------------------
  
@@ -88,22 +82,17 @@ bPutStr b@(BufHandle buf r hdl) str = do
                 loop cs (i+1)
    
  bPutFS :: BufHandle -> FastString -> IO ()
-bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len# arr#) = do
-  let len = I# len#
+bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) =
+ withForeignPtr fp $ \ptr -> do
    i <- readFastMutInt r
    if (i + len) >= buf_size
         then do hPutBuf hdl buf i
                 writeFastMutInt r 0
                 if (len >= buf_size) 
-                   then do
-                        let a# = byteArrayContents# arr#
-                        hPutBuf hdl (Ptr a#) len
-                        touch fs
+                   then hPutBuf hdl ptr len
                     else bPutFS b fs
         else do
-               let a# = byteArrayContents# arr#
-               copyBytes (buf `plusPtr` i) (Ptr a#) len
-               touch fs
+               copyBytes (buf `plusPtr` i) ptr len
                 writeFastMutInt r (i+len)
  bPutFS _ _ = panic "bPutFS"
  
@@ -128,8 +117,6 @@ bFlush b@(BufHandle buf r hdl) = do
    free buf
    return ()
  
-touch r = IO $ \s -> case touch# r s of s -> (# s, () #)
-
  #if 0
  myPutBuf s hdl buf i = 
    modifyIOError (\e -> ioeSetErrorString e (ioeGetErrorString e ++ ':':s ++ " (" ++ show buf ++ "," ++ show i ++ ")")) $
diff --git a/ghc/compiler/utils/Encoding.hs b/ghc/compiler/utils/Encoding.hs

new file mode 100644 (file)

index 0000000..d15c021
--- /dev/null
+++ b/ghc/compiler/utils/Encoding.hs
@@ -0,0 +1,386 @@
+{-# OPTIONS_GHC -O #-}
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 1997-2003
+--
+-- Character encodings 
+--
+-- -----------------------------------------------------------------------------
+
+module Encoding ( 
+       -- * UTF-8
+       utf8DecodeChar#,
+       utf8PrevChar,
+       utf8CharStart,
+       utf8DecodeChar,
+       utf8DecodeString,
+       utf8EncodeChar,
+       utf8EncodeString,
+       utf8EncodedLength,
+       countUTF8Chars,
+
+       -- * Latin-1
+       latin1DecodeChar,
+       latin1EncodeChar,
+
+       -- * Z-encoding
+       zEncodeString,
+       zDecodeString
+  ) where
+
+#define COMPILING_FAST_STRING
+#include "HsVersions.h"
+import Foreign
+import Data.Char       ( ord, chr, isDigit, digitToInt, isHexDigit )
+import Numeric         ( showHex )
+
+import GHC.Ptr         ( Ptr(..) )
+import GHC.Base
+
+-- -----------------------------------------------------------------------------
+-- Latin-1
+
+latin1DecodeChar ptr = do
+  w <- peek ptr
+  return (unsafeChr (fromIntegral w), ptr `plusPtr` 1)
+
+latin1EncodeChar c ptr = do
+  poke ptr (fromIntegral (ord c))
+  return (ptr `plusPtr` 1)
+
+-- -----------------------------------------------------------------------------
+-- UTF-8
+
+-- We can't write the decoder as efficiently as we'd like without
+-- resorting to unboxed extensions, unfortunately.  I tried to write
+-- an IO version of this function, but GHC can't eliminate boxed
+-- results from an IO-returning function.
+--
+-- We assume we can ignore overflow when parsing a multibyte character here.
+-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences
+-- before decoding them (see StringBuffer.hs).
+
+{-# INLINE utf8DecodeChar# #-}
+utf8DecodeChar# :: Addr# -> (# Char#, Addr# #)
+utf8DecodeChar# a# =
+  let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
+  case () of 
+    _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #)
+
+      | ch0 >=# 0xC0# && ch0 <=# 0xDF# ->
+       let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+       if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+       (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+                 (ch1 -# 0x80#)),
+          a# `plusAddr#` 2# #)
+
+      | ch0 >=# 0xE0# && ch0 <=# 0xEF# ->
+       let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+       if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+       let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+       if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
+       (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+                ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
+                 (ch2 -# 0x80#)),
+          a# `plusAddr#` 3# #)
+
+     | ch0 >=# 0xF0# && ch0 <=# 0xF8# ->
+       let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+       if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+       let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+       if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
+       let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
+       if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else
+       (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+                ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
+                ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#)  +#
+                 (ch3 -# 0x80#)),
+          a# `plusAddr#` 4# #)
+
+      | otherwise -> fail 1#
+  where
+       -- all invalid sequences end up here:
+       fail n = (# '\0'#, a# `plusAddr#` n #)
+       -- '\xFFFD' would be the usual replacement character, but
+       -- that's a valid symbol in Haskell, so will result in a
+       -- confusing parse error later on.  Instead we use '\0' which
+       -- will signal a lexer error immediately.
+
+utf8DecodeChar :: Ptr Word8 -> (Char, Ptr Word8)
+utf8DecodeChar (Ptr a#) = ( C# c#, Ptr b# )
+  where (# c#, b# #) = utf8DecodeChar# a#
+
+-- UTF-8 is cleverly designed so that we can always figure out where
+-- the start of the current character is, given any position in a
+-- stream.  This function finds the start of the previous character,
+-- assuming there *is* a previous character.
+utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
+utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+
+utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
+utf8CharStart p = go p
+ where go p = do w <- peek p
+                if (w .&. 0xC0) == 0x80
+                       then go (p `plusPtr` (-1))
+                       else return p
+
+utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
+STRICT2(utf8DecodeString)
+utf8DecodeString (Ptr a#) (I# len#)
+  = unpack a#
+  where
+    end# = addr2Int# (a# `plusAddr#` len#)
+
+    unpack p#
+       | addr2Int# p# >=# end# = return []
+       | otherwise  =
+       case utf8DecodeChar# p# of
+          (# c#, q# #) -> do
+               chs <- unpack q#
+               return (C# c# : chs)
+
+countUTF8Chars :: Ptr Word8 -> Int -> IO Int
+countUTF8Chars ptr bytes = go ptr 0
+  where
+       end = ptr `plusPtr` bytes
+
+       STRICT2(go)
+       go ptr n 
+          | ptr >= end = return n
+          | otherwise  = do
+               case utf8DecodeChar# (unPtr ptr) of
+                 (# c, a #) -> go (Ptr a) (n+1)
+
+unPtr (Ptr a) = a
+
+utf8EncodeChar c ptr =
+  let x = ord c in
+  case () of
+    _ | x > 0 && x <= 0x007f -> do
+         poke ptr (fromIntegral x)
+         return (ptr `plusPtr` 1)
+       -- NB. '\0' is encoded as '\xC0\x80', not '\0'.  This is so that we
+       -- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8).
+      | x <= 0x07ff -> do
+         poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F)))
+         pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+         return (ptr `plusPtr` 2)
+      | x <= 0xffff -> do
+         poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F))
+         pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F))
+         pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+         return (ptr `plusPtr` 3)
+      | otherwise -> do
+         poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18)))
+         pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F)))
+         pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F)))
+         pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+         return (ptr `plusPtr` 4)
+
+utf8EncodeString :: Ptr Word8 -> String -> IO ()
+utf8EncodeString ptr str = go ptr str
+  where STRICT2(go)
+       go ptr [] = return ()
+       go ptr (c:cs) = do
+         ptr' <- utf8EncodeChar c ptr
+         go ptr' cs
+
+utf8EncodedLength :: String -> Int
+utf8EncodedLength str = go 0 str
+  where STRICT2(go)
+       go n [] = n
+        go n (c:cs)
+         | ord c > 0 && ord c <= 0x007f = go (n+1) cs
+         | ord c <= 0x07ff = go (n+2) cs
+         | ord c <= 0xffff = go (n+3) cs       
+         | otherwise       = go (n+4) cs       
+
+-- -----------------------------------------------------------------------------
+-- The Z-encoding
+
+{-
+This is the main name-encoding and decoding function.  It encodes any
+string into a string that is acceptable as a C name.  This is the name
+by which things are known right through the compiler.
+
+The basic encoding scheme is this.  
+
+* Tuples (,,,) are coded as Z3T
+
+* Alphabetic characters (upper and lower) and digits
+       all translate to themselves; 
+       except 'Z', which translates to 'ZZ'
+       and    'z', which translates to 'zz'
+  We need both so that we can preserve the variable/tycon distinction
+
+* Most other printable characters translate to 'zx' or 'Zx' for some
+       alphabetic character x
+
+* The others translate as 'znnnU' where 'nnn' is the decimal number
+        of the character
+
+       Before          After
+       --------------------------
+       Trak            Trak
+       foo_wib         foozuwib
+       >               zg
+       >1              zg1
+       foo#            foozh
+       foo##           foozhzh
+       foo##1          foozhzh1
+       fooZ            fooZZ   
+       :+              ZCzp
+       ()              Z0T     0-tuple
+       (,,,,)          Z5T     5-tuple  
+       (# #)           Z1H     unboxed 1-tuple (note the space)
+       (#,,,,#)        Z5H     unboxed 5-tuple
+               (NB: There is no Z1T nor Z0H.)
+-}
+
+type UserString = String       -- As the user typed it
+type EncodedString = String    -- Encoded form
+
+
+zEncodeString :: UserString -> EncodedString
+zEncodeString cs = case maybe_tuple cs of
+               Just n  -> n            -- Tuples go to Z2T etc
+               Nothing -> go cs
+         where
+               go []     = []
+               go (c:cs) = encode_ch c ++ go cs
+
+unencodedChar :: Char -> Bool  -- True for chars that don't need encoding
+unencodedChar 'Z' = False
+unencodedChar 'z' = False
+unencodedChar c   =  c >= 'a' && c <= 'z'
+                 || c >= 'A' && c <= 'Z'
+                 || c >= '0' && c <= '9'
+
+encode_ch :: Char -> EncodedString
+encode_ch c | unencodedChar c = [c]    -- Common case first
+
+-- Constructors
+encode_ch '('  = "ZL"  -- Needed for things like (,), and (->)
+encode_ch ')'  = "ZR"  -- For symmetry with (
+encode_ch '['  = "ZM"
+encode_ch ']'  = "ZN"
+encode_ch ':'  = "ZC"
+encode_ch 'Z'  = "ZZ"
+
+-- Variables
+encode_ch 'z'  = "zz"
+encode_ch '&'  = "za"
+encode_ch '|'  = "zb"
+encode_ch '^'  = "zc"
+encode_ch '$'  = "zd"
+encode_ch '='  = "ze"
+encode_ch '>'  = "zg"
+encode_ch '#'  = "zh"
+encode_ch '.'  = "zi"
+encode_ch '<'  = "zl"
+encode_ch '-'  = "zm"
+encode_ch '!'  = "zn"
+encode_ch '+'  = "zp"
+encode_ch '\'' = "zq"
+encode_ch '\\' = "zr"
+encode_ch '/'  = "zs"
+encode_ch '*'  = "zt"
+encode_ch '_'  = "zu"
+encode_ch '%'  = "zv"
+encode_ch c    = 'z' : if isDigit (head hex_str) then hex_str
+                                                else '0':hex_str
+  where hex_str = showHex (ord c) "U"
+  -- ToDo: we could improve the encoding here in various ways.
+  -- eg. strings of unicode characters come out as 'z1234Uz5678U', we
+  -- could remove the 'U' in the middle (the 'z' works as a separator).
+
+zDecodeString :: EncodedString -> UserString
+zDecodeString [] = []
+zDecodeString ('Z' : d : rest) 
+  | isDigit d = decode_tuple   d rest
+  | otherwise = decode_upper   d : zDecodeString rest
+zDecodeString ('z' : d : rest)
+  | isDigit d = decode_num_esc d rest
+  | otherwise = decode_lower   d : zDecodeString rest
+zDecodeString (c   : rest) = c : zDecodeString rest
+
+decode_upper, decode_lower :: Char -> Char
+
+decode_upper 'L' = '('
+decode_upper 'R' = ')'
+decode_upper 'M' = '['
+decode_upper 'N' = ']'
+decode_upper 'C' = ':'
+decode_upper 'Z' = 'Z'
+decode_upper ch  = {-pprTrace "decode_upper" (char ch)-} ch
+               
+decode_lower 'z' = 'z'
+decode_lower 'a' = '&'
+decode_lower 'b' = '|'
+decode_lower 'c' = '^'
+decode_lower 'd' = '$'
+decode_lower 'e' = '='
+decode_lower 'g' = '>'
+decode_lower 'h' = '#'
+decode_lower 'i' = '.'
+decode_lower 'l' = '<'
+decode_lower 'm' = '-'
+decode_lower 'n' = '!'
+decode_lower 'p' = '+'
+decode_lower 'q' = '\''
+decode_lower 'r' = '\\'
+decode_lower 's' = '/'
+decode_lower 't' = '*'
+decode_lower 'u' = '_'
+decode_lower 'v' = '%'
+decode_lower ch  = {-pprTrace "decode_lower" (char ch)-} ch
+
+-- Characters not having a specific code are coded as z224U (in hex)
+decode_num_esc d rest
+  = go (digitToInt d) rest
+  where
+    go n (c : rest) | isHexDigit c = go (16*n + digitToInt c) rest
+    go n ('U' : rest)           = chr n : zDecodeString rest
+    go n other = error ("decode_num_esc: " ++ show n ++  ' ':other)
+
+decode_tuple :: Char -> EncodedString -> UserString
+decode_tuple d rest
+  = go (digitToInt d) rest
+  where
+       -- NB. recurse back to zDecodeString after decoding the tuple, because
+       -- the tuple might be embedded in a longer name.
+    go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
+    go 0 ('T':rest)    = "()" ++ zDecodeString rest
+    go n ('T':rest)    = '(' : replicate (n-1) ',' ++ ")" ++ zDecodeString rest
+    go 1 ('H':rest)    = "(# #)" ++ zDecodeString rest
+    go n ('H':rest)    = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ zDecodeString rest
+    go n other = error ("decode_tuple: " ++ show n ++ ' ':other)
+
+{-
+Tuples are encoded as
+       Z3T or Z3H
+for 3-tuples or unboxed 3-tuples respectively.  No other encoding starts 
+       Z<digit>
+
+* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
+  There are no unboxed 0-tuples.  
+
+* "()" is the tycon for a boxed 0-tuple.
+  There are no boxed 1-tuples.
+-}
+
+maybe_tuple :: UserString -> Maybe EncodedString
+
+maybe_tuple "(# #)" = Just("Z1H")
+maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
+                                (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
+                                other               -> Nothing
+maybe_tuple "()" = Just("Z0T")
+maybe_tuple ('(' : cs)       = case count_commas (0::Int) cs of
+                                (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
+                                other         -> Nothing
+maybe_tuple other           = Nothing
+
+count_commas :: Int -> String -> (Int, String)
+count_commas n (',' : cs) = count_commas (n+1) cs
+count_commas n cs        = (n,cs)
diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs

index 52512d3..2558c56 100644 (file)
--- a/ghc/compiler/utils/FastString.lhs
+++ b/ghc/compiler/utils/FastString.lhs
@@ -1,8 +1,10 @@
  %
-% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
+% (c) The University of Glasgow, 1997-2006
  %
-\section{Fast strings}
+\begin{code}
+{-# OPTIONS -fglasgow-exts -O #-}
  
+{-
  FastString:    A compact, hash-consed, representation of character strings.
                 Comparison is O(1), and you can get a Unique from them.
                 Generated by the FSLIT macro
@@ -15,40 +17,46 @@ LitString:  Just a wrapper for the Addr# of a C string (Ptr CChar).
                 Turn into SDoc with Outputable.ptext
  
  Use LitString unless you want the facilities of FastString
-
-\begin{code}
+-}
  module FastString
         (
+       -- * FastStrings
         FastString(..),     -- not abstract, for now.
  
-        mkFastString,       -- :: String -> FastString
-        mkFastStringNarrow, -- :: String -> FastString
-        mkFastSubString,    -- :: Addr -> Int -> Int -> FastString
-
-       mkFastString#,      -- :: Addr# -> FastString
-        mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
-
-        mkFastStringInt,    -- :: [Int] -> FastString
-
-        uniqueOfFS,        -- :: FastString -> Int#
-       lengthFS,           -- :: FastString -> Int
-       nullFastString,     -- :: FastString -> Bool
+       -- ** Construction
+        mkFastString,
+       mkFastStringBytes,
+       mkFastStringForeignPtr,
+       mkFastString#,
+       mkZFastString,
+       mkZFastStringBytes,
  
+       -- ** Deconstruction
         unpackFS,           -- :: FastString -> String
-       unpackIntFS,        -- :: FastString -> [Int]
-       appendFS,           -- :: FastString -> FastString -> FastString
-        headFS,                    -- :: FastString -> Char
-        headIntFS,         -- :: FastString -> Int
-        tailFS,                    -- :: FastString -> FastString
-       concatFS,           -- :: [FastString] -> FastString
-        consFS,             -- :: Char -> FastString -> FastString
-       indexFS,            -- :: FastString -> Int -> Char
-       nilFS,              -- :: FastString
-
-        hPutFS,                    -- :: Handle -> FastString -> IO ()
-
+       bytesFS,            -- :: FastString -> [Word8]
+
+       -- ** Encoding
+       isZEncoded,
+       zEncodeFS,
+
+       -- ** Operations
+        uniqueOfFS,
+       lengthFS,
+       nullFS,
+       appendFS,
+        headFS,
+        tailFS,
+       concatFS,
+        consFS,
+       nilFS,
+
+       -- ** Outputing
+        hPutFS,
+
+       -- * LitStrings
         LitString, 
-       mkLitString#        -- :: Addr# -> LitString
+       mkLitString#,
+       strLength
         ) where
  
  -- This #define suppresses the "import FastString" that
@@ -56,64 +64,49 @@ module FastString
  #define COMPILING_FAST_STRING
  #include "HsVersions.h"
  
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase      ( IO(..) )
-#else
-import GHC.IOBase      ( IO(..) )
-#endif
+import Encoding
  
-import PrimPacked
+import Foreign
+import Foreign.C
  import GLAEXTS
  import UNSAFE_IO       ( unsafePerformIO )
  import MONAD_ST                ( stToIO )
  import DATA_IOREF      ( IORef, newIORef, readIORef, writeIORef )
+import System.IO       ( hPutBuf )
  
-#if __GLASGOW_HASKELL__ < 503
-import PrelArr         ( STArray(..), newSTArray )
-#else
  import GHC.Arr         ( STArray(..), newSTArray )
-#endif
-
-#if __GLASGOW_HASKELL__ >= 504
-import GHC.IOBase
-import GHC.Handle
-import Foreign.C
-#else
-import IOExts          ( hPutBufBAFull )
-#endif
+import GHC.IOBase      ( IO(..) )
  
  import IO
-import Char             ( chr, ord )
  
  #define hASH_TBL_SIZE  4091
-\end{code} 
  
-@FastString@s are packed representations of strings
-with a unique id for fast comparisons. The unique id
-is assigned when creating the @FastString@, using
-a hash table to map from the character string representation
-to the unique ID.
  
-\begin{code}
-data FastString
-  = FastString   -- packed repr. on the heap.
-      Int#       -- unique id
-                --  0 => string literal, comparison
-                --  will
-      Int#       -- length
-      ByteArray# -- stuff
-
-  | UnicodeStr   -- if contains characters outside '\1'..'\xFF'
-      Int#       -- unique id
-      [Int]      -- character numbers
+{-|
+A 'FastString' is an array of bytes, hashed to support fast O(1)
+comparison.  It is also associated with a character encoding, so that
+we know how to convert a 'FastString' to the local encoding, or to the
+Z-encoding used by the compiler internally.
  
-instance Eq FastString where
-       -- shortcut for real FastStrings
-  (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
-  a == b = case cmpFS a b of { LT -> False; EQ -> True;  GT -> False }
+'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
+-}
+
+data FastString = FastString {
+      uniq    :: {-# UNPACK #-} !Int,       -- unique id
+      n_bytes :: {-# UNPACK #-} !Int,       -- number of bytes
+      n_chars :: {-# UNPACK #-} !Int,    -- number of chars
+      buf     :: {-# UNPACK #-} !(ForeignPtr Word8),
+      enc     :: FSEncoding
+  }
+
+data FSEncoding
+  = ZEncoded
+       -- including strings that don't need any encoding
+  | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
+       -- A UTF-8 string with a memoized Z-encoding
  
-  (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
-  a /= b = case cmpFS a b of { LT -> True;  EQ -> False; GT -> True  }
+instance Eq FastString where
+  f1 == f2  =  uniq f1 == uniq f2
  
  instance Ord FastString where
         -- Compares lexicographically, not by unique
@@ -130,360 +123,311 @@ instance Ord FastString where
  instance Show FastString where
     show fs = show (unpackFS fs)
  
-lengthFS :: FastString -> Int
-lengthFS (FastString _ l# _) = I# l#
-lengthFS (UnicodeStr _ s) = length s
-
-nullFastString :: FastString -> Bool
-nullFastString (FastString _ l# _) = l# ==# 0#
-nullFastString (UnicodeStr _ []) = True
-nullFastString (UnicodeStr _ (_:_)) = False
-
-unpackFS :: FastString -> String
-unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
-unpackFS (UnicodeStr _ s) = map chr s
-
-unpackIntFS :: FastString -> [Int]
-unpackIntFS (UnicodeStr _ s) = s
-unpackIntFS fs = map ord (unpackFS fs)
-
-appendFS :: FastString -> FastString -> FastString
-appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
-
-concatFS :: [FastString] -> FastString
-concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
-
-headFS :: FastString -> Char
-headFS (FastString _ l# ba#) = 
- if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
-headFS (UnicodeStr _ (c:_)) = chr c
-headFS (UnicodeStr _ []) = error ("headFS: empty FS")
-
-headIntFS :: FastString -> Int
-headIntFS (UnicodeStr _ (c:_)) = c
-headIntFS fs = ord (headFS fs)
-
-indexFS :: FastString -> Int -> Char
-indexFS f i@(I# i#) =
- case f of
-   FastString _ l# ba#
-     | l# ># 0# && l# ># i#  -> C# (indexCharArray# ba# i#)
-     | otherwise            -> error (msg (I# l#))
-   UnicodeStr _ s           -> chr (s!!i)
- where
-  msg l =  "indexFS: out of range: " ++ show (l,i)
-
-tailFS :: FastString -> FastString
-tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
-tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
-
-consFS :: Char -> FastString -> FastString
-consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
+cmpFS :: FastString -> FastString -> Ordering
+cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
+  if u1 == u2 then EQ else
+  let l = if l1 <= l2 then l1 else l2 in
+  inlinePerformIO $
+    withForeignPtr buf1 $ \p1 ->
+    withForeignPtr buf2 $ \p2 -> do
+      res <- memcmp p1 p2 l
+      case () of
+       _ | res <  0  -> return LT
+        | res == 0  -> if l1 == l2 then return EQ 
+                                   else if l1 < l2 then return LT
+                                                   else return GT
+        | otherwise -> return GT
  
-uniqueOfFS :: FastString -> Int#
-uniqueOfFS (FastString u# _ _) = u#
-uniqueOfFS (UnicodeStr u# _) = u#
+#ifndef __HADDOCK__
+foreign import ccall unsafe "ghc_memcmp" 
+  memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+#endif
  
-nilFS = mkFastString ""
-\end{code}
+-- -----------------------------------------------------------------------------
+-- Construction
  
+{-
  Internally, the compiler will maintain a fast string symbol
  table, providing sharing and fast comparison. Creation of
  new @FastString@s then covertly does a lookup, re-using the
  @FastString@ if there was a hit.
+-}
  
-Caution: mkFastStringUnicode assumes that if the string is in the
-table, it sits under the UnicodeStr constructor. Other mkFastString
-variants analogously assume the FastString constructor.
-
-\begin{code}
  data FastStringTable = 
   FastStringTable
-    Int#
+    {-# UNPACK #-} !Int
      (MutableArray# RealWorld [FastString])
  
-type FastStringTableVar = IORef FastStringTable
-
-string_table :: FastStringTableVar
+string_table :: IORef FastStringTable
  string_table = 
- unsafePerformIO (
-   stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
-       >>= \ (STArray _ _ arr#) ->
-   newIORef (FastStringTable 0# arr#))
-
-lookupTbl :: FastStringTable -> Int# -> IO [FastString]
-lookupTbl (FastStringTable _ arr#) i# =
-  IO ( \ s# ->
-  readArray# arr# i# s#)
-
-updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
-updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
- IO (\ s# -> case writeArray# arr# i# ls s# of { s2# -> 
-       (# s2#, () #) }) >>
- writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
+ unsafePerformIO $ do
+   (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
+   newIORef (FastStringTable 0 arr#)
+
+lookupTbl :: FastStringTable -> Int -> IO [FastString]
+lookupTbl (FastStringTable _ arr#) (I# i#) =
+  IO $ \ s# -> readArray# arr# i# s#
+
+updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
+updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
+  (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
+  writeIORef fs_table_var (FastStringTable (uid+1) arr#)
  
  mkFastString# :: Addr# -> FastString
-mkFastString# a# =
- case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
+mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
+  where ptr = Ptr a#
  
-mkFastStringLen# :: Addr# -> Int# -> FastString
-mkFastStringLen# a# len# =
- unsafePerformIO  (
-  readIORef string_table       >>= \ ft@(FastStringTable uid# tbl#) ->
+mkFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkFastStringBytes ptr len = unsafePerformIO $ do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
    let
-   h = hashStr a# len#
-  in
---  _trace ("hashed: "++show (I# h)) $
-  lookupTbl ft h       >>= \ lookup_result ->
+   h = hashStr ptr len
+   add_it ls = do
+       fs <- copyNewFastString uid ptr len
+       updTbl string_table ft h (fs:ls)
+       {- _trace ("new: " ++ show f_str)   $ -}
+       return fs
+  --
+  lookup_result <- lookupTbl ft h
    case lookup_result of
-    [] -> 
-       -- no match, add it to table by copying out the
-       -- the string into a ByteArray
-       -- _trace "empty bucket" $
-       case copyPrefixStr a# (I# len#) of
-        BA barr# ->  
-          let f_str = FastString uid# len# barr# in
-           updTbl string_table ft h [f_str] >>
-           ({- _trace ("new: " ++ show f_str)   $ -} return f_str)
-    ls -> 
-       -- non-empty `bucket', scan the list looking
-       -- entry with same length and compare byte by byte.
-       -- _trace ("non-empty bucket"++show ls) $
-       case bucket_match ls len# a# of
-        Nothing -> 
-           case copyPrefixStr a# (I# len#) of
-            BA barr# ->  
-              let f_str = FastString uid# len# barr# in
-              updTbl string_table ft h (f_str:ls) >>
-             ( {- _trace ("new: " ++ show f_str)  $ -} return f_str)
-        Just v  -> {- _trace ("re-use: "++show v) $ -} return v)
-  where
-   bucket_match [] _ _ = Nothing
-   bucket_match (v@(FastString _ l# ba#):ls) len# a# =
-      if len# ==# l# && eqStrPrefix a# ba# l# then
-        Just v
-      else
-        bucket_match ls len# a#
-   bucket_match (UnicodeStr _ _ : ls) len# a# =
-      bucket_match ls len# a#
-
-mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
-mkFastSubStringBA# barr# start# len# =
- unsafePerformIO  (
-  readIORef string_table       >>= \ ft@(FastStringTable uid# tbl#) ->
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+        Nothing -> add_it ls
+        Just v  -> {- _trace ("re-use: "++show v) $ -} return v
+
+mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkZFastStringBytes ptr len = unsafePerformIO $ do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
    let
-   h = hashSubStrBA barr# start# len#
-  in
---  _trace ("hashed(b): "++show (I# h)) $
-  lookupTbl ft h               >>= \ lookup_result ->
+   h = hashStr ptr len
+   add_it ls = do
+       fs <- copyNewZFastString uid ptr len
+       updTbl string_table ft h (fs:ls)
+       {- _trace ("new: " ++ show f_str)   $ -}
+       return fs
+  --
+  lookup_result <- lookupTbl ft h
    case lookup_result of
-    [] -> 
-       -- no match, add it to table by copying out the
-       -- the string into a ByteArray
-       -- _trace "empty bucket(b)" $
-       case copySubStrBA (BA barr#) (I# start#) (I# len#) of
-         BA ba# ->  
-          let f_str = FastString uid# len# ba# in
-          updTbl string_table ft h [f_str]     >>
-          -- _trace ("new(b): " ++ show f_str)   $
-         return f_str
-    ls ->
-       -- non-empty `bucket', scan the list looking
-       -- entry with same length and compare byte by byte. 
-       -- _trace ("non-empty bucket(b)"++show ls) $
-       case bucket_match ls start# len# barr# of
-        Nothing ->
-          case copySubStrBA (BA barr#) (I# start#) (I# len#) of
-            BA ba# ->  
-              let f_str = FastString uid# len# ba# in
-              updTbl string_table ft h (f_str:ls) >>
-             -- _trace ("new(b): " ++ show f_str)   $
-             return f_str
-        Just v  -> 
-              -- _trace ("re-use(b): "++show v) $
-             return v
-  )
- where
-   bucket_match [] _ _ _ = Nothing
-   bucket_match (v:ls) start# len# ba# =
-    case v of
-     FastString _ l# barr# ->
-      if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
-        Just v
-      else
-        bucket_match ls start# len# ba#
-     UnicodeStr _ _ -> bucket_match ls start# len# ba#
-
-mkFastStringUnicode :: [Int] -> FastString
-mkFastStringUnicode s =
- unsafePerformIO  (
-  readIORef string_table       >>= \ ft@(FastStringTable uid# tbl#) ->
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+        Nothing -> add_it ls
+        Just v  -> {- _trace ("re-use: "++show v) $ -} return v
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkFastStringForeignPtr ptr fp len = do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
+--  _trace ("hashed: "++show (I# h)) $
    let
-   h = hashUnicode s 0#
-  in
---  _trace ("hashed(b): "++show (I# h)) $
-  lookupTbl ft h               >>= \ lookup_result ->
+    h = hashStr ptr len
+    add_it ls = do
+       fs <- mkNewFastString uid ptr fp len
+       updTbl string_table ft h (fs:ls)
+       {- _trace ("new: " ++ show f_str)   $ -}
+       return fs
+  --
+  lookup_result <- lookupTbl ft h
+  case lookup_result of
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+        Nothing -> add_it ls
+        Just v  -> {- _trace ("re-use: "++show v) $ -} return v
+
+mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkZFastStringForeignPtr ptr fp len = do
+  ft@(FastStringTable uid tbl#) <- readIORef string_table
+--  _trace ("hashed: "++show (I# h)) $
+  let
+    h = hashStr ptr len
+    add_it ls = do
+       fs <- mkNewZFastString uid ptr fp len
+       updTbl string_table ft h (fs:ls)
+       {- _trace ("new: " ++ show f_str)   $ -}
+       return fs
+  --
+  lookup_result <- lookupTbl ft h
    case lookup_result of
-    [] -> 
-       -- no match, add it to table by copying out the
-       -- the string into a [Int]
-          let f_str = UnicodeStr uid# s in
-          updTbl string_table ft h [f_str]     >>
-          -- _trace ("new(b): " ++ show f_str)   $
-         return f_str
-    ls -> 
-       -- non-empty `bucket', scan the list looking
-       -- entry with same length and compare byte by byte. 
-       -- _trace ("non-empty bucket(b)"++show ls) $
-       case bucket_match ls of
-        Nothing -> 
-              let f_str = UnicodeStr uid# s in
-              updTbl string_table ft h (f_str:ls) >>
-             -- _trace ("new(b): " ++ show f_str)   $
-             return f_str
-        Just v  -> 
-              -- _trace ("re-use(b): "++show v) $
-             return v
-  )
- where
-   bucket_match [] = Nothing
-   bucket_match (v@(UnicodeStr _ s'):ls) =
-       if s' == s then Just v else bucket_match ls
-   bucket_match (FastString _ _ _ : ls) = bucket_match ls
-
-mkFastStringNarrow :: String -> FastString
-mkFastStringNarrow str =
- case packString str of { (I# len#, BA frozen#) -> 
-    mkFastSubStringBA# frozen# 0# len#
- }
- {- 0-indexed array, len# == index to one beyond end of string,
-    i.e., (0,1) => empty string.    -}
+    [] -> add_it []
+    ls -> do
+       b <- bucket_match ls len ptr
+       case b of
+        Nothing -> add_it ls
+        Just v  -> {- _trace ("re-use: "++show v) $ -} return v
  
-mkFastString :: String -> FastString
-mkFastString str = if all good str
-    then mkFastStringNarrow str
-    else mkFastStringUnicode (map ord str)
-    where
-    good c = c >= '\1' && c <= '\xFF'
-
-mkFastStringInt :: [Int] -> FastString
-mkFastStringInt str = if all good str
-    then mkFastStringNarrow (map chr str)
-    else mkFastStringUnicode str
-    where
-    good c = c >= 1 && c <= 0xFF
-
-mkFastSubString :: Addr# -> Int -> Int -> FastString
-mkFastSubString a# (I# start#) (I# len#) =
- mkFastStringLen# (a# `plusAddr#` start#) len#
-\end{code}
  
-\begin{code}
-hashStr  :: Addr# -> Int# -> Int#
+-- | Creates a UTF-8 encoded 'FastString' from a 'String'
+mkFastString :: String -> FastString
+mkFastString str = 
+  inlinePerformIO $ do
+    let l = utf8EncodedLength str
+    buf <- mallocForeignPtrBytes l
+    withForeignPtr buf $ \ptr -> do
+      utf8EncodeString ptr str
+      mkFastStringForeignPtr ptr buf l 
+
+
+-- | Creates a Z-encoded 'FastString' from a 'String'
+mkZFastString :: String -> FastString
+mkZFastString str = 
+  inlinePerformIO $ do
+    let l = Prelude.length str
+    buf <- mallocForeignPtrBytes l
+    withForeignPtr buf $ \ptr -> do
+      pokeCAString (castPtr ptr) str
+      mkZFastStringForeignPtr ptr buf l 
+
+bucket_match [] _ _ = return Nothing
+bucket_match (v@(FastString _ l _ buf _):ls) len ptr
+      | len == l  =  do
+        b <- cmpStringPrefix ptr buf len
+        if b then return (Just v)
+             else bucket_match ls len ptr
+      | otherwise = 
+        bucket_match ls len ptr
+
+mkNewFastString uid ptr fp len = do
+  ref <- newIORef Nothing
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid len n_chars fp (UTF8Encoded ref))
+
+mkNewZFastString uid ptr fp len = do
+  return (FastString uid len len fp ZEncoded)
+
+
+copyNewFastString uid ptr len = do
+  fp <- copyBytesToForeignPtr ptr len
+  ref <- newIORef Nothing
+  n_chars <- countUTF8Chars ptr len
+  return (FastString uid len n_chars fp (UTF8Encoded ref))
+
+copyNewZFastString uid ptr len = do
+  fp <- copyBytesToForeignPtr ptr len
+  return (FastString uid len len fp ZEncoded)
+
+
+copyBytesToForeignPtr ptr len = do
+  fp <- mallocForeignPtrBytes len
+  withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
+  return fp
+
+cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
+cmpStringPrefix ptr fp len =
+  withForeignPtr fp $ \ptr' -> do
+    r <- memcmp ptr ptr' len
+    return (r == 0)
+
+
+hashStr  :: Ptr Word8 -> Int -> Int
   -- use the Addr to produce a hash value between 0 & m (inclusive)
-hashStr a# len# = loop 0# 0#
+hashStr (Ptr a#) (I# len#) = loop 0# 0#
     where 
-    loop h n | n ==# len# = h
+    loop h n | n ==# len# = I# h
              | otherwise  = loop h2 (n +# 1#)
           where c = ord# (indexCharOffAddr# a# n)
                 h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
  
-hashSubStrBA  :: ByteArray# -> Int# -> Int# -> Int#
- -- use the byte array to produce a hash value between 0 & m (inclusive)
-hashSubStrBA ba# start# len# = loop 0# 0#
-   where 
-    loop h n | n ==# len# = h
-            | otherwise  = loop h2 (n +# 1#)
-         where c = ord# (indexCharArray# ba# (start# +# n))
-               h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
+-- -----------------------------------------------------------------------------
+-- Operations
  
-hashUnicode :: [Int] -> Int# -> Int#
-hashUnicode [] h = h
-hashUnicode (I# c : cs) h = hashUnicode cs ((c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#)
-\end{code}
+-- | Returns the length of the 'FastString' in characters
+lengthFS :: FastString -> Int
+lengthFS f = n_chars f
  
-\begin{code}
-cmpFS :: FastString -> FastString -> Ordering
-cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
-    else compare s1 s2
-cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
-cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
-cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
-  if u1# ==# u2# then EQ else
-  let l# = if l1# <=# l2# then l1# else l2# in
-  unsafePerformIO (
-    memcmp b1# b2# l# >>= \ (I# res) ->
-    return (
-    if      res <#  0# then LT
-    else if res ==# 0# then 
-       if l1# ==# l2# then EQ
-       else if l1# <# l2# then LT else GT
-    else                   GT
-    ))
+-- | Returns 'True' if the 'FastString' is Z-encoded
+isZEncoded :: FastString -> Bool
+isZEncoded fs | ZEncoded <- enc fs = True
+               | otherwise          = False
  
-#ifndef __HADDOCK__
-foreign import ccall unsafe "ghc_memcmp" 
-  memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
-#endif
+-- | Returns 'True' if the 'FastString' is empty
+nullFS :: FastString -> Bool
+nullFS f  =  n_bytes f == 0
+
+-- | unpacks and decodes the FastString
+unpackFS :: FastString -> String
+unpackFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr ->
+    case enc of
+       ZEncoded      -> peekCAStringLen (castPtr ptr,n_bytes)
+       UTF8Encoded _ -> utf8DecodeString ptr n_bytes
+
+bytesFS :: FastString -> [Word8]
+bytesFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr ->
+    peekArray n_bytes ptr
+
+-- | returns a Z-encoded version of a 'FastString'.  This might be the
+-- original, if it was already Z-encoded.  The first time this
+-- function is applied to a particular 'FastString', the results are
+-- memoized.
+--
+zEncodeFS :: FastString -> FastString
+zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
+  case enc of
+    ZEncoded -> fs
+    UTF8Encoded ref ->
+      inlinePerformIO $ do
+        m <- readIORef ref
+        case m of
+         Just fs -> return fs
+         Nothing -> do
+            let efs = mkZFastString (zEncodeString (unpackFS fs))
+           writeIORef ref (Just efs)
+           return efs
+
+appendFS :: FastString -> FastString -> FastString
+appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
+
+concatFS :: [FastString] -> FastString
+concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
+
+headFS :: FastString -> Char
+headFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+    case enc of
+      ZEncoded -> do 
+        w <- peek (castPtr ptr)
+        return (castCCharToChar w)
+      UTF8Encoded _ -> 
+        return (fst (utf8DecodeChar ptr))
+
+tailFS :: FastString -> FastString
+tailFS (FastString _ n_bytes _ buf enc) = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+    case enc of
+      ZEncoded -> do
+       return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
+      UTF8Encoded _ -> do
+        let (_,ptr') = utf8DecodeChar ptr
+        let off = ptr' `minusPtr` ptr
+        return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
+
+consFS :: Char -> FastString -> FastString
+consFS c fs = mkFastString (c : unpackFS fs)
+
+uniqueOfFS :: FastString -> Int#
+uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
+
+nilFS = mkFastString ""
  
  -- -----------------------------------------------------------------------------
  -- Outputting 'FastString's
  
-#if __GLASGOW_HASKELL__ >= 504
-
--- this is our own version of hPutBuf for FastStrings, because in
--- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
--- The closest is hPutArray in Data.Array.IO, but that does some extra
--- range checks that we want to avoid here.
-
-foreign import ccall unsafe "__hscore_memcpy_dst_off"
-   memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
-
-hPutFS handle (FastString _ l# ba#)
-  | l# ==# 0#  = return ()
-  | otherwise
-   = do wantWritableHandle "hPutFS" handle $ 
-          \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
-
-          old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
-           <- readIORef ref
-
-         let count = I# l#
-             raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
-
-          -- enough room in handle buffer?
-          if (size - w > count)
-               -- There's enough room in the buffer:
-               -- just copy the data in and update bufWPtr.
-           then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
-                   writeIORef ref old_buf{ bufWPtr = w + count }
-                   return ()
-
-               -- else, we have to flush
-           else do flushed_buf <- flushWriteBuffer fd stream old_buf
-                   writeIORef ref flushed_buf
-                   let this_buf = 
-                           Buffer{ bufBuf=raw, bufState=WriteBuffer, 
-                                   bufRPtr=0, bufWPtr=count, bufSize=count }
-                   flushWriteBuffer fd stream this_buf
-                   return ()
-
-#else
-
-hPutFS :: Handle -> FastString -> IO ()
-hPutFS handle (FastString _ l# ba#)
-  | l# ==# 0#  = return ()
-  | otherwise  = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
-                    hPutBufBAFull  handle mba (I# l#)
- where
-  bot = error "hPutFS.ba"
-
-#endif
+-- |Outputs a 'FastString' with /no decoding at all/, that is, you
+-- get the actual bytes in the 'FastString' written to the 'Handle'.
+hPutFS handle (FastString _ len _ fp _)
+  | len == 0  = return ()
+  | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
  
--- ONLY here for debugging the NCG (so -ddump-stix works for string
--- literals); no idea if this is really necessary.  JRS, 010131
-hPutFS handle (UnicodeStr _ is) 
-  = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
+-- ToDo: we'll probably want an hPutFSLocal, or something, to output
+-- in the current locale's encoding (for error messages and suchlike).
  
  -- -----------------------------------------------------------------------------
  -- LitStrings, here for convenience only.
@@ -492,4 +436,24 @@ type LitString = Ptr ()
  
  mkLitString# :: Addr# -> LitString
  mkLitString# a# = Ptr a#
+
+foreign import ccall unsafe "ghc_strlen" 
+  strLength :: Ptr () -> Int
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
+
+pokeCAString :: Ptr CChar -> String -> IO ()
+pokeCAString ptr str =
+  let
+       go [] n     = pokeElemOff ptr n 0
+       go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
+  in
+  go str 0
+
  \end{code}
diff --git a/ghc/compiler/utils/FastTypes.lhs b/ghc/compiler/utils/FastTypes.lhs

index 9f9d903..bb92c8c 100644 (file)
--- a/ghc/compiler/utils/FastTypes.lhs
+++ b/ghc/compiler/utils/FastTypes.lhs
@@ -9,7 +9,7 @@ module FastTypes (
      (+#), (-#), (*#), quotFastInt, negateFastInt,
      (==#), (<#), (<=#), (>=#), (>#),
  
-    FastBool, fastBool, isFastTrue, fastOr
+    FastBool, fastBool, isFastTrue, fastOr, fastAnd
    ) where
  
  #include "HsVersions.h"
diff --git a/ghc/compiler/utils/Pretty.lhs b/ghc/compiler/utils/Pretty.lhs

index 916755e..ec8f1e7 100644 (file)
--- a/ghc/compiler/utils/Pretty.lhs
+++ b/ghc/compiler/utils/Pretty.lhs
@@ -179,26 +179,16 @@ module Pretty (
  
  import BufWrite
  import FastString
-import PrimPacked      ( strLength )
  
  import GLAEXTS
  
  import Numeric (fromRat)
  import IO
  
-#if __GLASGOW_HASKELL__ < 503
-import IOExts          ( hPutBufFull )
-#else
  import System.IO       ( hPutBuf )
-#endif
  
-#if __GLASGOW_HASKELL__ < 503
-import PrelBase                ( unpackCString# )
-#else
  import GHC.Base                ( unpackCString# )
-#endif
-
-import PrimPacked      ( Ptr(..) )
+import GHC.Ptr         ( Ptr(..) )
  
  -- Don't import Util( assertPanic ) because it makes a loop in the module structure
  
diff --git a/ghc/compiler/utils/PrimPacked.lhs b/ghc/compiler/utils/PrimPacked.lhs

deleted file mode 100644 (file)

index f2d034d..0000000
--- a/ghc/compiler/utils/PrimPacked.lhs
+++ /dev/null
@@ -1,265 +0,0 @@
-%
-% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
-%
-\section{Basic ops on packed representations}
-
-Some basic operations for working on packed representations of series
-of bytes (character strings). Used by the interface lexer input
-subsystem, mostly.
-
-\begin{code}
-{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
-
-module PrimPacked (
-       Ptr(..), nullPtr, plusAddr#,
-       BA(..),
-       packString,        -- :: String -> (Int, BA)
-       unpackNBytesBA,    -- :: BA -> Int -> [Char]
-        strLength,        -- :: Ptr CChar -> Int
-        copyPrefixStr,    -- :: Addr# -> Int -> BA
-        copySubStrBA,     -- :: BA -> Int -> Int -> BA
-        eqStrPrefix,      -- :: Addr# -> ByteArray# -> Int# -> Bool
-        eqStrPrefixBA,    -- :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool
- ) where
-
--- This #define suppresses the "import FastString" that
--- HsVersions otherwise produces
-#define COMPILING_FAST_STRING
-#include "HsVersions.h"
-
-import GLAEXTS
-import UNSAFE_IO       ( unsafePerformIO )
-
-import MONAD_ST
-import Foreign
-
-#if __GLASGOW_HASKELL__ < 503
-import PrelST
-#else
-import GHC.ST
-#endif
-
-#if __GLASGOW_HASKELL__ >= 504
-import GHC.Ptr ( Ptr(..) )
-#elif __GLASGOW_HASKELL__ >= 500
-import Ptr     ( Ptr(..) )
-#endif
-
-#if __GLASGOW_HASKELL__ < 504
-import PrelIOBase      ( IO(..) )
-#else
-import GHC.IOBase      ( IO(..) )
-#endif
-\end{code}
-
-Compatibility: 4.08 didn't have the Ptr type.
-
-\begin{code}
-#if __GLASGOW_HASKELL__ <= 408
-data Ptr a = Ptr Addr# deriving (Eq, Ord)
-
-nullPtr :: Ptr a
-nullPtr = Ptr (int2Addr# 0#)
-#endif
-
-#if __GLASGOW_HASKELL__ <= 500
--- plusAddr# is a primop in GHC > 5.00
-plusAddr# :: Addr# -> Int# -> Addr#
-plusAddr# a# i# = int2Addr# (addr2Int# a# +# i#)
-#endif
-\end{code}
-
-Wrapper types for bytearrays
-
-\begin{code}
-data BA    = BA  ByteArray#
-data MBA s = MBA (MutableByteArray# s)
-\end{code}
-
-\begin{code}
-packString :: String -> (Int, BA)
-packString str = (l, arr)
- where
-  l@(I# length#) = length str
-
-  arr = runST (do
-    ch_array <- new_ps_array length#
-      -- fill in packed string from "str"
-    fill_in ch_array 0# str
-      -- freeze the puppy:
-    freeze_ps_array ch_array length#
-   )
-
-  fill_in :: MBA s -> Int# -> [Char] -> ST s ()
-  fill_in arr_in# idx [] =
-   return ()
-  fill_in arr_in# idx (C# c : cs) =
-   write_ps_array arr_in# idx c         >>
-   fill_in arr_in# (idx +# 1#) cs
-\end{code}
-
-Unpacking a string
-
-\begin{code}
-unpackNBytesBA :: BA -> Int -> [Char]
-unpackNBytesBA (BA bytes) (I# len)
- = unpack 0#
- where
-    unpack nh
-      | nh >=# len  = []
-      | otherwise   = C# ch : unpack (nh +# 1#)
-      where
-       ch = indexCharArray# bytes nh
-\end{code}
-
-Copying a char string prefix into a byte array.
-
-\begin{code}
-copyPrefixStr :: Addr# -> Int -> BA
-copyPrefixStr a# len@(I# length#) = copy' length#
- where
-   copy' length# = runST (do
-     {- allocate an array that will hold the string
-     -}
-     ch_array <- new_ps_array length#
-     {- Revert back to Haskell-only solution for the moment.
-       _ccall_ memcpy ch_array (A# a) len        >>=  \ () ->
-       write_ps_array ch_array length# (chr# 0#) >>
-     -}
-     -- fill in packed string from "addr"
-     fill_in ch_array 0#
-     -- freeze the puppy:
-     freeze_ps_array ch_array length#
-    )
-
-   fill_in :: MBA s -> Int# -> ST s ()
-   fill_in arr_in# idx
-      | idx ==# length#
-      = return ()
-      | otherwise
-      = case (indexCharOffAddr# a# idx) of { ch ->
-       write_ps_array arr_in# idx ch >>
-       fill_in arr_in# (idx +# 1#) }
-\end{code}
-
-Copying out a substring, assume a 0-indexed string:
-(and positive lengths, thank you).
-
-\begin{code}
-#ifdef UNUSED
-copySubStr :: Addr# -> Int -> Int -> BA
-copySubStr a# (I# start#) length =
-  copyPrefixStr (a# `plusAddr#` start#)  length
-#endif
-
-copySubStrBA :: BA -> Int -> Int -> BA
-copySubStrBA (BA barr#) (I# start#) len@(I# length#) = ba
- where
-  ba = runST (do
-     -- allocate an array that will hold the string
-    ch_array <- new_ps_array length#
-     -- fill in packed string from "addr"
-    fill_in ch_array 0#
-     -- freeze the puppy:
-    freeze_ps_array ch_array length#
-   )
-
-  fill_in :: MBA s -> Int# -> ST s ()
-  fill_in arr_in# idx
-      | idx ==# length#
-      = return ()
-      | otherwise
-      = case (indexCharArray# barr# (start# +# idx)) of { ch ->
-       write_ps_array arr_in# idx ch >>
-       fill_in arr_in# (idx +# 1#) }
-\end{code}
-
-(Very :-) ``Specialised'' versions of some CharArray things...
-[Copied from PackBase; no real reason -- UGH]
-
-\begin{code}
-new_ps_array   :: Int# -> ST s (MBA s)
-write_ps_array :: MBA s -> Int# -> Char# -> ST s () 
-freeze_ps_array :: MBA s -> Int# -> ST s BA
-
-#if __GLASGOW_HASKELL__ < 411
-#define NEW_BYTE_ARRAY newCharArray#
-#else 
-#define NEW_BYTE_ARRAY newPinnedByteArray#
-#endif
-
-new_ps_array size = ST $ \ s ->
-    case (NEW_BYTE_ARRAY size s)  of { (# s2#, barr# #) ->
-    (# s2#, MBA barr# #) }
-
-write_ps_array (MBA barr#) n ch = ST $ \ s# ->
-    case writeCharArray# barr# n ch s# of { s2#   ->
-    (# s2#, () #) }
-
--- same as unsafeFreezeByteArray
-freeze_ps_array (MBA arr#) len# = ST $ \ s# ->
-    case unsafeFreezeByteArray# arr# s# of { (# s2#, frozen# #) ->
-    (# s2#, BA frozen# #) }
-\end{code}
-
-
-Compare two equal-length strings for equality:
-
-\begin{code}
-eqStrPrefix :: Addr# -> ByteArray# -> Int# -> Bool
-eqStrPrefix a# barr# len# = 
-  inlinePerformIO $ do
-   x <- memcmp_ba a# barr# (I# len#)
-   return (x == 0)
-
-#ifdef UNUSED
-eqCharStrPrefix :: Addr# -> Addr# -> Int# -> Bool
-eqCharStrPrefix a1# a2# len# = 
-  inlinePerformIO $ do
-   x <- memcmp a1# a2# (I# len#)
-   return (x == 0)
-#endif
-
-eqStrPrefixBA :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool
-eqStrPrefixBA b1# b2# start# len# = 
-  inlinePerformIO $ do
-    x <- memcmp_baoff_ba b2# (I# start#) b1# (I# len#)
-    return (x == 0)
-
-#ifdef UNUSED
-eqCharStrPrefixBA :: Addr# -> ByteArray# -> Int# -> Int# -> Bool
-eqCharStrPrefixBA a# b2# start# len# = 
-  inlinePerformIO $ do
-    x <- memcmp_baoff b2# (I# start#) a# (I# len#) 
-    return (x == 0)
-#endif
-\end{code}
-
-\begin{code}
--- Just like unsafePerformIO, but we inline it.  This is safe when
--- there are no side effects, and improves performance.
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
-
-#if __GLASGOW_HASKELL__ <= 408
-strLength (Ptr a#) = ghc_strlen a#
-foreign import ccall unsafe "ghc_strlen" 
-  ghc_strlen :: Addr# -> Int
-#else
-foreign import ccall unsafe "ghc_strlen" 
-  strLength :: Ptr () -> Int
-#endif
-
-foreign import ccall unsafe "ghc_memcmp"
-  memcmp :: Addr# -> Addr# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp" 
-  memcmp_ba :: Addr# -> ByteArray# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp_off"
-  memcmp_baoff :: ByteArray# -> Int -> Addr# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp_off"
-  memcmp_baoff_ba :: ByteArray# -> Int -> ByteArray# -> Int -> IO Int
-\end{code}
diff --git a/ghc/compiler/utils/StringBuffer.lhs b/ghc/compiler/utils/StringBuffer.lhs

index e53dbc8..e2eed88 100644 (file)
--- a/ghc/compiler/utils/StringBuffer.lhs
+++ b/ghc/compiler/utils/StringBuffer.lhs
@@ -6,27 +6,32 @@
  Buffers for scanning string input stored in external arrays.
  
  \begin{code}
+{-# OPTIONS_GHC -O #-}
+-- always optimise this module, it's critical
+
  module StringBuffer
         (
          StringBuffer(..),
         -- non-abstract for vs\/HaskellService
  
          -- * Creation\/destruction
-        hGetStringBuffer,     -- :: FilePath     -> IO StringBuffer
-       stringToStringBuffer, -- :: String       -> IO StringBuffer
+        hGetStringBuffer,
+       stringToStringBuffer,
  
-         -- * Lookup
-       currentChar,       -- :: StringBuffer -> Char
-       prevChar,          -- :: StringBuffer -> Char -> Char
-       lookAhead,         -- :: StringBuffer -> Int  -> Char
-       atEnd,             -- :: StringBuffer -> Bool
+       -- * Inspection
+       nextChar,
+       currentChar,
+       prevChar,
+       atEnd,
  
-       -- * Moving
-       stepOn, stepOnBy,
+       -- * Moving and comparison
+       stepOn,
+       offsetBytes,
+       byteDiff,
  
          -- * Conversion
-        lexemeToString,     -- :: StringBuffer -> Int -> String
-        lexemeToFastString, -- :: StringBuffer -> Int -> FastString
+        lexemeToString,
+        lexemeToFastString,
  
          -- * Parsing integers
         parseInteger,
@@ -34,22 +39,19 @@ module StringBuffer
  
  #include "HsVersions.h"
  
-import FastString
-import Panic
+import Encoding
+import FastString      (FastString,mkFastString,mkFastStringBytes)
  
  import GLAEXTS
  
  import Foreign
  
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase
-import PrelHandle
-#else
-import GHC.IOBase
-import GHC.IO          ( slurpFile )
-#endif
+import GHC.IOBase              ( IO(..) )
+import GHC.Base                        ( unsafeChr )
+
+import System.IO               ( hGetBuf )
  
-import IO                      ( openFile, hFileSize, IOMode(ReadMode),
+import IO                      ( hFileSize, IOMode(ReadMode),
                                   hClose )
  #if __GLASGOW_HASKELL__ >= 601
  import System.IO               ( openBinaryFile )
@@ -57,37 +59,35 @@ import System.IO            ( openBinaryFile )
  import IOExts                   ( openFileEx, IOModeEx(..) )
  #endif
  
-#if __GLASGOW_HASKELL__ < 503
-import IArray                  ( listArray )
-import ArrayBase               ( UArray(..) )
-import MutableArray
-import IOExts                  ( hGetBufBA )
-#else
-import Data.Array.IArray       ( listArray )
-import Data.Array.MArray       ( unsafeFreeze, newArray_ )
-import Data.Array.Base         ( UArray(..)  )
-import Data.Array.IO           ( IOArray, hGetArray )
-#endif
-
-import Char                    ( ord )
-
  #if __GLASGOW_HASKELL__ < 601
  openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
  #endif
+
  -- -----------------------------------------------------------------------------
  -- The StringBuffer type
  
--- A StringBuffer is a ByteArray# with a pointer into it.  We also cache
--- the length of the ByteArray# for speed.
-
+-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
+-- The bytes are intended to be *immutable*.  There are pure
+-- operations to read the contents of a StringBuffer.
+--
+-- A StringBuffer may have a finalizer, depending on how it was
+-- obtained.
+--
  data StringBuffer
- = StringBuffer
-     ByteArray#
-     Int#         -- length
-     Int#         -- current pos
+ = StringBuffer {
+     buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+     len :: {-# UNPACK #-} !Int,       -- length
+     cur :: {-# UNPACK #-} !Int                -- current pos
+  }
+       -- The buffer is assumed to be UTF-8 encoded, and furthermore
+       -- we add three '\0' bytes to the end as sentinels so that the
+       -- decoder doesn't have to check for overflow at every single byte
+       -- of a multibyte sequence.
  
  instance Show StringBuffer where
-       showsPrec _ s = showString "<stringbuffer>"
+       showsPrec _ s = showString "<stringbuffer(" 
+                     . shows (len s) . showString "," . shows (cur s)
+                     . showString ">"
  
  -- -----------------------------------------------------------------------------
  -- Creation / Destruction
@@ -95,97 +95,108 @@ instance Show StringBuffer where
  hGetStringBuffer :: FilePath -> IO StringBuffer
  hGetStringBuffer fname = do
     h <- openBinaryFile fname ReadMode
-   size <- hFileSize h
-   let size_i@(I# sz#) = fromIntegral size
-#if __GLASGOW_HASKELL__ < 503
-   arr <- stToIO (newCharArray (0,size_i-1))
-   r <- hGetBufBA h arr size_i
-#else
-   arr <- newArray_ (0,size_i-1)
-   r <- if size_i == 0 then return 0 else hGetArray h arr size_i
-#endif
-   hClose h
-   if (r /= size_i)
+   size_i <- hFileSize h
+   let size = fromIntegral size_i
+   buf <- mallocForeignPtrArray (size+3)
+   withForeignPtr buf $ \ptr -> do
+     r <- if size == 0 then return 0 else hGetBuf h ptr size
+     hClose h
+     if (r /= size)
         then ioError (userError "short read of file")
         else do
-#if __GLASGOW_HASKELL__ < 503
-   frozen <- stToIO (unsafeFreezeByteArray arr)
-   case frozen of
-      ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#else
-   frozen <- unsafeFreeze arr
-   case frozen of
-      UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#endif
+         pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+                -- sentinels for UTF-8 decoding
+         return (StringBuffer buf size 0)
  
-#if __GLASGOW_HASKELL__ >= 502
+stringToStringBuffer :: String -> IO StringBuffer
  stringToStringBuffer str = do
-  let size@(I# sz#) = length str
-      arr = listArray (0,size-1) (map (fromIntegral.ord) str)
-                :: UArray Int Word8
-  case arr of
-       UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#else
-stringToStringBuffer = panic "stringToStringBuffer: not implemented"
-#endif
+  let size = utf8EncodedLength str
+  buf <- mallocForeignPtrArray (size+3)
+  withForeignPtr buf $ \ptr -> do
+    utf8EncodeString ptr str
+    pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+        -- sentinels for UTF-8 decoding
+  return (StringBuffer buf size 0)
  
  -- -----------------------------------------------------------------------------
--- Lookup
-
-currentChar  :: StringBuffer -> Char
-currentChar (StringBuffer arr# l# current#) =
-  ASSERT(current# <# l#)
-  C# (indexCharArray# arr# current#)
+-- Grab a character
+
+-- Getting our fingers dirty a little here, but this is performance-critical
+{-# INLINE nextChar #-}
+nextChar :: StringBuffer -> (Char,StringBuffer)
+nextChar (StringBuffer buf len (I# cur#)) =
+  inlinePerformIO $ do
+    withForeignPtr buf $ \(Ptr a#) -> do
+       case utf8DecodeChar# (a# `plusAddr#` cur#) of
+         (# c#, b# #) ->
+            let cur' = I# (b# `minusAddr#` a#) in
+            return (C# c#, StringBuffer buf len cur')
+
+currentChar :: StringBuffer -> Char
+currentChar = fst . nextChar
  
  prevChar :: StringBuffer -> Char -> Char
-prevChar (StringBuffer _ _ 0#) deflt = deflt
-prevChar s deflt = lookAhead s (-1)
-
-lookAhead :: StringBuffer -> Int  -> Char
-lookAhead (StringBuffer arr# l# c#) (I# i#) =
-  ASSERT(off <# l#  && off >=# 0#)
-  C# (indexCharArray# arr# off)
- where 
-   off = c# +# i#
+prevChar (StringBuffer buf len 0)   deflt = deflt
+prevChar (StringBuffer buf len cur) deflt = 
+  inlinePerformIO $ do
+    withForeignPtr buf $ \p -> do
+      p' <- utf8PrevChar (p `plusPtr` cur)
+      return (fst (utf8DecodeChar p'))
  
  -- -----------------------------------------------------------------------------
  -- Moving
  
  stepOn :: StringBuffer -> StringBuffer
-stepOn s = stepOnBy 1 s
+stepOn s = snd (nextChar s)
+
+offsetBytes :: Int -> StringBuffer -> StringBuffer
+offsetBytes i s = s { cur = cur s + i }
  
-stepOnBy :: Int -> StringBuffer -> StringBuffer
-stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#)
+byteDiff :: StringBuffer -> StringBuffer -> Int
+byteDiff s1 s2 = cur s2 - cur s1
  
  atEnd :: StringBuffer -> Bool
-atEnd (StringBuffer _ l# c#) = l# ==# c#
+atEnd (StringBuffer _ l c) = l == c
  
  -- -----------------------------------------------------------------------------
  -- Conversion
  
-lexemeToString :: StringBuffer -> Int -> String
+lexemeToString :: StringBuffer -> Int {-bytes-} -> String
  lexemeToString _ 0 = ""
-lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current#
- where
-    end = current# +# len#
+lexemeToString (StringBuffer buf _ cur) bytes =
+  inlinePerformIO $ 
+    withForeignPtr buf $ \ptr -> 
+      utf8DecodeString (ptr `plusPtr` cur) bytes
  
-    unpack nh
-      | nh >=# end  = []
-      | otherwise   = C# ch : unpack (nh +# 1#)
-      where
-       ch = indexCharArray# arr# nh
-
-lexemeToFastString :: StringBuffer -> Int -> FastString
+lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
  lexemeToFastString _ 0 = mkFastString ""
-lexemeToFastString (StringBuffer fo _ current#) (I# len) =
-    mkFastSubStringBA# fo current# len
+lexemeToFastString (StringBuffer buf _ cur) len =
+   inlinePerformIO $
+     withForeignPtr buf $ \ptr ->
+       return $! mkFastStringBytes (ptr `plusPtr` cur) len
  
  -- -----------------------------------------------------------------------------
  -- Parsing integer strings in various bases
  
+byteOff :: StringBuffer -> Int -> Char
+byteOff (StringBuffer buf _ cur) i = 
+  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+    w <- peek (ptr `plusPtr` (cur+i))
+    return (unsafeChr (fromIntegral (w::Word8)))
+
+-- | XXX assumes ASCII digits only
  parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
  parseInteger buf len radix to_int 
    = go 0 0
    where go i x | i == len  = x
-              | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
+              | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #)   -> r
+
  \end{code}
diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs

deleted file mode 100644 (file)

index 56e95a5..0000000
--- a/ghc/compiler/utils/UnicodeUtil.lhs
+++ /dev/null
@@ -1,36 +0,0 @@
-Various Unicode-related utilities.
-
-\begin{code}
-module UnicodeUtil(
-    stringToUtf8, intsToUtf8
-  ) where
-
-#include "HsVersions.h"
-
-import Panic ( panic )
-import Char  ( chr, ord )
-\end{code}
-
-\begin{code}
-stringToUtf8 :: String -> String
-stringToUtf8 s = intsToUtf8 (map ord s)
-
-intsToUtf8 :: [Int] -> String
-intsToUtf8 []       = ""
-intsToUtf8 (c:s)
-    | c >= 1 && c <= 0x7F = chr c : intsToUtf8 s
-    | c < 0           = panic ("charToUtf8 ("++show c++")")
-    | c <= 0x7FF      = chr (0xC0 + c `div`       0x40           ) :
-                        chr (0x80 + c                  `mod` 0x40) :
-                        intsToUtf8 s
-    | c <= 0xFFFF     = chr (0xE0 + c `div`     0x1000           ) :
-                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
-                        chr (0x80 + c                  `mod` 0x40) :
-                        intsToUtf8 s
-    | c <= 0x10FFFF   = chr (0xF0 + c `div`    0x40000           ) :
-                        chr (0x80 + c `div`     0x1000 `mod` 0x40) :
-                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
-                        chr (0x80 + c                  `mod` 0x40) :
-                        intsToUtf8 s
-    | otherwise       = panic ("charToUtf8 "++show c)
-\end{code}
author	simonmar <unknown>
	Fri, 6 Jan 2006 16:30:19 +0000 (16:30 +0000)
committer	simonmar <unknown>
	Fri, 6 Jan 2006 16:30:19 +0000 (16:30 +0000)
ghc/compiler/HsVersions.h		patch \| blob \| history
ghc/compiler/Makefile		patch \| blob \| history
ghc/compiler/basicTypes/Id.lhs		patch \| blob \| history
ghc/compiler/basicTypes/Literal.lhs		patch \| blob \| history
ghc/compiler/basicTypes/MkId.lhs		patch \| blob \| history
ghc/compiler/basicTypes/Module.lhs		patch \| blob \| history
ghc/compiler/basicTypes/Name.lhs		patch \| blob \| history
ghc/compiler/basicTypes/OccName.lhs		patch \| blob \| history
ghc/compiler/basicTypes/RdrName.lhs		patch \| blob \| history
ghc/compiler/cmm/CLabel.hs		patch \| blob \| history
ghc/compiler/cmm/Cmm.hs		patch \| blob \| history
ghc/compiler/cmm/CmmLex.x		patch \| blob \| history
ghc/compiler/cmm/CmmParse.y		patch \| blob \| history
ghc/compiler/cmm/PprC.hs		patch \| blob \| history
ghc/compiler/cmm/PprCmm.hs		patch \| blob \| history
ghc/compiler/codeGen/CgProf.hs		patch \| blob \| history
ghc/compiler/codeGen/CgUtils.hs		patch \| blob \| history
ghc/compiler/codeGen/ClosureInfo.lhs		patch \| blob \| history
ghc/compiler/deSugar/Check.lhs		patch \| blob \| history
ghc/compiler/deSugar/DsForeign.lhs		patch \| blob \| history
ghc/compiler/deSugar/DsMeta.hs		patch \| blob \| history
ghc/compiler/deSugar/DsUtils.lhs		patch \| blob \| history
ghc/compiler/ghci/ByteCodeGen.lhs		patch \| blob \| history
ghc/compiler/ghci/ByteCodeLink.lhs		patch \| blob \| history
ghc/compiler/ghci/InteractiveUI.hs		patch \| blob \| history
ghc/compiler/hsSyn/Convert.lhs		patch \| blob \| history
ghc/compiler/hsSyn/HsDecls.lhs		patch \| blob \| history
ghc/compiler/hsSyn/HsUtils.lhs		patch \| blob \| history
ghc/compiler/iface/LoadIface.lhs		patch \| blob \| history
ghc/compiler/iface/MkIface.lhs		patch \| blob \| history
ghc/compiler/main/DriverMkDepend.hs		patch \| blob \| history
ghc/compiler/main/DriverPipeline.hs		patch \| blob \| history
ghc/compiler/main/Finder.lhs		patch \| blob \| history
ghc/compiler/main/GHC.hs		patch \| blob \| history
ghc/compiler/main/HscTypes.lhs		patch \| blob \| history
ghc/compiler/nativeGen/PprMach.hs		patch \| blob \| history
ghc/compiler/ndpFlatten/FlattenMonad.hs		patch \| blob \| history
ghc/compiler/parser/Ctype.lhs		patch \| blob \| history
ghc/compiler/parser/Lexer.x		patch \| blob \| history
ghc/compiler/parser/Parser.y.pp		patch \| blob \| history
ghc/compiler/parser/ParserCore.y		patch \| blob \| history
ghc/compiler/parser/RdrHsSyn.lhs		patch \| blob \| history
ghc/compiler/prelude/PrelNames.lhs		patch \| blob \| history
ghc/compiler/prelude/PrelRules.lhs		patch \| blob \| history
ghc/compiler/prelude/PrimOp.lhs		patch \| blob \| history
ghc/compiler/prelude/TysPrim.lhs		patch \| blob \| history
ghc/compiler/prelude/TysWiredIn.lhs		patch \| blob \| history
ghc/compiler/profiling/CostCentre.lhs		patch \| blob \| history
ghc/compiler/rename/RnEnv.lhs		patch \| blob \| history
ghc/compiler/rename/RnExpr.lhs		patch \| blob \| history
ghc/compiler/rename/RnNames.lhs		patch \| blob \| history
ghc/compiler/simplCore/SetLevels.lhs		patch \| blob \| history
ghc/compiler/simplCore/SimplMonad.lhs		patch \| blob \| history
ghc/compiler/simplCore/Simplify.lhs		patch \| blob \| history
ghc/compiler/stgSyn/CoreToStg.lhs		patch \| blob \| history
ghc/compiler/typecheck/Inst.lhs		patch \| blob \| history
ghc/compiler/typecheck/TcClassDcl.lhs		patch \| blob \| history
ghc/compiler/typecheck/TcGenDeriv.lhs		patch \| blob \| history
ghc/compiler/typecheck/TcInstDcls.lhs		patch \| blob \| history
ghc/compiler/typecheck/TcRnDriver.lhs		patch \| blob \| history
ghc/compiler/typecheck/TcSplice.lhs		patch \| blob \| history
ghc/compiler/types/TypeRep.lhs		patch \| blob \| history
ghc/compiler/utils/Binary.hs		patch \| blob \| history
ghc/compiler/utils/BufWrite.hs		patch \| blob \| history
ghc/compiler/utils/Encoding.hs	[new file with mode: 0644]	patch \| blob
ghc/compiler/utils/FastString.lhs		patch \| blob \| history
ghc/compiler/utils/FastTypes.lhs		patch \| blob \| history
ghc/compiler/utils/Pretty.lhs		patch \| blob \| history
ghc/compiler/utils/PrimPacked.lhs	[deleted file]	patch \| blob \| history
ghc/compiler/utils/StringBuffer.lhs		patch \| blob \| history
ghc/compiler/utils/UnicodeUtil.lhs	[deleted file]	patch \| blob \| history