-- when compiling FastString itself
#ifndef COMPILING_FAST_STRING
--
-import qualified FastString
+import qualified FastString as FS
#endif
-#define SLIT(x) (FastString.mkLitString# (x#))
-#define FSLIT(x) (FastString.mkFastString# (x#))
+#define SLIT(x) (FS.mkLitString# (x#))
+#define FSLIT(x) (FS.mkFastString# (x#))
+
+-- Useful for declaring arguments to be strict
+#define STRICT1(f) f a b c | a `seq` False = undefined
+#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
+#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
+#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
+#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
+#define STRICT6(f) f a b c d e f | a `seq` b `seq` c `seq` d `seq` e `seq` f `seq` False = undefined
#endif /* HsVersions.h */
PKG_DEPENDS += template-haskell
# Use threaded RTS with GHCi, so threads don't get blocked at the prompt.
-SRC_HC_OPTS += -threaded
+# SRC_HC_OPTS += -threaded
ALL_DIRS += ghci
-- Simple construction
mkGlobalId, mkLocalId, mkLocalIdWithInfo,
- mkSysLocal, mkSysLocalUnencoded, mkUserLocal, mkVanillaGlobal,
+ mkSysLocal, mkUserLocal, mkVanillaGlobal,
mkTemplateLocals, mkTemplateLocalsNum, mkWildId, mkTemplateLocal,
mkWorkerId, mkExportedLocalId,
import DataCon ( DataCon, isUnboxedTupleCon )
import NewDemand ( Demand, StrictSig, topDmd, topSig, isBottomingSig )
import Name ( Name, OccName, nameIsLocalOrFrom,
- mkSystemVarName, mkSystemVarNameEncoded, mkInternalName,
- getOccName, getSrcLoc
- )
+ mkSystemVarName, mkInternalName, getOccName,
+ getSrcLoc )
import Module ( Module )
-import OccName ( EncodedFS, mkWorkerOcc )
+import OccName ( mkWorkerOcc )
import Maybes ( orElse )
import SrcLoc ( SrcLoc )
import Outputable
import Unique ( Unique, mkBuiltinUnique )
+import FastString ( FastString )
import StaticFlags ( opt_NoStateHack )
-- infixl so you can say (id `set` a `set` b)
-- SysLocal: for an Id being created by the compiler out of thin air...
-- UserLocal: an Id with a name the user might recognize...
mkUserLocal :: OccName -> Unique -> Type -> SrcLoc -> Id
-mkSysLocal :: EncodedFS -> Unique -> Type -> Id
+mkSysLocal :: FastString -> Unique -> Type -> Id
mkVanillaGlobal :: Name -> Type -> IdInfo -> Id
--- for SysLocal, we assume the base name is already encoded, to avoid
--- re-encoding the same string over and over again.
-mkSysLocal fs uniq ty = mkLocalId (mkSystemVarNameEncoded uniq fs) ty
-
--- version to use when the faststring needs to be encoded
-mkSysLocalUnencoded fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty
+mkSysLocal fs uniq ty = mkLocalId (mkSystemVarName uniq fs) ty
mkUserLocal occ uniq ty loc = mkLocalId (mkInternalName uniq occ loc) ty
mkVanillaGlobal = mkGlobalId VanillaGlobal
import FastString
import Binary
-import UnicodeUtil ( stringToUtf8 )
import Ratio ( numerator )
import FastString ( uniqueOfFS, lengthFS )
import DATA_INT ( Int8, Int16, Int32 )
= ------------------
-- First the primitive guys
MachChar Char -- Char# At least 31 bits
- | MachStr FastString
+
+ | MachStr FastString -- A string-literal: stored and emitted
+ -- UTF-8 encoded, we'll arrange to decode it
+ -- at runtime. Also emitted with a '\0'
+ -- terminator.
| MachNullAddr -- the NULL pointer, the only pointer value
-- that can be represented as a Literal.
mkMachWord64 x = MachWord64 x
mkStringLit :: String -> Literal
-mkStringLit s = MachStr (mkFastString (stringToUtf8 s))
+mkStringLit s = MachStr (mkFastString s) -- stored UTF-8 encoded
inIntRange, inWordRange :: Integer -> Bool
inIntRange x = x >= tARGET_MIN_INT && x <= tARGET_MAX_INT
import Var ( Id, TyVar, Var )
import VarSet ( isEmptyVarSet, subVarSet, varSetElems )
import Name ( mkFCallName, mkWiredInName, Name, BuiltInSyntax(..) )
-import OccName ( mkOccFS, varName )
+import OccName ( mkOccNameFS, varName )
import PrimOp ( PrimOp, primOpSig, primOpOcc, primOpTag )
import ForeignCall ( ForeignCall )
import DataCon ( DataCon, DataConIds(..), dataConTyVars,
\begin{code}
mkWiredInIdName mod fs uniq id
- = mkWiredInName mod (mkOccFS varName fs) uniq Nothing (AnId id) UserSyntax
+ = mkWiredInName mod (mkOccNameFS varName fs) uniq Nothing (AnId id) UserSyntax
unsafeCoerceName = mkWiredInIdName gHC_PRIM FSLIT("unsafeCoerce#") unsafeCoerceIdKey unsafeCoerceId
nullAddrName = mkWiredInIdName gHC_PRIM FSLIT("nullAddr#") nullAddrIdKey nullAddrId
Module
~~~~~~~~~~
-Simply the name of a module, represented as a Z-encoded FastString.
+Simply the name of a module, represented as a FastString.
These are Uniquable, hence we can build FiniteMaps with ModuleNames as
the keys.
, ModLocation(..)
, addBootSuffix, addBootSuffix_maybe, addBootSuffixLocn
- , moduleString -- :: ModuleName -> EncodedString
- , moduleUserString -- :: ModuleName -> UserString
- , moduleFS -- :: ModuleName -> EncodedFS
+ , moduleString -- :: ModuleName -> String
+ , moduleFS -- :: ModuleName -> FastString
- , mkModule -- :: UserString -> ModuleName
- , mkModuleFS -- :: UserFS -> ModuleName
- , mkSysModuleFS -- :: EncodedFS -> ModuleName
+ , mkModule -- :: String -> ModuleName
+ , mkModuleFS -- :: FastString -> ModuleName
, ModuleEnv
, elemModuleEnv, extendModuleEnv, extendModuleEnvList, plusModuleEnv_C
%************************************************************************
\begin{code}
-newtype Module = Module EncodedFS
+newtype Module = Module FastString
-- Haskell module names can include the quote character ',
-- so the module names have the z-encoding applied to them
instance Outputable Module where
ppr = pprModule
-
pprModule :: Module -> SDoc
-pprModule (Module nm) = pprEncodedFS nm
+pprModule (Module nm) =
+ getPprStyle $ \ sty ->
+ if codeStyle sty
+ then ftext (zEncodeFS nm)
+ else ftext nm
-moduleFS :: Module -> EncodedFS
+moduleFS :: Module -> FastString
moduleFS (Module mod) = mod
-moduleString :: Module -> EncodedString
+moduleString :: Module -> String
moduleString (Module mod) = unpackFS mod
-moduleUserString :: Module -> UserString
-moduleUserString (Module mod) = decode (unpackFS mod)
-
-- used to be called mkSrcModule
-mkModule :: UserString -> Module
-mkModule s = Module (mkFastString (encode s))
+mkModule :: String -> Module
+mkModule s = Module (mkFastString s)
-- used to be called mkSrcModuleFS
-mkModuleFS :: UserFS -> Module
-mkModuleFS s = Module (encodeFS s)
-
--- used to be called mkSysModuleFS
-mkSysModuleFS :: EncodedFS -> Module
-mkSysModuleFS s = Module s
+mkModuleFS :: FastString -> Module
+mkModuleFS s = Module s
\end{code}
%************************************************************************
Name, -- Abstract
BuiltInSyntax(..),
mkInternalName, mkSystemName,
- mkSystemVarName, mkSystemVarNameEncoded, mkSysTvName,
+ mkSystemVarName, mkSysTvName,
mkFCallName, mkIPName,
mkExternalName, mkWiredInName,
import {-# SOURCE #-} TypeRep( TyThing )
import OccName -- All of it
-import Module ( Module )
+import Module ( Module, moduleFS )
import SrcLoc ( noSrcLoc, wiredInSrcLoc, SrcLoc )
import Unique ( Unique, Uniquable(..), getKey, pprUnique )
import Maybes ( orElse, isJust )
+import FastString ( FastString, zEncodeFS )
import Outputable
\end{code}
mkSystemName uniq occ = Name { n_uniq = uniq, n_sort = System,
n_occ = occ, n_loc = noSrcLoc }
-mkSystemVarName :: Unique -> UserFS -> Name
-mkSystemVarName uniq fs = mkSystemName uniq (mkVarOcc fs)
+mkSystemVarName :: Unique -> FastString -> Name
+mkSystemVarName uniq fs = mkSystemName uniq (mkVarOccFS fs)
--- Use this version when the string is already encoded. Avoids duplicating
--- the string each time a new name is created.
-mkSystemVarNameEncoded :: Unique -> EncodedFS -> Name
-mkSystemVarNameEncoded uniq fs = mkSystemName uniq (mkSysOccFS varName fs)
+mkSysTvName :: Unique -> FastString -> Name
+mkSysTvName uniq fs = mkSystemName uniq (mkOccNameFS tvName fs)
-mkSysTvName :: Unique -> EncodedFS -> Name
-mkSysTvName uniq fs = mkSystemName uniq (mkSysOccFS tvName fs)
-
-mkFCallName :: Unique -> EncodedString -> Name
+mkFCallName :: Unique -> String -> Name
-- The encoded string completely describes the ccall
mkFCallName uniq str = Name { n_uniq = uniq, n_sort = Internal,
- n_occ = mkFCallOcc str, n_loc = noSrcLoc }
+ n_occ = mkVarOcc str, n_loc = noSrcLoc }
mkIPName :: Unique -> OccName -> Name
mkIPName uniq occ
Internal -> pprInternal sty uniq occ
pprExternal sty uniq mod occ is_wired is_builtin
- | codeStyle sty = ppr mod <> char '_' <> ppr_occ_name occ
+ | codeStyle sty = ppr_z_module mod <> char '_' <> ppr_z_occ_name occ
-- In code style, always qualify
-- ToDo: maybe we could print all wired-in things unqualified
-- in code style, to reduce symbol table bloat?
| debugStyle sty = ppr mod <> dot <> ppr_occ_name occ
<> braces (hsep [if is_wired then ptext SLIT("(w)") else empty,
- text (briefOccNameFlavour occ),
+ pprNameSpaceBrief (occNameSpace occ),
pprUnique uniq])
| BuiltInSyntax <- is_builtin = ppr_occ_name occ
-- never qualify builtin syntax
pprInternal sty uniq occ
| codeStyle sty = pprUnique uniq
- | debugStyle sty = ppr_occ_name occ <> braces (hsep [text (briefOccNameFlavour occ),
+ | debugStyle sty = ppr_occ_name occ <> braces (hsep [pprNameSpaceBrief (occNameSpace occ),
pprUnique uniq])
| dumpStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq
-- For debug dumps, we're not necessarily dumping
pprSystem sty uniq occ
| codeStyle sty = pprUnique uniq
| debugStyle sty = ppr_occ_name occ <> char '_' <> pprUnique uniq
- <> braces (text (briefOccNameFlavour occ))
+ <> braces (pprNameSpaceBrief (occNameSpace occ))
| otherwise = ppr_occ_name occ <> char '_' <> pprUnique uniq
-- If the tidy phase hasn't run, the OccName
-- is unlikely to be informative (like 's'),
-- so print the unique
-ppr_occ_name occ = pprEncodedFS (occNameFS occ)
+ppr_occ_name occ = ftext (occNameFS occ)
-- Don't use pprOccName; instead, just print the string of the OccName;
-- we print the namespace in the debug stuff above
+
+-- In code style, we Z-encode the strings. The results of Z-encoding each FastString are
+-- cached behind the scenes in the FastString implementation.
+ppr_z_occ_name occ = ftext (zEncodeFS (occNameFS occ))
+ppr_z_module mod = ftext (zEncodeFS (moduleFS mod))
+
\end{code}
%************************************************************************
\begin{code}
module OccName (
- -- The NameSpace type; abstact
+ -- * The NameSpace type; abstact
NameSpace, tcName, clsName, tcClsName, dataName, varName,
- tvName, srcDataName, nameSpaceString,
+ tvName, srcDataName,
- -- The OccName type
+ -- ** Printing
+ pprNameSpace, pprNonVarNameSpace, pprNameSpaceBrief,
+
+ -- * The OccName type
OccName, -- Abstract, instance of Outputable
pprOccName,
+ -- ** Construction
+ mkOccName, mkOccNameFS,
+ mkVarOcc, mkVarOccFS,
+ mkTyVarOcc,
+ mkDFunOcc,
+ mkTupleOcc,
+ setOccNameSpace,
+
+ -- ** Derived OccNames
+ mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc,
+ mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc,
+ mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2,
+ mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc,
+ mkSuperDictSelOcc, mkLocalOcc, mkMethodOcc,
+
+ -- ** Deconstruction
+ occNameFS, occNameString, occNameSpace,
+
+ isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
+ parenSymOcc, reportIfUnused, isTcClsName, isVarName,
+
+ isTupleOcc_maybe,
+
-- The OccEnv type
OccEnv, emptyOccEnv, unitOccEnv, extendOccEnv,
lookupOccEnv, mkOccEnv, extendOccEnvList, elemOccEnv,
occEnvElts, foldOccEnv, plusOccEnv, plusOccEnv_C, extendOccEnv_C,
-
-- The OccSet type
- OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet, extendOccSetList,
+ OccSet, emptyOccSet, unitOccSet, mkOccSet, extendOccSet,
+ extendOccSetList,
unionOccSets, unionManyOccSets, minusOccSet, elemOccSet, occSetElts,
foldOccSet, isEmptyOccSet, intersectOccSet, intersectsOccSet,
- mkOccName, mkOccFS, mkSysOcc, mkSysOccFS, mkFCallOcc, mkKindOccFS,
- mkVarOcc, mkVarOccEncoded, mkTyVarOcc,
- mkSuperDictSelOcc, mkDFunOcc, mkForeignExportOcc,
- mkDictOcc, mkIPOcc, mkWorkerOcc, mkMethodOcc, mkDefaultMethodOcc,
- mkDerivedTyConOcc, mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc,
- mkGenOcc1, mkGenOcc2, mkLocalOcc, mkDataTOcc, mkDataCOcc,
- mkDataConWrapperOcc, mkDataConWorkerOcc,
-
- isVarOcc, isTvOcc, isTcOcc, isDataOcc, isDataSymOcc, isSymOcc, isValOcc,
- parenSymOcc, reportIfUnused, isTcClsName, isVarName,
-
- occNameFS, occNameString, occNameUserString, occNameSpace,
- occNameFlavour, briefOccNameFlavour,
- setOccNameSpace,
-
- mkTupleOcc, isTupleOcc_maybe,
-
-- Tidying up
TidyOccEnv, emptyTidyOccEnv, tidyOccName, initTidyOccEnv,
- -- Encoding
- EncodedString, EncodedFS, UserString, UserFS, encode, encodeFS, decode, pprEncodedFS,
-
-- The basic form of names
isLexCon, isLexVar, isLexId, isLexSym,
isLexConId, isLexConSym, isLexVarId, isLexVarSym,
- isLowerISO, isUpperISO,
startsVarSym, startsVarId, startsConSym, startsConId
) where
#include "HsVersions.h"
-import Char ( isDigit, isUpper, isLower, isAlphaNum, ord, chr, digitToInt )
-import Util ( thenCmp )
-import Unique ( Unique, mkUnique, Uniquable(..) )
-import BasicTypes ( Boxity(..), Arity )
-import StaticFlags ( opt_PprStyle_Debug )
+import Util ( thenCmp )
+import Unique ( Unique, mkUnique, Uniquable(..) )
+import BasicTypes ( Boxity(..), Arity )
+import StaticFlags ( opt_PprStyle_Debug )
import UniqFM
import UniqSet
import FastString
import Binary
import GLAEXTS
-\end{code}
-
-We hold both module names and identifier names in a 'Z-encoded' form
-that makes them acceptable both as a C identifier and as a Haskell
-(prefix) identifier.
-
-They can always be decoded again when printing error messages
-or anything else for the user, but it does make sense for it
-to be represented here in encoded form, so that when generating
-code the encoding operation is not performed on each occurrence.
-These type synonyms help documentation.
-
-\begin{code}
-type UserFS = FastString -- As the user typed it
-type EncodedFS = FastString -- Encoded form
+import Data.Char ( isUpper, isLower, ord )
-type UserString = String -- As the user typed it
-type EncodedString = String -- Encoded form
+-- Unicode TODO: put isSymbol in libcompat
+#if __GLASGOW_HASKELL__ > 604
+import Data.Char ( isSymbol )
+#else
+isSymbol = const False
+#endif
-
-pprEncodedFS :: EncodedFS -> SDoc
-pprEncodedFS fs
- = getPprStyle $ \ sty ->
- if userStyle sty || dumpStyle sty
- -- ftext (decodeFS fs) would needlessly pack the string again
- then text (decode (unpackFS fs))
- else ftext fs
\end{code}
%************************************************************************
isVarName VarName = True
isVarName other = False
+pprNameSpace :: NameSpace -> SDoc
+pprNameSpace DataName = ptext SLIT("data constructor")
+pprNameSpace VarName = ptext SLIT("variable")
+pprNameSpace TvName = ptext SLIT("type variable")
+pprNameSpace TcClsName = ptext SLIT("type constructor or class")
+
+pprNonVarNameSpace :: NameSpace -> SDoc
+pprNonVarNameSpace VarName = empty
+pprNonVarNameSpace ns = pprNameSpace ns
-nameSpaceString :: NameSpace -> String
-nameSpaceString DataName = "data constructor"
-nameSpaceString VarName = "variable"
-nameSpaceString TvName = "type variable"
-nameSpaceString TcClsName = "type constructor or class"
+pprNameSpaceBrief DataName = char 'd'
+pprNameSpaceBrief VarName = char 'v'
+pprNameSpaceBrief TvName = ptext SLIT("tv")
+pprNameSpaceBrief TcClsName = ptext SLIT("tc")
\end{code}
\begin{code}
data OccName = OccName
{ occNameSpace :: !NameSpace
- , occNameFS :: !EncodedFS
+ , occNameFS :: !FastString
}
\end{code}
pprOccName :: OccName -> SDoc
pprOccName (OccName sp occ)
= getPprStyle $ \ sty ->
- pprEncodedFS occ <> if debugStyle sty then
- braces (text (briefNameSpaceFlavour sp))
- else empty
+ if codeStyle sty
+ then ftext (zEncodeFS occ)
+ else ftext occ <> if debugStyle sty
+ then braces (pprNameSpaceBrief sp)
+ else empty
\end{code}
%* *
\subsection{Construction}
%* *
-%*****p*******************************************************************
-
-*Sys* things do no encoding; the caller should ensure that the thing is
-already encoded
-
-\begin{code}
-mkSysOcc :: NameSpace -> EncodedString -> OccName
-mkSysOcc occ_sp str = ASSERT2( alreadyEncoded str, text str )
- OccName occ_sp (mkFastString str)
-
-mkSysOccFS :: NameSpace -> EncodedFS -> OccName
-mkSysOccFS occ_sp fs = ASSERT2( alreadyEncodedFS fs, ppr fs )
- OccName occ_sp fs
-
-mkFCallOcc :: EncodedString -> OccName
--- This version of mkSysOcc doesn't check that the string is already encoded,
--- because it will be something like "{__ccall f dyn Int# -> Int#}"
--- This encodes a lot into something that then parses like an Id.
--- But then alreadyEncoded complains about the braces!
-mkFCallOcc str = OccName varName (mkFastString str)
-
--- Kind constructors get a special function. Uniquely, they are not encoded,
--- so that they have names like '*'. This means that *even in interface files*
--- we'll get kinds like (* -> (* -> *)). We can't use mkSysOcc because it
--- has an ASSERT that doesn't hold.
-mkKindOccFS :: NameSpace -> EncodedFS -> OccName
-mkKindOccFS occ_sp fs = OccName occ_sp fs
-\end{code}
-
-*Source-code* things are encoded.
+%************************************************************************
\begin{code}
-mkOccFS :: NameSpace -> UserFS -> OccName
-mkOccFS occ_sp fs = mkSysOccFS occ_sp (encodeFS fs)
-
mkOccName :: NameSpace -> String -> OccName
-mkOccName ns s = mkSysOcc ns (encode s)
+mkOccName occ_sp str = OccName occ_sp (mkFastString str)
-mkVarOcc :: UserFS -> OccName
-mkVarOcc fs = mkSysOccFS varName (encodeFS fs)
+mkOccNameFS :: NameSpace -> FastString -> OccName
+mkOccNameFS occ_sp fs = OccName occ_sp fs
-mkTyVarOcc :: UserFS -> OccName
-mkTyVarOcc fs = mkSysOccFS tvName (encodeFS fs)
+mkVarOcc :: String -> OccName
+mkVarOcc s = mkOccName varName s
-mkVarOccEncoded :: EncodedFS -> OccName
-mkVarOccEncoded fs = mkSysOccFS varName fs
-\end{code}
+mkVarOccFS :: FastString -> OccName
+mkVarOccFS fs = mkOccNameFS varName fs
+mkTyVarOcc :: FastString -> OccName
+mkTyVarOcc fs = mkOccNameFS tvName fs
+\end{code}
%************************************************************************
%* *
%************************************************************************
-\begin{code}
-occNameString :: OccName -> EncodedString
+\begin{code}
+occNameString :: OccName -> String
occNameString (OccName _ s) = unpackFS s
-occNameUserString :: OccName -> UserString
-occNameUserString occ = decode (occNameString occ)
-
setOccNameSpace :: NameSpace -> OccName -> OccName
setOccNameSpace sp (OccName _ occ) = OccName sp occ
--- occNameFlavour is used only to generate good error messages
-occNameFlavour :: OccName -> SDoc
-occNameFlavour (OccName DataName _) = ptext SLIT("data constructor")
-occNameFlavour (OccName TvName _) = ptext SLIT("type variable")
-occNameFlavour (OccName TcClsName _) = ptext SLIT("type constructor or class")
-occNameFlavour (OccName VarName s) = empty
-
--- briefOccNameFlavour is used in debug-printing of names
-briefOccNameFlavour :: OccName -> String
-briefOccNameFlavour (OccName sp _) = briefNameSpaceFlavour sp
-
-briefNameSpaceFlavour DataName = "d"
-briefNameSpaceFlavour VarName = "v"
-briefNameSpaceFlavour TvName = "tv"
-briefNameSpaceFlavour TcClsName = "tc"
-\end{code}
-
-\begin{code}
isVarOcc, isTvOcc, isDataSymOcc, isSymOcc, isTcOcc :: OccName -> Bool
isVarOcc (OccName VarName _) = True
-- Data constructor operator (starts with ':', or '[]')
-- Pretty inefficient!
-isDataSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
-isDataSymOcc (OccName VarName s) = isLexConSym (decodeFS s)
+isDataSymOcc (OccName DataName s) = isLexConSym s
+isDataSymOcc (OccName VarName s) = isLexConSym s
isDataSymOcc other = False
isDataOcc (OccName DataName _) = True
-isDataOcc (OccName VarName s) = isLexCon (decodeFS s)
+isDataOcc (OccName VarName s) = isLexCon s
isDataOcc other = False
-- Any operator (data constructor or variable)
-- Pretty inefficient!
-isSymOcc (OccName DataName s) = isLexConSym (decodeFS s)
-isSymOcc (OccName TcClsName s) = isLexConSym (decodeFS s)
-isSymOcc (OccName VarName s) = isLexSym (decodeFS s)
+isSymOcc (OccName DataName s) = isLexConSym s
+isSymOcc (OccName TcClsName s) = isLexConSym s
+isSymOcc (OccName VarName s) = isLexSym s
isSymOcc other = False
parenSymOcc :: OccName -> SDoc -> SDoc
reportIfUnused :: OccName -> Bool
-- Haskell 98 encourages compilers to suppress warnings about
-- unused names in a pattern if they start with "_".
-reportIfUnused occ = case occNameUserString occ of
+reportIfUnused occ = case occNameString occ of
('_' : _) -> False
- zz_other -> True
+ _other -> True
\end{code}
-
%************************************************************************
%* *
\subsection{Making system names}
\begin{code}
mk_deriv :: NameSpace
-> String -- Distinguishes one sort of derived name from another
- -> EncodedString -- Must be already encoded!! We don't want to encode it a
- -- second time because encoding isn't idempotent
+ -> String
-> OccName
-mk_deriv occ_sp sys_prefix str = mkSysOcc occ_sp (encode sys_prefix ++ str)
+mk_deriv occ_sp sys_prefix str = mkOccName occ_sp (sys_prefix ++ str)
\end{code}
\begin{code}
-mkDictOcc, mkIPOcc, mkWorkerOcc, mkDefaultMethodOcc,
- mkClassTyConOcc, mkClassDataConOcc, mkSpecOcc
+mkDataConWrapperOcc, mkWorkerOcc, mkDefaultMethodOcc, mkDerivedTyConOcc,
+ mkClassTyConOcc, mkClassDataConOcc, mkDictOcc, mkIPOcc,
+ mkSpecOcc, mkForeignExportOcc, mkGenOcc1, mkGenOcc2,
+ mkDataTOcc, mkDataCOcc, mkDataConWorkerOcc
:: OccName -> OccName
-- These derived variables have a prefix that no Haskell value could have
mk_simple_deriv sp px occ = mk_deriv sp px (occNameString occ)
-
-- Data constructor workers are made by setting the name space
-- of the data constructor OccName (which should be a DataName)
-- to VarName
\begin{code}
-mkDFunOcc :: EncodedString -- Typically the class and type glommed together e.g. "OrdMaybe"
+mkDFunOcc :: String -- Typically the class and type glommed together e.g. "OrdMaybe"
-- Only used in debug mode, for extra clarity
-> Bool -- True <=> hs-boot instance dfun
-> Int -- Unique index
Just n -> -- Already used: make a new guess,
-- change the guess base, and try again
tidyOccName (extendOccEnv in_scope occ (n+1))
- (mkSysOcc occ_sp (unpackFS fs ++ show n))
+ (mkOccName occ_sp (unpackFS fs ++ show n))
\end{code}
-
-%************************************************************************
-%* *
-\subsection{The 'Z' encoding}
-%* *
-%************************************************************************
-
-This is the main name-encoding and decoding function. It encodes any
-string into a string that is acceptable as a C name. This is the name
-by which things are known right through the compiler.
-
-The basic encoding scheme is this.
-
-* Tuples (,,,) are coded as Z3T
-
-* Alphabetic characters (upper and lower) and digits
- all translate to themselves;
- except 'Z', which translates to 'ZZ'
- and 'z', which translates to 'zz'
- We need both so that we can preserve the variable/tycon distinction
-
-* Most other printable characters translate to 'zx' or 'Zx' for some
- alphabetic character x
-
-* The others translate as 'znnnU' where 'nnn' is the decimal number
- of the character
-
- Before After
- --------------------------
- Trak Trak
- foo_wib foozuwib
- > zg
- >1 zg1
- foo# foozh
- foo## foozhzh
- foo##1 foozhzh1
- fooZ fooZZ
- :+ ZCzp
- () Z0T 0-tuple
- (,,,,) Z5T 5-tuple
- (# #) Z1H unboxed 1-tuple (note the space)
- (#,,,,#) Z5H unboxed 5-tuple
- (NB: There is no Z1T nor Z0H.)
-
-\begin{code}
--- alreadyEncoded is used in ASSERTs to check for encoded
--- strings. It isn't fail-safe, of course, because, say 'zh' might
--- be encoded or not.
-alreadyEncoded :: String -> Bool
-alreadyEncoded s = all ok s
- where
- ok ' ' = True
- -- This is a bit of a lie; if we really wanted spaces
- -- in names we'd have to encode them. But we do put
- -- spaces in ccall "occurrences", and we don't want to
- -- reject them here
- ok ch = isAlphaNum ch
-
-alreadyEncodedFS :: FastString -> Bool
-alreadyEncodedFS fs = alreadyEncoded (unpackFS fs)
-
-encode :: UserString -> EncodedString
-encode cs = case maybe_tuple cs of
- Just n -> n -- Tuples go to Z2T etc
- Nothing -> go cs
- where
- go [] = []
- go (c:cs) = encode_ch c ++ go cs
-
-encodeFS :: UserFS -> EncodedFS
-encodeFS fast_str | all unencodedChar str = fast_str
- | otherwise = mkFastString (encode str)
- where
- str = unpackFS fast_str
-
-unencodedChar :: Char -> Bool -- True for chars that don't need encoding
-unencodedChar 'Z' = False
-unencodedChar 'z' = False
-unencodedChar c = c >= 'a' && c <= 'z'
- || c >= 'A' && c <= 'Z'
- || c >= '0' && c <= '9'
-
-encode_ch :: Char -> EncodedString
-encode_ch c | unencodedChar c = [c] -- Common case first
-
--- Constructors
-encode_ch '(' = "ZL" -- Needed for things like (,), and (->)
-encode_ch ')' = "ZR" -- For symmetry with (
-encode_ch '[' = "ZM"
-encode_ch ']' = "ZN"
-encode_ch ':' = "ZC"
-encode_ch 'Z' = "ZZ"
-
--- Variables
-encode_ch 'z' = "zz"
-encode_ch '&' = "za"
-encode_ch '|' = "zb"
-encode_ch '^' = "zc"
-encode_ch '$' = "zd"
-encode_ch '=' = "ze"
-encode_ch '>' = "zg"
-encode_ch '#' = "zh"
-encode_ch '.' = "zi"
-encode_ch '<' = "zl"
-encode_ch '-' = "zm"
-encode_ch '!' = "zn"
-encode_ch '+' = "zp"
-encode_ch '\'' = "zq"
-encode_ch '\\' = "zr"
-encode_ch '/' = "zs"
-encode_ch '*' = "zt"
-encode_ch '_' = "zu"
-encode_ch '%' = "zv"
-encode_ch c = 'z' : shows (ord c) "U"
-\end{code}
-
-Decode is used for user printing.
-
-\begin{code}
-decodeFS :: FastString -> FastString
-decodeFS fs = mkFastString (decode (unpackFS fs))
-
-decode :: EncodedString -> UserString
-decode [] = []
-decode ('Z' : d : rest) | isDigit d = decode_tuple d rest
- | otherwise = decode_upper d : decode rest
-decode ('z' : d : rest) | isDigit d = decode_num_esc d rest
- | otherwise = decode_lower d : decode rest
-decode (c : rest) = c : decode rest
-
-decode_upper, decode_lower :: Char -> Char
-
-decode_upper 'L' = '('
-decode_upper 'R' = ')'
-decode_upper 'M' = '['
-decode_upper 'N' = ']'
-decode_upper 'C' = ':'
-decode_upper 'Z' = 'Z'
-decode_upper ch = pprTrace "decode_upper" (char ch) ch
-
-decode_lower 'z' = 'z'
-decode_lower 'a' = '&'
-decode_lower 'b' = '|'
-decode_lower 'c' = '^'
-decode_lower 'd' = '$'
-decode_lower 'e' = '='
-decode_lower 'g' = '>'
-decode_lower 'h' = '#'
-decode_lower 'i' = '.'
-decode_lower 'l' = '<'
-decode_lower 'm' = '-'
-decode_lower 'n' = '!'
-decode_lower 'p' = '+'
-decode_lower 'q' = '\''
-decode_lower 'r' = '\\'
-decode_lower 's' = '/'
-decode_lower 't' = '*'
-decode_lower 'u' = '_'
-decode_lower 'v' = '%'
-decode_lower ch = pprTrace "decode_lower" (char ch) ch
-
--- Characters not having a specific code are coded as z224U
-decode_num_esc d rest
- = go (digitToInt d) rest
- where
- go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
- go n ('U' : rest) = chr n : decode rest
- go n other = pprPanic "decode_num_esc" (ppr n <+> text other)
-
-decode_tuple :: Char -> EncodedString -> UserString
-decode_tuple d rest
- = go (digitToInt d) rest
- where
- -- NB. recurse back to decode after decoding the tuple, because
- -- the tuple might be embedded in a longer name.
- go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
- go 0 ('T':rest) = "()" ++ decode rest
- go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ decode rest
- go 1 ('H':rest) = "(# #)" ++ decode rest
- go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ decode rest
- go n other = pprPanic "decode_tuple" (ppr n <+> text other)
-\end{code}
-
-
%************************************************************************
%* *
Stuff for dealing with tuples
%* *
%************************************************************************
-Tuples are encoded as
- Z3T or Z3H
-for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
- Z<digit>
-
-* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
- There are no unboxed 0-tuples.
-
-* "()" is the tycon for a boxed 0-tuple.
- There are no boxed 1-tuples.
-
-
-\begin{code}
-maybe_tuple :: UserString -> Maybe EncodedString
-
-maybe_tuple "(# #)" = Just("Z1H")
-maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
- (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
- other -> Nothing
-maybe_tuple "()" = Just("Z0T")
-maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of
- (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
- other -> Nothing
-maybe_tuple other = Nothing
-
-count_commas :: Int -> String -> (Int, String)
-count_commas n (',' : cs) = count_commas (n+1) cs
-count_commas n cs = (n,cs)
-\end{code}
-
\begin{code}
mkTupleOcc :: NameSpace -> Boxity -> Arity -> OccName
-mkTupleOcc ns bx ar
- = OccName ns (mkFastString ('Z' : (show ar ++ bx_char)))
+mkTupleOcc ns bx ar = OccName ns (mkFastString str)
where
- bx_char = case bx of
- Boxed -> "T"
- Unboxed -> "H"
+ -- no need to cache these, the caching is done in the caller
+ -- (TysWiredIn.mk_tuple)
+ str = case bx of
+ Boxed -> '(' : commas ++ ")"
+ Unboxed -> '(' : '#' : commas ++ "#)"
+
+ commas = take (ar-1) (repeat ',')
isTupleOcc_maybe :: OccName -> Maybe (NameSpace, Boxity, Arity)
-- Tuples are special, because there are so many of them!
isTupleOcc_maybe (OccName ns fs)
= case unpackFS fs of
- ('Z':d:rest) | isDigit d -> Just (decode_tup (digitToInt d) rest)
- other -> Nothing
+ '(':'#':',':rest -> Just (ns, Unboxed, 2 + count_commas rest)
+ '(':',':rest -> Just (ns, Boxed, 2 + count_commas rest)
+ _other -> Nothing
where
- decode_tup n "H" = (ns, Unboxed, n)
- decode_tup n "T" = (ns, Boxed, n)
- decode_tup n (d:rest) = decode_tup (n*10 + digitToInt d) rest
+ count_commas (',':rest) = 1 + count_commas rest
+ count_commas _ = 0
\end{code}
%************************************************************************
-------------
isLexConId cs -- Prefix type or data constructors
- | nullFastString cs = False -- e.g. "Foo", "[]", "(,)"
+ | nullFS cs = False -- e.g. "Foo", "[]", "(,)"
| cs == FSLIT("[]") = True
| otherwise = startsConId (headFS cs)
isLexVarId cs -- Ordinary prefix identifiers
- | nullFastString cs = False -- e.g. "x", "_x"
+ | nullFS cs = False -- e.g. "x", "_x"
| otherwise = startsVarId (headFS cs)
isLexConSym cs -- Infix type or data constructors
- | nullFastString cs = False -- e.g. ":-:", ":", "->"
+ | nullFS cs = False -- e.g. ":-:", ":", "->"
| cs == FSLIT("->") = True
| otherwise = startsConSym (headFS cs)
isLexVarSym cs -- Infix identifiers
- | nullFastString cs = False -- e.g. "+"
+ | nullFS cs = False -- e.g. "+"
| otherwise = startsVarSym (headFS cs)
-------------
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
-startsVarSym c = isSymbolASCII c || isSymbolISO c -- Infix Ids
+startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids
startsConSym c = c == ':' -- Infix data constructors
-startsVarId c = isLower c || isLowerISO c || c == '_' -- Ordinary Ids
-startsConId c = isUpper c || isUpperISO c || c == '(' -- Ordinary type constructors and data constructors
-
+startsVarId c = isLower c || c == '_' -- Ordinary Ids
+startsConId c = isUpper c || c == '(' -- Ordinary type constructors and data constructors
isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
-isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
-isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
- --0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
-isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
- --0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
\end{code}
%************************************************************************
#include "HsVersions.h"
-import OccName ( NameSpace, varName,
- OccName, UserFS,
- setOccNameSpace,
- mkOccFS, occNameFlavour,
- isDataOcc, isTvOcc, isTcOcc,
- OccEnv, emptyOccEnv, extendOccEnvList, lookupOccEnv,
- elemOccEnv, plusOccEnv_C, extendOccEnv_C, foldOccEnv,
- occEnvElts
- )
+import OccName
import Module ( Module, mkModuleFS )
import Name ( Name, NamedThing(getName), nameModule, nameParent_maybe,
nameOccName, isExternalName, nameSrcLoc )
import Maybes ( mapCatMaybes )
import SrcLoc ( isGoodSrcLoc, SrcSpan )
+import FastString ( FastString )
import Outputable
import Util ( thenCmp )
\end{code}
-
%************************************************************************
%* *
\subsection{The main data type}
---------------
-- These two are used when parsing source files
-- They do encode the module and occurrence names
-mkUnqual :: NameSpace -> UserFS -> RdrName
-mkUnqual sp n = Unqual (mkOccFS sp n)
+mkUnqual :: NameSpace -> FastString -> RdrName
+mkUnqual sp n = Unqual (mkOccNameFS sp n)
-mkVarUnqual :: UserFS -> RdrName
-mkVarUnqual n = Unqual (mkOccFS varName n)
+mkVarUnqual :: FastString -> RdrName
+mkVarUnqual n = Unqual (mkVarOccFS n)
-mkQual :: NameSpace -> (UserFS, UserFS) -> RdrName
-mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccFS sp n)
+mkQual :: NameSpace -> (FastString, FastString) -> RdrName
+mkQual sp (m, n) = Qual (mkModuleFS m) (mkOccNameFS sp n)
getRdrName :: NamedThing thing => thing -> RdrName
getRdrName name = nameRdrName (getName name)
ppr (Qual mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
ppr (Orig mod occ) = ppr mod <> dot <> ppr occ <+> ppr_name_space occ
-ppr_name_space occ = ifPprDebug (parens (occNameFlavour occ))
+ppr_name_space occ = ifPprDebug (parens (pprNonVarNameSpace (occNameSpace occ)))
instance OutputableBndr RdrName where
pprBndr _ n
pprCLbl (CCS_Label ccs) = ppr ccs
pprCLbl (ModuleInitLabel mod way _)
- = ptext SLIT("__stginit_") <> ftext (moduleFS mod)
+ = ptext SLIT("__stginit_") <> ppr mod
<> char '_' <> text way
pprCLbl (PlainModuleInitLabel mod _)
- = ptext SLIT("__stginit_") <> ftext (moduleFS mod)
+ = ptext SLIT("__stginit_") <> ppr mod
ppIdFlavor :: IdLabelInfo -> SDoc
ppIdFlavor x = pp_cSEP <>
import ForeignCall ( CCallConv )
import Unique ( Unique, Uniquable(..) )
import FastString ( FastString )
+import DATA_WORD ( Word8 )
-----------------------------------------------------------------------------
-- Cmm, CmmTop, CmmBasicBlock
-- align to next N-byte boundary (N must be a power of 2).
| CmmDataLabel CLabel
-- label the current position in this section.
- | CmmString String
+ | CmmString [Word8]
-- string of 8-bit values only, not zero terminated.
- -- ToDo: might be more honest to use [Word8] here?
-----------------------------------------------------------------------------
-- Global STG registers
= return (L span (CmmT_Int $! parseInteger buf len 10 octDecDigit))
tok_octal span buf len
- = return (L span (CmmT_Int $! parseInteger (stepOn buf) (len-1) 8 octDecDigit))
+ = return (L span (CmmT_Int $! parseInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
tok_hexadecimal span buf len
- = return (L span (CmmT_Int $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
+ = return (L span (CmmT_Int $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
tok_float str = CmmT_Float $! readRational str
import Outputable
import Monad ( when )
+import Data.Char ( ord )
#include "HsVersions.h"
}
return [CmmStaticLit (getLit e)] }
| type ';' { return [CmmUninitialised
(machRepByteWidth $1)] }
- | 'bits8' '[' ']' STRING ';' { return [CmmString $4] }
+ | 'bits8' '[' ']' STRING ';' { return [mkString $4] }
| 'bits8' '[' INT ']' ';' { return [CmmUninitialised
(fromIntegral $3)] }
| typenot8 '[' INT ']' ';' { return [CmmUninitialised
section "bss" = UninitialisedData
section s = OtherSection s
+mkString :: String -> CmmStatic
+mkString s = CmmString (map (fromIntegral.ord) s)
+
-- mkMachOp infers the type of the MachOp from the type of its first
-- argument. We assume that this is correct: for MachOps that don't have
-- symmetrical args (e.g. shift ops), the first arg determines the type of
import Char ( ord, chr )
import IO ( Handle )
import DATA_BITS
+import Data.Word ( Word8 )
#ifdef DEBUG
import PprCmm () -- instances only
-- ---------------------------------------------------------------------
-- print strings as valid C strings
--- Assumes it contains only characters '\0'..'\xFF'!
-pprFSInCStyle :: FastString -> SDoc
-pprFSInCStyle fs = pprStringInCStyle (unpackFS fs)
-
-pprStringInCStyle :: String -> SDoc
+pprStringInCStyle :: [Word8] -> SDoc
pprStringInCStyle s = doubleQuotes (text (concatMap charToC s))
-charToC :: Char -> String
-charToC '\"' = "\\\""
-charToC '\'' = "\\\'"
-charToC '\\' = "\\\\"
-charToC c | c >= ' ' && c <= '~' = [c]
- | c > '\xFF' = panic ("charToC "++show c)
- | otherwise = ['\\',
+charToC :: Word8 -> String
+charToC w =
+ case chr (fromIntegral w) of
+ '\"' -> "\\\""
+ '\'' -> "\\\'"
+ '\\' -> "\\\\"
+ c | c >= ' ' && c <= '~' -> [c]
+ | otherwise -> ['\\',
chr (ord '0' + ord c `div` 64),
chr (ord '0' + ord c `div` 8 `mod` 8),
chr (ord '0' + ord c `mod` 8)]
-
-- ---------------------------------------------------------------------------
-- Initialising static objects with floating-point numbers. We can't
-- just emit the floating point number, because C will cast it to an int
import Data.List ( intersperse, groupBy )
import IO ( Handle )
import Maybe ( isJust )
+import Data.Char ( chr )
pprCmms :: [Cmm] -> SDoc
pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
CmmAlign i -> nest 4 $ text "align" <+> int i
CmmDataLabel clbl -> pprCLabel clbl <> colon
- CmmString s' -> nest 4 $ text "I8[]" <+> doubleQuotes (text s')
+ CmmString s' -> nest 4 $ text "I8[]" <+>
+ doubleQuotes (text (map (chr.fromIntegral) s'))
-- --------------------------------------------------------------------------
-- Registers, whether local (temps) or global
import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr )
import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel )
-import Module ( moduleUserString )
+import Module ( moduleString )
import Id ( Id )
import CostCentre
import StgSyn ( GenStgExpr(..), StgExpr )
-> Code
emitCostCentreDecl cc = do
{ label <- mkStringCLit (costCentreUserName cc)
- ; modl <- mkStringCLit (moduleUserString (cc_mod cc))
+ ; modl <- mkStringCLit (moduleString (cc_mod cc))
; let
lits = [ zero, -- StgInt ccID,
label, -- char *label,
import Util ( filterOut, sortLe )
import DynFlags ( DynFlags(..), HscTarget(..) )
import Packages ( HomeModules )
-import FastString ( LitString, FastString, unpackFS )
+import FastString ( LitString, FastString, bytesFS )
import Outputable
import Char ( ord )
import DATA_BITS
+import DATA_WORD ( Word8 )
import Maybe ( isNothing )
-------------------------------------------------------------------------
-------------------------------------------------------------------------
cgLit :: Literal -> FCode CmmLit
-cgLit (MachStr s) = mkStringCLit (unpackFS s)
+cgLit (MachStr s) = mkByteStringCLit (bytesFS s)
+ -- not unpackFS; we want the UTF-8 byte stream.
cgLit other_lit = return (mkSimpleLit other_lit)
mkSimpleLit :: Literal -> CmmLit
mkStringCLit :: String -> FCode CmmLit
-- Make a global definition for the string,
-- and return its label
-mkStringCLit str
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
= do { uniq <- newUnique
; let lbl = mkStringLitLabel uniq
- ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString str]
+ ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
; return (CmmLabel lbl) }
-------------------------------------------------------------------------
import Id ( Id, idType, idArity, idName )
import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
import Name ( Name, nameUnique, getOccName, getOccString )
-import OccName ( occNameUserString )
+import OccName ( occNameString )
import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
import TcType ( tcSplitSigmaTy )
import TyCon ( isFunTyCon, isAbstractTyCon )
closureValDescr (ClosureInfo {closureDescr = descr})
= descr
closureValDescr (ConInfo {closureCon = con})
- = occNameUserString (getOccName con)
+ = occNameString (getOccName con)
closureTypeDescr (ClosureInfo { closureType = ty })
= getTyDescription ty
closureTypeDescr (ConInfo { closureCon = data_con })
- = occNameUserString (getOccName (dataConTyCon data_con))
+ = occNameString (getOccName (dataConTyCon data_con))
getTyDescription :: Type -> String
getTyDescription ty
import MatchLit ( tidyLitPat, tidyNPat )
import Id ( Id, idType )
import DataCon ( DataCon, dataConTyCon, dataConOrigArgTys, dataConFieldLabels )
-import Name ( Name, mkInternalName, getOccName, isDataSymOcc, getName, mkVarOcc )
+import Name ( Name, mkInternalName, getOccName, isDataSymOcc,
+ getName, mkVarOccFS )
import TysWiredIn
import PrelNames ( unboundKey )
import TyCon ( tyConDataCons, tupleTyConBoxity, isTupleTyCon )
new_var = hash_x
hash_x = mkInternalName unboundKey {- doesn't matter much -}
- (mkVarOcc FSLIT("#x"))
+ (mkVarOccFS FSLIT("#x"))
noSrcLoc
make_row_vars_for_constructor :: (EqnNo, EquationInfo) -> [WarningPat]
import CoreUtils ( exprType, mkInlineMe )
import Id ( Id, idType, idName, mkSysLocal, setInlinePragma )
import Literal ( Literal(..), mkStringLit )
-import Module ( moduleString )
+import Module ( moduleFS )
import Name ( getOccString, NamedThing(..) )
-import OccName ( encodeFS )
import Type ( repType, coreEqType )
import TcType ( Type, mkFunTys, mkForAllTys, mkTyConApp,
mkFunTy, tcSplitTyConApp_maybe,
= dsCImport id spec cconv safety no_hdrs `thenDs` \(ids, h, c) ->
returnDs (ids, h, c, if no_hdrs then Nothing else Just header)
where
- no_hdrs = nullFastString header
+ no_hdrs = nullFS header
-- FIXME: the `lib' field is needed for .NET ILX generation when invoking
-- routines that are external to the .NET runtime, but GHC doesn't
the_ccall_app = mkFCall ccall_uniq fcall val_args ccall_result_ty
work_rhs = mkLams tvs (mkLams work_arg_ids the_ccall_app)
work_id = setImpInline no_hdrs $ -- See comments with setImpInline
- mkSysLocal (encodeFS FSLIT("$wccall")) work_uniq worker_ty
+ mkSysLocal FSLIT("$wccall") work_uniq worker_ty
-- Build the wrapper
work_app = mkApps (mkVarApps (Var work_id) tvs) val_args
getModuleDs `thenDs` \ mod_name ->
let
-- hack: need to get at the name of the C stub we're about to generate.
- fe_nm = mkFastString (moduleString mod_name ++ "_" ++ toCName fe_id)
+ fe_nm = mkFastString (unpackFS (zEncodeFS (moduleFS mod_name)) ++ "_" ++ toCName fe_id)
in
newSysLocalDs arg_ty `thenDs` \ cback ->
dsLookupGlobalId newStablePtrName `thenDs` \ newStablePtrId ->
import HsSyn
import Class (FunDep)
import PrelNames ( rationalTyConName, integerTyConName, negateName )
-import OccName ( isDataOcc, isTvOcc, occNameUserString )
+import OccName ( isDataOcc, isTvOcc, occNameString )
-- To avoid clashes with DsMeta.varName we must make a local alias for OccName.varName
-- we do this by removing varName from the import of OccName above, making
-- a qualified instance of OccName and using OccNameAlias.varName where varName
-- ws previously used in this file.
import qualified OccName
-import Module ( Module, mkModule, moduleUserString )
+import Module ( Module, mkModule, moduleString )
import Id ( Id, mkLocalId )
-import OccName ( mkOccFS )
+import OccName ( mkOccNameFS )
import Name ( Name, mkExternalName, localiseName, nameOccName, nameModule,
isExternalName, getSrcLoc )
import NameEnv
; MkC uni <- coreIntLit (getKey (getUnique name))
; rep2 mkNameLName [occ,uni] }
where
- name_mod = moduleUserString (nameModule name)
+ name_mod = moduleString (nameModule name)
name_occ = nameOccName name
mk_varg | OccName.isDataOcc name_occ = mkNameG_dName
| OccName.isVarOcc name_occ = mkNameG_vName
; return (NonRec id var) }
occNameLit :: Name -> DsM (Core String)
-occNameLit n = coreStringLit (occNameUserString (nameOccName n))
+occNameLit n = coreStringLit (occNameString (nameOccName n))
-- %*********************************************************************
thLib = mkModule "Language.Haskell.TH.Lib"
mk_known_key_name mod space str uniq
- = mkExternalName uniq mod (mkOccFS space str)
+ = mkExternalName uniq mod (mkOccNameFS space str)
Nothing noSrcLoc
libFun = mk_known_key_name thLib OccName.varName
plusIntegerName, timesIntegerName, smallIntegerDataConName,
lengthPName, indexPName )
import Outputable
-import UnicodeUtil ( intsToUtf8 )
import SrcLoc ( Located(..), unLoc )
import Util ( isSingleton, notNull, zipEqual, sortWith )
import ListSetOps ( assocDefault )
import FastString
+
+import Data.Char ( ord )
\end{code}
mkStringExpr str = mkStringExprFS (mkFastString str)
mkStringExprFS str
- | nullFastString str
+ | nullFS str
= returnDs (mkNilExpr charTy)
| lengthFS str == 1
in
returnDs (mkConsExpr charTy the_char (mkNilExpr charTy))
- | all safeChar int_chars
+ | all safeChar chars
= dsLookupGlobalId unpackCStringName `thenDs` \ unpack_id ->
returnDs (App (Var unpack_id) (Lit (MachStr str)))
| otherwise
= dsLookupGlobalId unpackCStringUtf8Name `thenDs` \ unpack_id ->
- returnDs (App (Var unpack_id) (Lit (MachStr (mkFastString (intsToUtf8 int_chars)))))
+ returnDs (App (Var unpack_id) (Lit (MachStr str)))
where
- int_chars = unpackIntFS str
- safeChar c = c >= 1 && c <= 0xFF
+ chars = unpackFS str
+ safeChar c = ord c >= 1 && ord c <= 0xFF
\end{code}
import Constants ( wORD_SIZE )
import Data.List ( intersperse, sortBy, zip4, zip5, partition )
-import Foreign ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8 )
+import Foreign ( Ptr, castPtr, mallocBytes, pokeByteOff, Word8,
+ withForeignPtr )
import Foreign.C ( CInt )
import Control.Exception ( throwDyn )
pushStr s
= let getMallocvilleAddr
= case s of
- FastString _ l ba ->
- -- sigh, a string in the heap is no good to us.
- -- We need a static C pointer, since the type of
- -- a string literal is Addr#. So, copy the string
- -- into C land and remember the pointer so we can
- -- free it later.
- let n = I# l
- -- CAREFUL! Chars are 32 bits in ghc 4.09+
- in ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
+ FastString _ n _ fp _ ->
+ -- we could grab the Ptr from the ForeignPtr,
+ -- but then we have no way to control its lifetime.
+ -- In reality it'll probably stay alive long enoungh
+ -- by virtue of the global FastString table, but
+ -- to be on the safe side we copy the string into
+ -- a malloc'd area of memory.
+ ioToBc (mallocBytes (n+1)) `thenBc` \ ptr ->
recordMallocBc ptr `thenBc_`
ioToBc (
- do memcpy ptr ba (fromIntegral n)
+ withForeignPtr fp $ \p -> do
+ memcpy ptr p (fromIntegral n)
pokeByteOff ptr n (fromIntegral (ord '\0') :: Word8)
return ptr
)
(pprCoreExpr (deAnnotate (undefined, other)))
foreign import ccall unsafe "memcpy"
- memcpy :: Ptr a -> ByteArray# -> CInt -> IO ()
+ memcpy :: Ptr a -> Ptr b -> CInt -> IO ()
-- -----------------------------------------------------------------------------
import Name ( Name, nameModule, nameOccName, isExternalName )
import NameEnv
-import OccName ( occNameString )
+import OccName ( occNameFS )
import PrimOp ( PrimOp, primOpOcc )
-import Module ( moduleString )
-import FastString ( FastString(..), unpackFS )
+import Module ( moduleFS )
+import FastString ( FastString(..), unpackFS, zEncodeFS )
import Outputable
import Panic ( GhcException(..) )
-- HACKS!!! ToDo: cleaner
nameToCLabel :: Name -> String{-suffix-} -> String
nameToCLabel n suffix
- = moduleString (nameModule n)
- ++ '_':occNameString (nameOccName n) ++ '_':suffix
+ = unpackFS (zEncodeFS (moduleFS (nameModule n)))
+ ++ '_': unpackFS (zEncodeFS (occNameFS (nameOccName n))) ++ '_':suffix
primopToCLabel :: PrimOp -> String{-suffix-} -> String
primopToCLabel primop suffix
- = let str = "GHCziPrimopWrappers_" ++ occNameString (primOpOcc primop) ++ '_':suffix
+ = let str = "GHCziPrimopWrappers_" ++ unpackFS (zEncodeFS (occNameFS (primOpOcc primop))) ++ '_':suffix
in --trace ("primopToCLabel: " ++ str)
str
\end{code}
import Outputable
-- for createtags (should these come via GHC?)
-import Module( moduleUserString )
-import Name( nameSrcLoc, nameModule, nameOccName )
-import OccName( pprOccName )
-import SrcLoc( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol )
+import Module ( moduleString )
+import Name ( nameSrcLoc, nameModule, nameOccName )
+import OccName ( pprOccName )
+import SrcLoc ( isGoodSrcLoc, srcLocFile, srcLocLine, srcLocCol )
-- Other random utilities
import Digraph ( flattenSCCs )
is_interpreted <- GHC.moduleIsInterpreted session m
-- should we just skip these?
when (not is_interpreted) $
- throwDyn (CmdLineError ("module '" ++ moduleUserString m ++ "' is not interpreted"))
+ throwDyn (CmdLineError ("module '" ++ moduleString m ++ "' is not interpreted"))
mbModInfo <- GHC.getModuleInfo session m
let unqual
import Module ( Module, mkModule )
import RdrHsSyn ( mkClassDecl, mkTyData )
import qualified OccName
-import OccName ( startsVarId, startsVarSym, startsConId, startsConSym )
+import OccName ( startsVarId, startsVarSym, startsConId, startsConSym,
+ pprNameSpace )
import SrcLoc ( Located(..), SrcSpan )
import Type ( Type )
import TysWiredIn ( unitTyCon, tupleTyCon, tupleCon, trueDataCon, nilDataCon, consDataCon )
badOcc :: OccName.NameSpace -> String -> SDoc
badOcc ctxt_ns occ
- = ptext SLIT("Illegal") <+> text (OccName.nameSpaceString ctxt_ns)
+ = ptext SLIT("Illegal") <+> pprNameSpace ctxt_ns
<+> ptext SLIT("name:") <+> quotes (text occ)
thRdrName :: OccName.NameSpace -> String -> TH.NameFlavour -> RdrName
-- The packing and unpacking is rather turgid :-(
mk_occ :: OccName.NameSpace -> String -> OccName.OccName
-mk_occ ns occ = OccName.mkOccFS ns (mkFastString occ)
+mk_occ ns occ = OccName.mkOccNameFS ns (mkFastString occ)
mk_ghc_ns :: TH.NameSpace -> OccName.NameSpace
mk_ghc_ns TH.DataName = OccName.dataName
ptext SLIT("dynamic")
pprCEntity _ _ (CWrapper) = ptext SLIT("wrapper")
--
- pprLib lib | nullFastString lib = empty
- | otherwise = char '[' <> ppr lib <> char ']'
+ pprLib lib | nullFS lib = empty
+ | otherwise = char '[' <> ppr lib <> char ']'
instance Outputable ForeignExport where
ppr (CExport (CExportStatic lbl cconv)) =
import Var ( Id )
import Type ( Type )
import DataCon ( DataCon, dataConWrapId, dataConSourceArity )
-import OccName ( mkVarOcc )
+import OccName ( mkVarOccFS )
import Name ( Name )
import BasicTypes ( RecFlag(..) )
import SrcLoc
mkHsSplice e = HsSplice unqualSplice e
-unqualSplice = mkRdrUnqual (mkVarOcc FSLIT("splice"))
+unqualSplice = mkRdrUnqual (mkVarOccFS FSLIT("splice"))
-- A name (uniquified later) to
-- identify the splice
import MkId ( seqId )
import Module ( Module, ModLocation(ml_hi_file), emptyModuleEnv,
addBootSuffix_maybe,
- extendModuleEnv, lookupModuleEnv, moduleUserString
+ extendModuleEnv, lookupModuleEnv, moduleString
)
import OccName ( OccName, mkOccEnv, lookupOccEnv, mkClassTyConOcc, mkClassDataConOcc,
mkSuperDictSelOcc, mkDataConWrapperOcc, mkDataConWorkerOcc )
-- imported name, to fix the module correctly in the cache
mk_new_bndr mod mb_parent occ
= newGlobalBinder mod occ mb_parent
- (importedSrcLoc (moduleUserString mod))
+ (importedSrcLoc (moduleString mod))
doc = ptext SLIT("Declaration for") <+> ppr (ifName decl)
isEmptyOccSet, intersectOccSet, intersectsOccSet,
occNameFS, isTcOcc )
import Module ( Module, moduleFS,
- ModLocation(..), mkSysModuleFS, moduleUserString,
+ ModLocation(..), mkModuleFS, moduleString,
ModuleEnv, emptyModuleEnv, lookupModuleEnv,
extendModuleEnv_C
)
-- Group by module and sort by occurrence
-- This keeps the list in canonical order
mkIfaceExports exports
- = [ (mkSysModuleFS fs, eltsFM avails)
+ = [ (mkModuleFS fs, eltsFM avails)
| (fs, avails) <- fmToList groupFM
]
where
checkOldIface hsc_env mod_summary source_unchanged maybe_iface
= do { showPass (hsc_dflags hsc_env)
- ("Checking old interface for " ++ moduleUserString (ms_mod mod_summary)) ;
+ ("Checking old interface for " ++ moduleString (ms_mod mod_summary)) ;
; initIfaceCheck hsc_env $
check_old_iface mod_summary source_unchanged maybe_iface
import Packages ( PackageIdH(..) )
import SysTools ( newTempName )
import qualified SysTools
-import Module ( Module, ModLocation(..), mkModule, moduleUserString,
+import Module ( Module, ModLocation(..), mkModule,
addBootSuffix_maybe )
import Digraph ( SCC(..) )
import Finder ( findModule, FindResult(..) )
| otherwise -> return []
getOptionsFromStringBuffer :: StringBuffer -> FilePath -> [(Int,String)]
-getOptionsFromStringBuffer buffer@(StringBuffer _ len# _) fn =
+getOptionsFromStringBuffer buffer@(StringBuffer _ len _) fn =
let
- ls = lines (lexemeToString buffer (I# len#)) -- lazy, so it's ok
+ ls = lines (lexemeToString buffer len) -- lazy, so it's ok
in
look 1 ls
where
return result
where
- basename = dots_to_slashes (moduleUserString mod)
+ basename = dots_to_slashes (moduleString mod)
to_search :: [(FilePath, IO FinderCacheEntry)]
to_search = [ (file, fn path basename)
-> String -- Suffix
-> IO ModLocation
mkHomeModLocation2 dflags mod src_basename ext = do
- let mod_basename = dots_to_slashes (moduleUserString mod)
+ let mod_basename = dots_to_slashes (moduleString mod)
obj_fn <- mkObjPath dflags src_basename mod_basename
hi_fn <- mkHiPath dflags src_basename mod_basename
= let
stubdir = stubDir dflags
- mod_basename = dots_to_slashes (moduleUserString mod)
+ mod_basename = dots_to_slashes (moduleString mod)
src_basename = basenameOf (expectJust "mkStubPaths"
(ml_hs_file location))
when (ghci_mode == BatchCompile && isJust ofile && not do_linking) $
debugTraceMsg dflags 1 (text ("Warning: output was redirected with -o, " ++
"but no output will be generated\n" ++
- "because there is no " ++ moduleUserString main_mod ++ " module."))
+ "because there is no " ++ moduleString main_mod ++ " module."))
-- link everything together
linkresult <- link ghci_mode dflags do_linking (hsc_HPT hsc_env1)
char ')'])
where
mod = ms_mod mod_summary
- mod_str = moduleUserString mod ++ hscSourceString (ms_hsc_src mod_summary)
+ mod_str = moduleString mod ++ hscSourceString (ms_hsc_src mod_summary)
\end{code}
pprLabel lbl = pprGloblDecl lbl $$ (pprCLabel_asm lbl <> char ':')
--- Assume we want to backslash-convert the string
pprASCII str
- = vcat (map do1 (str ++ [chr 0]))
+ = vcat (map do1 str) $$ do1 0
where
- do1 :: Char -> Doc
- do1 c = ptext SLIT("\t.byte\t0x") <> hshow (ord c)
-
- hshow :: Int -> Doc
- hshow n | n >= 0 && n <= 255
- = char (tab !! (n `div` 16)) <> char (tab !! (n `mod` 16))
- tab = "0123456789ABCDEF"
+ do1 :: Word8 -> Doc
+ do1 w = ptext SLIT("\t.byte\t") <> int (fromIntegral w)
pprAlign bytes =
IF_ARCH_alpha(ptextSLIT(".align ") <> int pow2,
import Panic (panic)
import Outputable (Outputable(ppr), pprPanic)
import UniqSupply (UniqSupply, splitUniqSupply, uniqFromSupply)
-import OccName (UserFS)
import Var (Var, idType)
import Id (Id, mkSysLocal)
import Name (Name)
import PrelInfo ( primOpId )
import CoreSyn (Expr(..), Bind(..), CoreBndr, CoreExpr, CoreBind, mkApps)
import CoreUtils (exprType)
+import FastString (FastString)
-- friends
import NDPCoreUtils (parrElemTy)
-- generate a new local variable whose name is based on the given lexeme and
-- whose type is as specified in the second argument (EXPORTED)
--
-newVar :: UserFS -> Type -> Flatten Var
+newVar :: FastString -> Type -> Flatten Var
newVar lexeme ty = Flatten $ \state ->
let
(us1, us2) = splitUniqSupply (us state)
-- generate a non-recursive binding using a new binder whose name is derived
-- from the given lexeme (EXPORTED)
--
-mkBind :: UserFS -> CoreExpr -> Flatten (CoreBndr, CoreBind)
+mkBind :: FastString -> CoreExpr -> Flatten (CoreBndr, CoreBind)
mkBind lexeme e =
do
v <- newVar lexeme (exprType e)
, is_lower -- Char# -> Bool
, is_upper -- Char# -> Bool
, is_digit -- Char# -> Bool
+ , is_alphanum -- Char# -> Bool
, is_hexdigit, is_octdigit
, hexDigit, octDecDigit
is_lower = is_ctype cLower
is_upper = is_ctype cUpper
is_digit = is_ctype cDigit
+is_alphanum = is_ctype (cLower+cUpper+cDigit)
\end{code}
Utils
'\158' -> 0 -- \236
'\159' -> 0 -- \237
'\160' -> cSpace --
- '\161' -> cAny + cSymbol -- ¡
- '\162' -> cAny + cSymbol -- ¢
- '\163' -> cAny + cSymbol -- £
- '\164' -> cAny + cSymbol -- ¤
- '\165' -> cAny + cSymbol -- ¥
- '\166' -> cAny + cSymbol -- ¦
- '\167' -> cAny + cSymbol -- §
- '\168' -> cAny + cSymbol -- ¨
- '\169' -> cAny + cSymbol -- ©
- '\170' -> cAny + cSymbol -- ª
- '\171' -> cAny + cSymbol -- «
- '\172' -> cAny + cSymbol -- ¬
- '\173' -> cAny + cSymbol --
- '\174' -> cAny + cSymbol -- ®
- '\175' -> cAny + cSymbol -- ¯
- '\176' -> cAny + cSymbol -- °
- '\177' -> cAny + cSymbol -- ±
- '\178' -> cAny + cSymbol -- ²
- '\179' -> cAny + cSymbol -- ³
- '\180' -> cAny + cSymbol -- ´
- '\181' -> cAny + cSymbol -- µ
- '\182' -> cAny + cSymbol -- ¶
- '\183' -> cAny + cSymbol -- ·
- '\184' -> cAny + cSymbol -- ¸
- '\185' -> cAny + cSymbol -- ¹
- '\186' -> cAny + cSymbol -- º
- '\187' -> cAny + cSymbol -- »
- '\188' -> cAny + cSymbol -- ¼
- '\189' -> cAny + cSymbol -- ½
- '\190' -> cAny + cSymbol -- ¾
- '\191' -> cAny + cSymbol -- ¿
- '\192' -> cAny + cIdent + cUpper -- À
- '\193' -> cAny + cIdent + cUpper -- Á
- '\194' -> cAny + cIdent + cUpper -- Â
- '\195' -> cAny + cIdent + cUpper -- Ã
- '\196' -> cAny + cIdent + cUpper -- Ä
- '\197' -> cAny + cIdent + cUpper -- Å
- '\198' -> cAny + cIdent + cUpper -- Æ
- '\199' -> cAny + cIdent + cUpper -- Ç
- '\200' -> cAny + cIdent + cUpper -- È
- '\201' -> cAny + cIdent + cUpper -- É
- '\202' -> cAny + cIdent + cUpper -- Ê
- '\203' -> cAny + cIdent + cUpper -- Ë
- '\204' -> cAny + cIdent + cUpper -- Ì
- '\205' -> cAny + cIdent + cUpper -- Í
- '\206' -> cAny + cIdent + cUpper -- Î
- '\207' -> cAny + cIdent + cUpper -- Ï
- '\208' -> cAny + cIdent + cUpper -- Ð
- '\209' -> cAny + cIdent + cUpper -- Ñ
- '\210' -> cAny + cIdent + cUpper -- Ò
- '\211' -> cAny + cIdent + cUpper -- Ó
- '\212' -> cAny + cIdent + cUpper -- Ô
- '\213' -> cAny + cIdent + cUpper -- Õ
- '\214' -> cAny + cIdent + cUpper -- Ö
- '\215' -> cAny + cSymbol + cLower -- ×
- '\216' -> cAny + cIdent + cUpper -- Ø
- '\217' -> cAny + cIdent + cUpper -- Ù
- '\218' -> cAny + cIdent + cUpper -- Ú
- '\219' -> cAny + cIdent + cUpper -- Û
- '\220' -> cAny + cIdent + cUpper -- Ü
- '\221' -> cAny + cIdent + cUpper -- Ý
- '\222' -> cAny + cIdent + cUpper -- Þ
- '\223' -> cAny + cIdent -- ß
- '\224' -> cAny + cIdent + cLower -- à
- '\225' -> cAny + cIdent + cLower -- á
- '\226' -> cAny + cIdent + cLower -- â
- '\227' -> cAny + cIdent + cLower -- ã
- '\228' -> cAny + cIdent + cLower -- ä
- '\229' -> cAny + cIdent + cLower -- å
- '\230' -> cAny + cIdent + cLower -- æ
- '\231' -> cAny + cIdent + cLower -- ç
- '\232' -> cAny + cIdent + cLower -- è
- '\233' -> cAny + cIdent + cLower -- é
- '\234' -> cAny + cIdent + cLower -- ê
- '\235' -> cAny + cIdent + cLower -- ë
- '\236' -> cAny + cIdent + cLower -- ì
- '\237' -> cAny + cIdent + cLower -- í
- '\238' -> cAny + cIdent + cLower -- î
- '\239' -> cAny + cIdent + cLower -- ï
- '\240' -> cAny + cIdent + cLower -- ð
- '\241' -> cAny + cIdent + cLower -- ñ
- '\242' -> cAny + cIdent + cLower -- ò
- '\243' -> cAny + cIdent + cLower -- ó
- '\244' -> cAny + cIdent + cLower -- ô
- '\245' -> cAny + cIdent + cLower -- õ
- '\246' -> cAny + cIdent + cLower -- ö
- '\247' -> cAny + cSymbol -- ÷
- '\248' -> cAny + cIdent -- ø
- '\249' -> cAny + cIdent + cLower -- ù
- '\250' -> cAny + cIdent + cLower -- ú
- '\251' -> cAny + cIdent + cLower -- û
- '\252' -> cAny + cIdent + cLower -- ü
- '\253' -> cAny + cIdent + cLower -- ý
- '\254' -> cAny + cIdent + cLower -- þ
- '\255' -> cAny + cIdent + cLower -- ÿ
+ '\161' -> cAny + cSymbol -- ¡
+ '\162' -> cAny + cSymbol -- ¢
+ '\163' -> cAny + cSymbol -- £
+ '\164' -> cAny + cSymbol -- ¤
+ '\165' -> cAny + cSymbol -- ¥
+ '\166' -> cAny + cSymbol -- ¦
+ '\167' -> cAny + cSymbol -- §
+ '\168' -> cAny + cSymbol -- ¨
+ '\169' -> cAny + cSymbol -- ©
+ '\170' -> cAny + cSymbol -- ª
+ '\171' -> cAny + cSymbol -- «
+ '\172' -> cAny + cSymbol -- ¬
+ '\173' -> cAny + cSymbol --
+ '\174' -> cAny + cSymbol -- ®
+ '\175' -> cAny + cSymbol -- ¯
+ '\176' -> cAny + cSymbol -- °
+ '\177' -> cAny + cSymbol -- ±
+ '\178' -> cAny + cSymbol -- ²
+ '\179' -> cAny + cSymbol -- ³
+ '\180' -> cAny + cSymbol -- ´
+ '\181' -> cAny + cSymbol -- µ
+ '\182' -> cAny + cSymbol -- ¶
+ '\183' -> cAny + cSymbol -- ·
+ '\184' -> cAny + cSymbol -- ¸
+ '\185' -> cAny + cSymbol -- ¹
+ '\186' -> cAny + cSymbol -- º
+ '\187' -> cAny + cSymbol -- »
+ '\188' -> cAny + cSymbol -- ¼
+ '\189' -> cAny + cSymbol -- ½
+ '\190' -> cAny + cSymbol -- ¾
+ '\191' -> cAny + cSymbol -- ¿
+ '\192' -> cAny + cIdent + cUpper -- À
+ '\193' -> cAny + cIdent + cUpper -- Á
+ '\194' -> cAny + cIdent + cUpper -- Â
+ '\195' -> cAny + cIdent + cUpper -- Ã
+ '\196' -> cAny + cIdent + cUpper -- Ä
+ '\197' -> cAny + cIdent + cUpper -- Å
+ '\198' -> cAny + cIdent + cUpper -- Æ
+ '\199' -> cAny + cIdent + cUpper -- Ç
+ '\200' -> cAny + cIdent + cUpper -- È
+ '\201' -> cAny + cIdent + cUpper -- É
+ '\202' -> cAny + cIdent + cUpper -- Ê
+ '\203' -> cAny + cIdent + cUpper -- Ë
+ '\204' -> cAny + cIdent + cUpper -- Ì
+ '\205' -> cAny + cIdent + cUpper -- Í
+ '\206' -> cAny + cIdent + cUpper -- Î
+ '\207' -> cAny + cIdent + cUpper -- Ï
+ '\208' -> cAny + cIdent + cUpper -- Ð
+ '\209' -> cAny + cIdent + cUpper -- Ñ
+ '\210' -> cAny + cIdent + cUpper -- Ò
+ '\211' -> cAny + cIdent + cUpper -- Ó
+ '\212' -> cAny + cIdent + cUpper -- Ô
+ '\213' -> cAny + cIdent + cUpper -- Õ
+ '\214' -> cAny + cIdent + cUpper -- Ö
+ '\215' -> cAny + cSymbol + cLower -- ×
+ '\216' -> cAny + cIdent + cUpper -- Ø
+ '\217' -> cAny + cIdent + cUpper -- Ù
+ '\218' -> cAny + cIdent + cUpper -- Ú
+ '\219' -> cAny + cIdent + cUpper -- Û
+ '\220' -> cAny + cIdent + cUpper -- Ü
+ '\221' -> cAny + cIdent + cUpper -- Ý
+ '\222' -> cAny + cIdent + cUpper -- Þ
+ '\223' -> cAny + cIdent -- ß
+ '\224' -> cAny + cIdent + cLower -- à
+ '\225' -> cAny + cIdent + cLower -- á
+ '\226' -> cAny + cIdent + cLower -- â
+ '\227' -> cAny + cIdent + cLower -- ã
+ '\228' -> cAny + cIdent + cLower -- ä
+ '\229' -> cAny + cIdent + cLower -- å
+ '\230' -> cAny + cIdent + cLower -- æ
+ '\231' -> cAny + cIdent + cLower -- ç
+ '\232' -> cAny + cIdent + cLower -- è
+ '\233' -> cAny + cIdent + cLower -- é
+ '\234' -> cAny + cIdent + cLower -- ê
+ '\235' -> cAny + cIdent + cLower -- ë
+ '\236' -> cAny + cIdent + cLower -- ì
+ '\237' -> cAny + cIdent + cLower -- í
+ '\238' -> cAny + cIdent + cLower -- î
+ '\239' -> cAny + cIdent + cLower -- ï
+ '\240' -> cAny + cIdent + cLower -- ð
+ '\241' -> cAny + cIdent + cLower -- ñ
+ '\242' -> cAny + cIdent + cLower -- ò
+ '\243' -> cAny + cIdent + cLower -- ó
+ '\244' -> cAny + cIdent + cLower -- ô
+ '\245' -> cAny + cIdent + cLower -- õ
+ '\246' -> cAny + cIdent + cLower -- ö
+ '\247' -> cAny + cSymbol -- ÷
+ '\248' -> cAny + cIdent -- ø
+ '\249' -> cAny + cIdent + cLower -- ù
+ '\250' -> cAny + cIdent + cLower -- ú
+ '\251' -> cAny + cIdent + cLower -- û
+ '\252' -> cAny + cIdent + cLower -- ü
+ '\253' -> cAny + cIdent + cLower -- ý
+ '\254' -> cAny + cIdent + cLower -- þ
+ '\255' -> cAny + cIdent + cLower -- ÿ
\end{code}
-----------------------------------------------------------------------------
--- (c) The University of Glasgow, 2003
+-- (c) The University of Glasgow, 2006
--
-- GHC's lexer.
--
import Util ( maybePrefixMatch, readRational )
import DATA_BITS
-import Char
+import Data.Char
import Ratio
--import TRACE
}
-$whitechar = [\ \t\n\r\f\v\xa0]
+$unispace = \x05
+$whitechar = [\ \t\n\r\f\v\xa0 $unispace]
$white_no_nl = $whitechar # \n
$ascdigit = 0-9
-$unidigit = \x01
+$unidigit = \x03
+$decdigit = $ascdigit -- for now, should really be $digit (ToDo)
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
-$unisymbol = \x02
+$unisymbol = \x04
$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\']
-$unilarge = \x03
+$unilarge = \x01
$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
$large = [$asclarge $unilarge]
-$unismall = \x04
+$unismall = \x02
$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
$small = [$ascsmall $unismall \_]
-$graphic = [$small $large $symbol $digit $special \:\"\']
+$unigraphic = \x06
+$graphic = [$small $large $symbol $digit $special $unigraphic \:\"\']
$octit = 0-7
-$hexit = [$digit A-F a-f]
+$hexit = [$decdigit A-F a-f]
$symchar = [$symbol \:]
$nl = [\n\r]
$idchar = [$small $large $digit \']
@varsym = $symbol $symchar*
@consym = \: $symchar*
-@decimal = $digit+
+@decimal = $decdigit+
@octal = $octit+
@hexadecimal = $hexit+
@exponent = [eE] [\-\+]? @decimal
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
-<line_prag1> $digit+ { setLine line_prag1a }
+<line_prag1> $decdigit+ { setLine line_prag1a }
<line_prag1a> \" [$graphic \ ]* \" { setFile line_prag1b }
<line_prag1b> .* { pop }
-- Haskell-style line pragmas, of the form
-- {-# LINE <line> "<file>" #-}
-<line_prag2> $digit+ { setLine line_prag2a }
+<line_prag2> $decdigit+ { setLine line_prag2a }
<line_prag2a> \" [$graphic \ ]* \" { setFile line_prag2b }
<line_prag2b> "#-}"|"-}" { pop }
-- NOTE: accept -} at the end of a LINE pragma, for compatibility
,(">-", ITrarrowtail, bit arrowsBit)
,("-<<", ITLarrowtail, bit arrowsBit)
,(">>-", ITRarrowtail, bit arrowsBit)
+
+#if __GLASGOW_HASKELL__ >= 605
+ ,("∀", ITforall, bit tvBit)
+ ,("→", ITrarrow, 0)
+ ,("←", ITlarrow, 0)
+ ,("⋯", ITdotdot, 0)
+#endif
]
-- -----------------------------------------------------------------------------
-- takes a StringBuffer and a length, and returns the module name
-- and identifier parts of a qualified name. Splits at the *last* dot,
-- because of hierarchical module names.
-splitQualName orig_buf len = split orig_buf 0 0
+splitQualName orig_buf len = split orig_buf orig_buf
where
- split buf dot_off n
- | n == len = done dot_off
- | lookAhead buf n == '.' = split2 buf n (n+1)
- | otherwise = split buf dot_off (n+1)
+ split buf dot_buf
+ | orig_buf `byteDiff` buf >= len = done dot_buf
+ | c == '.' = found_dot buf'
+ | otherwise = split buf' dot_buf
+ where
+ (c,buf') = nextChar buf
-- careful, we might get names like M....
-- so, if the character after the dot is not upper-case, this is
-- the end of the qualifier part.
- split2 buf dot_off n
- | isUpper (lookAhead buf n) = split buf dot_off (n+1)
- | otherwise = done dot_off
-
- done dot_off =
- (lexemeToFastString orig_buf dot_off,
- lexemeToFastString (stepOnBy (dot_off+1) orig_buf) (len - dot_off -1))
+ found_dot buf -- buf points after the '.'
+ | isUpper c = split buf' buf
+ | otherwise = done buf
+ where
+ (c,buf') = nextChar buf
+
+ done dot_buf =
+ (lexemeToFastString orig_buf (qual_size - 1),
+ lexemeToFastString dot_buf (len - qual_size))
+ where
+ qual_size = orig_buf `byteDiff` dot_buf
varid span buf len =
case lookupUFM reservedWordsFM fs of
= return (L span (ITinteger $! parseInteger buf len 10 octDecDigit))
tok_octal span buf len
- = return (L span (ITinteger $! parseInteger (stepOnBy 2 buf) (len-2) 8 octDecDigit))
+ = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit))
tok_hexadecimal span buf len
- = return (L span (ITinteger $! parseInteger (stepOnBy 2 buf) (len-2) 16 hexDigit))
+ = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
prim_decimal span buf len
= return (L span (ITprimint $! parseInteger buf (len-1) 10 octDecDigit))
prim_octal span buf len
- = return (L span (ITprimint $! parseInteger (stepOnBy 2 buf) (len-3) 8 octDecDigit))
+ = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit))
prim_hexadecimal span buf len
- = return (L span (ITprimint $! parseInteger (stepOnBy 2 buf) (len-3) 16 hexDigit))
+ = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit))
tok_float str = ITrational $! readRational str
prim_float str = ITprimfloat $! readRational str
lex_string :: String -> P Token
lex_string s = do
i <- getInput
- case alexGetChar i of
+ case alexGetChar' i of
Nothing -> lit_error
Just ('"',i) -> do
if glaexts
then do
i <- getInput
- case alexGetChar i of
+ case alexGetChar' i of
Just ('#',i) -> do
setInput i
if any (> '\xFF') s
then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
- else let s' = mkFastStringNarrow (reverse s) in
- -- always a narrow string/byte array
+ else let s' = mkZFastString (reverse s) in
return (ITprimstring s')
+ -- mkZFastString is a hack to avoid encoding the
+ -- string in UTF-8. We just want the exact bytes.
_other ->
return (ITstring (mkFastString (reverse s)))
else
setInput i; lex_string s
| Just (c,i) <- next, is_space c -> do
setInput i; lex_stringgap s
- where next = alexGetChar i
+ where next = alexGetChar' i
- Just _ -> do
- c <- lex_char
- lex_string (c:s)
+ Just (c, i) -> do
+ c' <- lex_char c i
+ lex_string (c':s)
lex_stringgap s = do
c <- getCharOrFail
lex_char_tok span buf len = do -- We've seen '
i1 <- getInput -- Look ahead to first character
let loc = srcSpanStart span
- case alexGetChar i1 of
+ case alexGetChar' i1 of
Nothing -> lit_error
Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen ''
lit_ch <- lex_escape
mc <- getCharOrFail -- Trailing quote
if mc == '\'' then finish_char_tok loc lit_ch
- else lit_error
+ else do setInput i2; lit_error
- Just (c, i2@(AI end2 _ _)) | not (is_any c) -> lit_error
- | otherwise ->
+ Just (c, i2@(AI end2 _ _))
+ | not (isAny c) -> lit_error
+ | otherwise ->
-- We've seen 'x, where x is a valid character
-- (i.e. not newline etc) but not a quote or backslash
- case alexGetChar i2 of -- Look ahead one more character
+ case alexGetChar' i2 of -- Look ahead one more character
Nothing -> lit_error
Just ('\'', i3) -> do -- We've seen 'x'
setInput i3
th_exts <- extension thEnabled
let (AI end _ _) = i1
if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
- else lit_error
+ else do setInput i2; lit_error
finish_char_tok :: SrcLoc -> Char -> P (Located Token)
finish_char_tok loc ch -- We've already seen the closing quote
= do glaexts <- extension glaExtsEnabled
i@(AI end _ _) <- getInput
if glaexts then do
- case alexGetChar i of
+ case alexGetChar' i of
Just ('#',i@(AI end _ _)) -> do
setInput i
return (L (mkSrcSpan loc end) (ITprimchar ch))
else do
return (L (mkSrcSpan loc end) (ITchar ch))
-lex_char :: P Char
-lex_char = do
- mc <- getCharOrFail
- case mc of
- '\\' -> lex_escape
- c | is_any c -> return c
+lex_char :: Char -> AlexInput -> P Char
+lex_char c inp = do
+ case c of
+ '\\' -> do setInput inp; lex_escape
+ c | isAny c -> do setInput inp; return c
_other -> lit_error
+isAny c | c > '\xff' = isPrint c
+ | otherwise = is_any c
+
lex_escape :: P Char
lex_escape = do
c <- getCharOrFail
c1 -> do
i <- getInput
- case alexGetChar i of
+ case alexGetChar' i of
Nothing -> lit_error
Just (c2,i2) ->
- case alexGetChar i2 of
- Nothing -> lit_error
+ case alexGetChar' i2 of
+ Nothing -> do setInput i2; lit_error
Just (c3,i3) ->
let str = [c1,c2,c3] in
case [ (c,rest) | (p,c) <- silly_escape_chars,
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
readNum is_digit base conv = do
+ i <- getInput
c <- getCharOrFail
if is_digit c
then readNum2 is_digit base conv (conv c)
- else lit_error
+ else do setInput i; lit_error
readNum2 is_digit base conv i = do
input <- getInput
read i input
where read i input = do
- case alexGetChar input of
+ case alexGetChar' input of
Just (c,input') | is_digit c -> do
read (i*base + conv c) input'
_other -> do
- setInput input
if i >= 0 && i <= 0x10FFFF
- then return (chr i)
+ then do setInput input; return (chr i)
else lit_error
silly_escape_chars = [
("DEL", '\DEL')
]
+-- before calling lit_error, ensure that the current input is pointing to
+-- the position of the error in the buffer. This is so that we can report
+-- a correct location to the user, but also so we can detect UTF-8 decoding
+-- errors if they occur.
lit_error = lexError "lexical error in string/character literal"
getCharOrFail :: P Char
getCharOrFail = do
i <- getInput
- case alexGetChar i of
+ case alexGetChar' i of
Nothing -> lexError "unexpected end-of-file in string/character literal"
Just (c,i) -> do setInput i; return c
data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer
alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ _ s) = prevChar s '\n'
+alexInputPrevChar (AI _ _ buf) = prevChar buf '\n'
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar (AI loc ofs s)
| atEnd s = Nothing
- | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` Just (c, (AI loc' ofs' s'))
- where c = currentChar s
- loc' = advanceSrcLoc loc c
- ofs' = advanceOffs c ofs
- s' = stepOn s
+ | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq`
+ Just (adj_c, (AI loc' ofs' s'))
+ where (c,s') = nextChar s
+ loc' = advanceSrcLoc loc c
+ ofs' = advanceOffs c ofs
+
+ non_graphic = '\x0'
+ upper = '\x1'
+ lower = '\x2'
+ digit = '\x3'
+ symbol = '\x4'
+ space = '\x5'
+ other_graphic = '\x6'
+
+ adj_c
+#if __GLASGOW_HASKELL__ < 605
+ = c -- no Unicode support
+#else
+ | c <= '\x04' = non_graphic
+ | c <= '\xff' = c
+ | otherwise =
+ case generalCategory c of
+ UppercaseLetter -> upper
+ LowercaseLetter -> lower
+ TitlecaseLetter -> upper
+ ModifierLetter -> other_graphic
+ OtherLetter -> other_graphic
+ NonSpacingMark -> other_graphic
+ SpacingCombiningMark -> other_graphic
+ EnclosingMark -> other_graphic
+ DecimalNumber -> digit
+ LetterNumber -> other_graphic
+ OtherNumber -> other_graphic
+ ConnectorPunctuation -> other_graphic
+ DashPunctuation -> other_graphic
+ OpenPunctuation -> other_graphic
+ ClosePunctuation -> other_graphic
+ InitialQuote -> other_graphic
+ FinalQuote -> other_graphic
+ OtherPunctuation -> other_graphic
+ MathSymbol -> symbol
+ CurrencySymbol -> symbol
+ ModifierSymbol -> symbol
+ OtherSymbol -> symbol
+ Space -> space
+ _other -> non_graphic
+#endif
+
+-- This version does not squash unicode characters, it is used when
+-- lexing strings.
+alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar' (AI loc ofs s)
+ | atEnd s = Nothing
+ | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq`
+ Just (c, (AI loc' ofs' s'))
+ where (c,s') = nextChar s
+ loc' = advanceSrcLoc loc c
+ ofs' = advanceOffs c ofs
- advanceOffs :: Char -> Int -> Int
- advanceOffs '\n' offs = 0
- advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
- advanceOffs _ offs = offs + 1
+advanceOffs :: Char -> Int -> Int
+advanceOffs '\n' offs = 0
+advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8
+advanceOffs _ offs = offs + 1
getInput :: P AlexInput
getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b)
else hcat [ptext SLIT("parse error on input "),
char '`', text token, char '\'']
]
- where token = lexemeToString (stepOnBy (-len) buf) len
+ where token = lexemeToString (offsetBytes (-len) buf) len
-- Report a parse failure, giving the span of the previous token as
-- the location of the error. This is the entry point for errors
PFailed last_loc (srcParseErr buf len)
-- A lexical error is reported at a particular position in the source file,
--- not over a token range. TODO: this is slightly wrong, because we record
--- the error at the character position following the one which caused the
--- error. We should somehow back up by one character.
+-- not over a token range.
lexError :: String -> P a
lexError str = do
loc <- getSrcLoc
- i@(AI end _ _) <- getInput
- failLocMsgP loc end str
+ i@(AI end _ buf) <- getInput
+ reportLexError loc end buf False str
-- -----------------------------------------------------------------------------
-- This is the top-level function: called from the parser each time a
lexer :: (Located Token -> P a) -> P a
lexer cont = do
tok@(L _ tok__) <- lexToken
- -- trace ("token: " ++ show tok__) $ do
+ --trace ("token: " ++ show tok__) $ do
cont tok
lexToken :: P (Located Token)
AlexEOF -> do let span = mkSrcSpan loc1 loc1
setLastToken span 0
return (L span ITeof)
- AlexError (AI loc2 _ _) -> do failLocMsgP loc1 loc2 "lexical error"
+ AlexError (AI loc2 _ buf) -> do
+ reportLexError loc1 loc2 buf True "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
AlexToken inp2@(AI end _ buf2) len t -> do
setInput inp2
let span = mkSrcSpan loc1 end
- span `seq` setLastToken span len
- t span buf len
+ let bytes = byteDiff buf buf2
+ span `seq` setLastToken span bytes
+ t span buf bytes
+
+reportLexError loc1 loc2 buf is_prev str =
+ let
+ c | is_prev = prevChar buf '\0'
+ | otherwise = fst (nextChar buf)
+ in
+ if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
+ then failLocMsgP loc2 loc2 "UTF-8 decoding error"
+ else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
}
import ForeignCall ( Safety(..), CExportSpec(..), CLabelString,
CCallConv(..), CCallTarget(..), defaultCCallConv
)
-import OccName ( UserFS, varName, dataName, tcClsName, tvName )
+import OccName ( varName, dataName, tcClsName, tvName )
import DataCon ( DataCon, dataConName )
import SrcLoc ( Located(..), unLoc, getLoc, noLoc, combineSrcSpans,
SrcSpan, combineLocs, srcLocFile,
-- These special_ids are treated as keywords in various places,
-- but as ordinary ids elsewhere. 'special_id' collects all these
-- except 'unsafe' and 'forall' whose treatment differs depending on context
-special_id :: { Located UserFS }
+special_id :: { Located FastString }
special_id
: 'as' { L1 FSLIT("as") }
| 'qualified' { L1 FSLIT("qualified") }
| 'stdcall' { L1 FSLIT("stdcall") }
| 'ccall' { L1 FSLIT("ccall") }
-special_sym :: { Located UserFS }
+special_sym :: { Located FastString }
special_sym : '!' { L1 FSLIT("!") }
| '.' { L1 FSLIT(".") }
| '*' { L1 FSLIT("*") }
: '%module' modid tdefs vdefgs { HsExtCore $2 $3 $4 }
modid :: { Module }
- : CNAME { mkSysModuleFS (mkFastString $1) }
+ : CNAME { mkModuleFS (mkFastString $1) }
-------------------------------------------------------------
-- Type and newtype declarations are in HsSyn syntax
| '(' STRING '::' aty ')' { MachStr (mkFastString $2) }
tv_occ :: { OccName }
- : NAME { mkSysOcc tvName $1 }
+ : NAME { mkOccName tvName $1 }
var_occ :: { OccName }
- : NAME { mkSysOcc varName $1 }
+ : NAME { mkVarOcc $1 }
-- Type constructor
q_tc_name :: { IfaceExtName }
- : modid '.' CNAME { ExtPkg $1 (mkSysOcc tcName $3) }
+ : modid '.' CNAME { ExtPkg $1 (mkOccName tcName $3) }
-- Data constructor in a pattern or data type declaration; use the dataName,
-- because that's what we expect in Core case patterns
d_pat_occ :: { OccName }
- : CNAME { mkSysOcc dataName $1 }
+ : CNAME { mkOccName dataName $1 }
-- Data constructor occurrence in an expression;
-- use the varName because that's the worker Id
d_occ :: { OccName }
- : CNAME { mkSysOcc varName $1 }
+ : CNAME { mkVarOcc $1 }
{
import ForeignCall ( CCallConv, Safety, CCallTarget(..), CExportSpec(..),
DNCallSpec(..), DNKind(..), CLabelString )
import OccName ( srcDataName, varName, isDataOcc, isTcOcc,
- occNameUserString )
+ occNameString )
import SrcLoc
import OrdList ( OrdList, fromOL )
import Bag ( Bag, emptyBag, snocBag, consBag, foldrBag )
mkExport (CCall cconv) (L loc entity, v, ty) = return $
ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False)
where
- entity' | nullFastString entity = mkExtName (unLoc v)
- | otherwise = entity
+ entity' | nullFS entity = mkExtName (unLoc v)
+ | otherwise = entity
mkExport DNCall (L loc entity, v, ty) =
parseError (getLoc v){-TODO: not quite right-}
"Foreign export is not yet supported for .NET"
-- of the Haskell name is then performed, so if you foreign export (++),
-- it's external name will be "++". Too bad; it's important because we don't
-- want z-encoding (e.g. names with z's in them shouldn't be doubled)
--- (This is why we use occNameUserString.)
--
mkExtName :: RdrName -> CLabelString
-mkExtName rdrNm = mkFastString (occNameUserString (rdrNameOcc rdrNm))
+mkExtName rdrNm = mkFastString (occNameString (rdrNameOcc rdrNm))
\end{code}
#include "HsVersions.h"
import Module ( Module, mkModule )
-import OccName ( dataName, tcName, clsName, varName, mkOccFS
- )
-
+import OccName ( dataName, tcName, clsName, varName, mkOccNameFS,
+ mkVarOccFS )
import RdrName ( RdrName, nameRdrName, mkOrig, rdrNameOcc, mkUnqual )
import Unique ( Unique, Uniquable(..), hasKey,
mkPreludeMiscIdUnique, mkPreludeDataConUnique,
This *local* name is used by the interactive stuff
\begin{code}
-itName uniq = mkInternalName uniq (mkOccFS varName FSLIT("it")) noSrcLoc
+itName uniq = mkInternalName uniq (mkOccNameFS varName FSLIT("it")) noSrcLoc
\end{code}
\begin{code}
genUnitDataCon_RDR = dataQual_RDR pREL_BASE FSLIT("Unit")
----------------------
-varQual_RDR mod str = mkOrig mod (mkOccFS varName str)
-tcQual_RDR mod str = mkOrig mod (mkOccFS tcName str)
-clsQual_RDR mod str = mkOrig mod (mkOccFS clsName str)
-dataQual_RDR mod str = mkOrig mod (mkOccFS dataName str)
+varQual_RDR mod str = mkOrig mod (mkOccNameFS varName str)
+tcQual_RDR mod str = mkOrig mod (mkOccNameFS tcName str)
+clsQual_RDR mod str = mkOrig mod (mkOccNameFS clsName str)
+dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str)
\end{code}
%************************************************************************
clsQual = mk_known_key_name clsName
mk_known_key_name space mod str uniq
- = mkExternalName uniq mod (mkOccFS space str)
+ = mkExternalName uniq mod (mkOccNameFS space str)
Nothing noSrcLoc
conName :: Name -> FastString -> Unique -> Name
conName tycon occ uniq
- = mkExternalName uniq (nameModule tycon) (mkOccFS dataName occ)
+ = mkExternalName uniq (nameModule tycon) (mkOccNameFS dataName occ)
(Just tycon) noSrcLoc
methName :: Name -> FastString -> Unique -> Name
methName cls occ uniq
- = mkExternalName uniq (nameModule cls) (mkOccFS varName occ)
+ = mkExternalName uniq (nameModule cls) (mkVarOccFS occ)
(Just cls) noSrcLoc
\end{code}
import DataCon ( dataConTag, dataConTyCon, dataConWorkId, fIRST_TAG )
import CoreUtils ( cheapEqExpr, exprIsConApp_maybe )
import Type ( tyConAppTyCon, coreEqType )
-import OccName ( occNameUserString)
+import OccName ( occNameFS )
import PrelNames ( unpackCStringFoldrName, unpackCStringFoldrIdKey, hasKey,
eqStringName, unpackCStringIdKey )
import Maybes ( orElse )
primOpRules :: PrimOp -> Name -> [CoreRule]
primOpRules op op_name = primop_rule op
where
- rule_name = mkFastString (occNameUserString (primOpOcc op))
+ rule_name = occNameFS (primOpOcc op)
rule_name_case = rule_name `appendFS` FSLIT("->case")
-- A useful shorthand
import NewDemand
import Var ( TyVar )
-import OccName ( OccName, pprOccName, mkVarOcc )
+import OccName ( OccName, pprOccName, mkVarOccFS )
import TyCon ( TyCon, isPrimTyCon, tyConPrimRep, PrimRep(..) )
import Type ( Type, mkForAllTys, mkFunTy, mkFunTys, tyConAppTyCon,
typePrimRep )
[Type]
Type
-mkDyadic str ty = Dyadic (mkVarOcc str) ty
-mkMonadic str ty = Monadic (mkVarOcc str) ty
-mkCompare str ty = Compare (mkVarOcc str) ty
-mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOcc str) tvs tys ty
+mkDyadic str ty = Dyadic (mkVarOccFS str) ty
+mkMonadic str ty = Monadic (mkVarOccFS str) ty
+mkCompare str ty = Compare (mkVarOccFS str) ty
+mkGenPrimOp str tvs tys ty = GenPrimOp (mkVarOccFS str) tvs tys ty
\end{code}
%************************************************************************
import Var ( TyVar, mkTyVar )
import Name ( Name, BuiltInSyntax(..), mkInternalName, mkWiredInName )
-import OccName ( mkOccFS, tcName, mkTyVarOcc )
+import OccName ( mkOccNameFS, tcName, mkTyVarOcc )
import TyCon ( TyCon, ArgVrcs, mkPrimTyCon, mkLiftedPrimTyCon,
PrimRep(..) )
import Type ( mkTyConApp, mkTyConTy, mkTyVarTys, mkTyVarTy,
mkPrimTc :: FastString -> Unique -> TyCon -> Name
mkPrimTc fs uniq tycon
- = mkWiredInName gHC_PRIM (mkOccFS tcName fs)
+ = mkWiredInName gHC_PRIM (mkOccNameFS tcName fs)
uniq
Nothing -- No parent object
(ATyCon tycon) -- Relevant TyCon
import RdrName ( nameRdrName )
import Name ( Name, BuiltInSyntax(..), nameUnique, nameOccName,
nameModule, mkWiredInName )
-import OccName ( mkOccFS, tcName, dataName, mkTupleOcc, mkDataConWorkerOcc )
+import OccName ( mkOccNameFS, tcName, dataName, mkTupleOcc,
+ mkDataConWorkerOcc )
import DataCon ( DataCon, mkDataCon, dataConWorkId, dataConSourceArity )
import Var ( TyVar, tyVarKind )
import TyCon ( TyCon, AlgTyConRhs(DataTyCon), tyConDataCons,
- mkTupleTyCon, mkAlgTyCon, tyConName
- )
+ mkTupleTyCon, mkAlgTyCon, tyConName )
-import BasicTypes ( Arity, RecFlag(..), Boxity(..), isBoxed, StrictnessMark(..) )
+import BasicTypes ( Arity, RecFlag(..), Boxity(..), isBoxed,
+ StrictnessMark(..) )
-import Type ( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys, TyThing(..) )
+import Type ( Type, mkTyConTy, mkTyConApp, mkTyVarTy, mkTyVarTys,
+ TyThing(..) )
import Kind ( mkArrowKinds, liftedTypeKind, ubxTupleKind )
import Unique ( incrUnique, mkTupleTyConUnique,
mkTupleDataConUnique, mkPArrDataConUnique )
\begin{code}
mkWiredInTyConName :: BuiltInSyntax -> Module -> FastString -> Unique -> TyCon -> Name
mkWiredInTyConName built_in mod fs uniq tycon
- = mkWiredInName mod (mkOccFS tcName fs) uniq
+ = mkWiredInName mod (mkOccNameFS tcName fs) uniq
Nothing -- No parent object
(ATyCon tycon) -- Relevant TyCon
built_in
mkWiredInDataConName :: BuiltInSyntax -> Module -> FastString -> Unique -> DataCon -> Name -> Name
mkWiredInDataConName built_in mod fs uniq datacon parent
- = mkWiredInName mod (mkOccFS dataName fs) uniq
+ = mkWiredInName mod (mkOccNameFS dataName fs) uniq
(Just parent) -- Name of parent TyCon
(ADataCon datacon) -- Relevant DataCon
built_in
tyvar = head alphaTyVars
tyvarTys = replicate arity $ mkTyVarTy tyvar
nameStr = mkFastString ("MkPArr" ++ show arity)
- name = mkWiredInName pREL_PARR (mkOccFS dataName nameStr) uniq
+ name = mkWiredInName pREL_PARR (mkOccNameFS dataName nameStr) uniq
Nothing (ADataCon data_con) UserSyntax
uniq = mkPArrDataConUnique arity
#include "HsVersions.h"
import Var ( Id )
-import Name ( UserFS, EncodedFS, encodeFS, decode,
- getOccName, occNameFS
- )
+import Name ( getOccName, occNameFS )
import Module ( Module )
import Outputable
import FastTypes
cc_mod :: Module -- Name of module defining this CC.
}
-type CcName = EncodedFS
+type CcName = FastString
data IsDupdCC
= OriginalCC -- This says how the CC is *used*. Saying that
Building cost centres
\begin{code}
-mkUserCC :: UserFS -> Module -> CostCentre
+mkUserCC :: FastString -> Module -> CostCentre
mkUserCC cc_name mod
- = NormalCC { cc_name = encodeFS cc_name, cc_mod = mod,
+ = NormalCC { cc_name = cc_name, cc_mod = mod,
cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
}
costCentreUserName (NoCostCentre) = "NO_CC"
costCentreUserName (AllCafsCC {}) = "CAF"
costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf})
- = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ decode (unpackFS name)
+ = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ unpackFS name
\end{code}
import Name ( Name, nameIsLocalOrFrom, mkInternalName, isWiredInName,
nameSrcLoc, nameOccName, nameModule, nameParent, isExternalName )
import NameSet
-import OccName ( tcName, isDataOcc, occNameFlavour, reportIfUnused )
+import OccName ( tcName, isDataOcc, pprNonVarNameSpace, occNameSpace,
+ reportIfUnused )
import Module ( Module )
import PrelNames ( mkUnboundName, rOOT_MAIN, iNTERACTIVE, consDataConKey, hasKey )
import UniqSupply
warnUnusedName (name, prov)
= addWarnAt loc $
sep [msg <> colon,
- nest 2 $ occNameFlavour (nameOccName name) <+> quotes (ppr name)]
+ nest 2 $ pprNonVarNameSpace (occNameSpace (nameOccName name))
+ <+> quotes (ppr name)]
-- TODO should be a proper span
where
(loc,msg) = case prov of
unknownNameErr rdr_name
= sep [ptext SLIT("Not in scope:"),
- nest 2 $ occNameFlavour (rdrNameOcc rdr_name) <+> quotes (ppr rdr_name)]
+ nest 2 $ pprNonVarNameSpace (occNameSpace (rdrNameOcc rdr_name))
+ <+> quotes (ppr rdr_name)]
unknownInstBndrErr cls op
= quotes (ppr op) <+> ptext SLIT("is not a (visible) method of class") <+> quotes (ppr cls)
import NameSet
import RdrName ( RdrName, emptyGlobalRdrEnv, extendLocalRdrEnv, lookupLocalRdrEnv )
import LoadIface ( loadHomeInterface )
-import UnicodeUtil ( stringToUtf8 )
import UniqFM ( isNullUFM )
import UniqSet ( emptyUniqSet )
import List ( nub )
= getSrcSpanM `thenM` \ sloc ->
let
expr = HsApp (L sloc (HsVar assertErrorName)) (L sloc (HsLit msg))
- msg = HsStringPrim (mkFastString (stringToUtf8 (showSDoc (ppr sloc))))
+ msg = HsStringPrim (mkFastString (showSDoc (ppr sloc)))
in
returnM (expr, emptyFVs)
\end{code}
import FiniteMap
import PrelNames ( pRELUDE, isUnboundName, main_RDR_Unqual )
-import Module ( Module, moduleUserString, unitModuleEnv,
+import Module ( Module, moduleString, unitModuleEnv,
lookupModuleEnv, moduleEnvElts, foldModuleEnv )
import Name ( Name, nameSrcLoc, nameOccName, nameModule, isWiredInName,
nameParent, nameParent_maybe, isExternalName,
isBuiltInSyntax )
import NameSet
import NameEnv
-import OccName ( srcDataName, isTcOcc, occNameFlavour, OccEnv,
- mkOccEnv, lookupOccEnv, emptyOccEnv, extendOccEnv )
+import OccName ( srcDataName, isTcOcc, pprNonVarNameSpace,
+ occNameSpace,
+ OccEnv, mkOccEnv, lookupOccEnv, emptyOccEnv,
+ extendOccEnv )
import HscTypes ( GenAvailInfo(..), AvailInfo,
HomePackageTable, PackageIfaceTable,
unQualInScope,
, Just deprec_txt <- lookupDeprec hpt pit name
= setSrcSpan (importSpecLoc imp_spec) $
addWarn (sep [ptext SLIT("Deprecated use of") <+>
- occNameFlavour (nameOccName name) <+>
+ pprNonVarNameSpace (occNameSpace (nameOccName name)) <+>
quotes (ppr name),
(parens imp_msg) <> colon,
(ppr deprec_txt) ])
(vcat (map ppr_mod_ie mod_ies)) })
}
where
- mkFilename this_mod = moduleUserString this_mod ++ ".imports"
+ mkFilename this_mod = moduleString this_mod ++ ".imports"
ppr_mod_ie (mod_name, ies)
| mod_name == pRELUDE
= empty
import CoreFVs -- all of it
import CoreSubst ( Subst, emptySubst, extendInScope, extendIdSubst,
cloneIdBndr, cloneRecIdBndrs )
-import Id ( Id, idType, mkSysLocalUnencoded,
- isOneShotLambda, zapDemandIdInfo,
+import Id ( Id, idType, mkSysLocal, isOneShotLambda,
+ zapDemandIdInfo,
idSpecialisation, idWorkerInfo, setIdInfo
)
import IdInfo ( workerExists, vanillaIdInfo, isEmptySpecInfo )
import VarSet
import VarEnv
import Name ( getOccName )
-import OccName ( occNameUserString )
+import OccName ( occNameString )
import Type ( isUnLiftedType, Type )
import BasicTypes ( TopLevelFlag(..) )
import UniqSupply
in
returnLvl (extendPolyLvlEnv dest_lvl env abs_vars (bndrs `zip` new_bndrs), new_bndrs)
where
- mk_poly_bndr bndr uniq = mkSysLocalUnencoded (mkFastString str) uniq poly_ty
+ mk_poly_bndr bndr uniq = mkSysLocal (mkFastString str) uniq poly_ty
where
- str = "poly_" ++ occNameUserString (getOccName bndr)
+ str = "poly_" ++ occNameString (getOccName bndr)
poly_ty = mkPiTypes abs_vars (idType bndr)
-> LvlM Id
newLvlVar str vars body_ty
= getUniqueUs `thenLvl` \ uniq ->
- returnUs (mkSysLocalUnencoded (mkFastString str) uniq (mkPiTypes vars body_ty))
+ returnUs (mkSysLocal (mkFastString str) uniq (mkPiTypes vars body_ty))
-- The deeply tiresome thing is that we have to apply the substitution
-- to the rules inside each Id. Grr. But it matters.
)
import DynFlags ( SimplifierSwitch(..), DynFlags, DynFlag(..), dopt )
import StaticFlags ( opt_PprStyle_Debug, opt_HistorySize )
-import OccName ( EncodedFS )
import Unique ( Unique )
import Maybes ( expectJust )
import FiniteMap ( FiniteMap, emptyFM, isEmptyFM, lookupFM, addToFM, plusFM_C, fmToList )
getDOptsSmpl
= SM (\dflags us sc -> (dflags, us, sc))
-newId :: EncodedFS -> Type -> SimplM Id
+newId :: FastString -> Type -> SimplM Id
newId fs ty = getUniqueSmpl `thenSmpl` \ uniq ->
returnSmpl (mkSysLocal fs uniq ty)
\end{code}
)
import MkId ( eRROR_ID )
import Literal ( mkStringLit )
-import OccName ( encodeFS )
import IdInfo ( OccInfo(..), isLoopBreaker,
setArityInfo, zapDemandInfo,
setUnfoldingInfo,
) `thenSmpl` \ (final_bndrs', final_args) ->
-- See comment about "$j" name above
- newId (encodeFS FSLIT("$j")) (mkPiTypes final_bndrs' rhs_ty') `thenSmpl` \ join_bndr ->
+ newId FSLIT("$j") (mkPiTypes final_bndrs' rhs_ty') `thenSmpl` \ join_bndr ->
-- Notice the funky mkPiTypes. If the contructor has existentials
-- it's possible that the join point will be abstracted over
-- type varaibles as well as term variables.
import VarEnv
import Maybes ( maybeToBool )
import Name ( getOccName, isExternalName, nameOccName )
-import OccName ( occNameUserString, occNameFS )
+import OccName ( occNameString, occNameFS )
import BasicTypes ( Arity )
import Packages ( HomeModules )
import StaticFlags ( opt_RuntimeTypes )
is_join_var :: Id -> Bool
-- A hack (used only for compiler debuggging) to tell if
-- a variable started life as a join point ($j)
-is_join_var j = occNameUserString (getOccName j) == "$j"
+is_join_var j = occNameString (getOccName j) == "$j"
\end{code}
\begin{code}
import Id ( Id, idName, idType, mkUserLocal, mkLocalId )
import PrelInfo ( isNoDictClass )
import Name ( Name, mkMethodOcc, getOccName, getSrcLoc, nameModule,
- isInternalName, setNameUnique, mkSystemVarNameEncoded )
+ isInternalName, setNameUnique, mkSystemVarName )
import NameSet ( addOneToNameSet )
import Literal ( inIntRange )
import Var ( TyVar, tyVarKind, setIdType )
= do { loc <- getInstLoc orig
; new_uniq <- newUnique
; let
- lit_nm = mkSystemVarNameEncoded new_uniq FSLIT("lit")
- -- The "encoded" bit means that we don't need to
- -- z-encode the string every time we call this!
+ lit_nm = mkSystemVarName new_uniq FSLIT("lit")
lit_inst = LitInst lit_nm lit expected_ty loc
; extendLIE lit_inst
; return (HsVar (instToId lit_inst)) }
import Outputable
import PrelNames ( genericTyConNames )
import DynFlags
-import UnicodeUtil ( stringToUtf8 )
import ErrUtils ( dumpIfSet_dyn )
import Util ( count, lengthIs, isSingleton, lengthExceeds )
import Unique ( Uniquable(..) )
where
error_rhs = noLoc $ HsLam (mkMatchGroup [mkSimpleMatch wild_pats simple_rhs])
simple_rhs = nlHsApp (nlHsVar (getName nO_METHOD_BINDING_ERROR_ID))
- (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 error_msg))))
+ (nlHsLit (HsStringPrim (mkFastString error_msg)))
error_msg = showSDoc (hcat [ppr loc, text "|", ppr sel_id ])
-- When the type is of form t1 -> t2 -> t3
ident_pat s = nlConPat ident_RDR [nlLitPat (mkHsString s)] -- Ident "foo"
symbol_pat s = nlConPat symbol_RDR [nlLitPat (mkHsString s)] -- Symbol ">>"
- data_con_str con = occNameUserString (getOccName con)
+ data_con_str con = occNameString (getOccName con)
read_punc c = bindLex (punc_pat c)
read_arg a ty
| otherwise
= [bindLex (ident_pat lbl_str)]
where
- lbl_str = occNameUserString (getOccName lbl)
+ lbl_str = occNameString (getOccName lbl)
\end{code}
dc_nm = getName data_con
dc_occ_nm = getOccName data_con
- con_str = occNameUserString dc_occ_nm
+ con_str = occNameString dc_occ_nm
op_con_str = wrapOpParens con_str
backquote_str = wrapOpBackquotes con_str
-- it seems tidier to have them both sides.
where
occ_nm = getOccName l
- nm = wrapOpParens (occNameUserString occ_nm)
+ nm = wrapOpParens (occNameString occ_nm)
show_args = zipWith show_arg bs_needed arg_tys
(show_arg1:show_arg2:_) = show_args
constr_args dc =
[ -- nlHsIntLit (toInteger (dataConTag dc)), -- Tag
nlHsVar data_type_name, -- DataType
- nlHsLit (mkHsString (occNameUserString dc_occ)), -- String name
+ nlHsLit (mkHsString (occNameString dc_occ)), -- String name
nlList labels, -- Field labels
nlHsVar fixity] -- Fixity
where
= mkDerivedRdrName tc_name mk_occ
where
tc_name = tyConName tycon
- mk_occ tc_occ = mkOccFS varName (mkFastString new_str)
+ mk_occ tc_occ = mkVarOccFS (mkFastString new_str)
where
new_str = str ++ occNameString tc_occ ++ "#"
\end{code}
import MkId ( mkDictFunId, rUNTIME_ERROR_ID )
import FunDeps ( checkInstFDs )
import Name ( Name, getSrcLoc )
-import UnicodeUtil ( stringToUtf8 )
import Maybe ( catMaybes )
import SrcLoc ( srcLocSpan, unLoc, noLoc, Located(..), srcSpanStart )
import ListSetOps ( minusList )
-- Hardly beautiful, but only three extra lines.
nlHsApp (noLoc $ TyApp (nlHsVar rUNTIME_ERROR_ID)
[idType this_dict_id])
- (nlHsLit (HsStringPrim (mkFastString (stringToUtf8 msg))))
+ (nlHsLit (HsStringPrim (mkFastString msg)))
| otherwise -- The common case
= mkHsConApp dict_constr inst_tys' (map HsVar scs_and_meths)
import Id ( Id, mkExportedLocalId, isLocalId, idName, idType )
import Var ( Var )
import Module ( Module, ModuleEnv, moduleEnvElts, elemModuleEnv )
-import OccName ( mkVarOcc, mkOccFS, varName )
+import OccName ( mkVarOccFS )
import Name ( Name, NamedThing(..), isExternalName, getSrcLoc, isWiredInName,
mkExternalName )
import NameSet
dflags <- getDOpts ;
let { main_mod = mainModIs dflags ;
main_fn = case mainFunIs dflags of {
- Just fn -> mkRdrUnqual (mkVarOcc (mkFastString fn)) ;
+ Just fn -> mkRdrUnqual (mkVarOccFS (mkFastString fn)) ;
Nothing -> main_RDR_Unqual } } ;
check_main ghci_mode tcg_env main_mod main_fn
-- for 'main' in the interface file!
; let { root_main_name = mkExternalName rootMainKey rOOT_MAIN
- (mkOccFS varName FSLIT("main"))
+ (mkVarOccFS FSLIT("main"))
(Just main_name) (getSrcLoc main_name)
; root_main_id = mkExportedLocalId root_main_name ty
; main_bind = noLoc (VarBind root_main_id main_expr) }
import HscTypes ( lookupType, ExternalPackageState(..), emptyModDetails )
import OccName
import Var ( Id, TyVar, idType )
-import Module ( moduleUserString )
+import Module ( moduleString )
import TcRnMonad
import IfaceEnv ( lookupOrig )
import Class ( Class, classExtraBigSig )
qReport True msg = addErr (text msg)
qReport False msg = addReport (text msg)
- qCurrentModule = do { m <- getModule; return (moduleUserString m) }
+ qCurrentModule = do { m <- getModule; return (moduleString m) }
qReify v = reify v
qRecover = recoverM
-- have free variables, we may need to generate NameL's for them.
where
name = getName thing
- mod = moduleUserString (nameModule name)
- occ_str = occNameUserString occ
+ mod = moduleString (nameModule name)
+ occ_str = occNameString occ
occ = nameOccName name
mk_varg | OccName.isDataOcc occ = TH.mkNameG_d
| OccName.isVarOcc occ = TH.mkNameG_v
import Var ( Var, Id, TyVar, tyVarKind )
import VarSet ( TyVarSet )
import Name ( Name, NamedThing(..), BuiltInSyntax(..), mkWiredInName )
-import OccName ( mkOccFS, tcName, parenSymOcc )
+import OccName ( mkOccNameFS, tcName, parenSymOcc )
import BasicTypes ( IPName, tupleParens )
import TyCon ( TyCon, mkFunTyCon, tyConArity, tupleTyConBoxity, isTupleTyCon, isRecursiveTyCon, isNewTyCon )
import Class ( Class )
-- a prefix way, thus: (->) Int# Int#. And this is unusual.
funTyConName = mkWiredInName gHC_PRIM
- (mkOccFS tcName FSLIT("(->)"))
+ (mkOccNameFS tcName FSLIT("(->)"))
funTyConKey
Nothing -- No parent object
(ATyCon funTyCon) -- Relevant TyCon
import FastMutInt
import PackageConfig ( PackageId, packageIdFS, fsToPackageId )
-#if __GLASGOW_HASKELL__ < 503
-import DATA_IOREF
-import DATA_BITS
-import DATA_INT
-import DATA_WORD
-import Char
-import Monad
-import Exception
-import GlaExts hiding (ByteArray, newByteArray, freezeByteArray)
-import Array
-import IO
-import PrelIOBase ( IOError(..), IOErrorType(..)
-#if __GLASGOW_HASKELL__ > 411
- , IOException(..)
-#endif
- )
-import PrelReal ( Ratio(..) )
-import PrelIOBase ( IO(..) )
-import IOExts ( openFileEx, IOModeEx(..) )
-#else
+import Foreign
import Data.Array.IO
import Data.Array
import Data.Bits
#else
import System.IO ( openBinaryFile )
#endif
-#endif
#if __GLASGOW_HASKELL__ < 601
openBinaryFile f mode = openFileEx f (BinaryMode mode)
#endif
-#if __GLASGOW_HASKELL__ < 503
-type BinArray = MutableByteArray RealWorld Int
-newArray_ bounds = stToIO (newCharArray bounds)
-unsafeWrite arr ix e = stToIO (writeWord8Array arr ix e)
-unsafeRead arr ix = stToIO (readWord8Array arr ix)
-#if __GLASGOW_HASKELL__ < 411
-newByteArray# = newCharArray#
-#endif
-hPutArray h arr sz = hPutBufBAFull h arr sz
-hGetArray h sz = hGetBufBAFull h sz
-
-mkIOError :: IOErrorType -> String -> Maybe Handle -> Maybe FilePath -> Exception
-mkIOError t location maybe_hdl maybe_filename
- = IOException (IOError maybe_hdl t location ""
-#if __GLASGOW_HASKELL__ > 411
- maybe_filename
-#endif
- )
-
-eofErrorType = EOF
-
-#ifndef SIZEOF_HSINT
-#define SIZEOF_HSINT INT_SIZE_IN_BYTES
-#endif
-
-#ifndef SIZEOF_HSWORD
-#define SIZEOF_HSWORD WORD_SIZE_IN_BYTES
-#endif
-
-#else
type BinArray = IOUArray Int Word8
-#endif
---------------------------------------------------------------
-- BinHandle
-- Reading and writing FastStrings
---------------------------------------------------------
-putFS bh (FastString id l ba) = do
- put_ bh (I# l)
- putByteArray bh ba l
-putFS bh s = error ("Binary.put_(FastString): " ++ unpackFS s)
- -- Note: the length of the FastString is *not* the same as
- -- the size of the ByteArray: the latter is rounded up to a
- -- multiple of the word size.
+putFS bh (FastString id l _ buf _) = do
+ put_ bh l
+ withForeignPtr buf $ \ptr ->
+ let
+ go n | n == l = return ()
+ | otherwise = do
+ b <- peekElemOff ptr n
+ putByte bh b
+ go (n+1)
+ in
+ go 0
{- -- possible faster version, not quite there yet:
getFS bh@BinMem{} = do
return $! (mkFastSubStringBA# arr off l)
-}
getFS bh = do
- (I# l) <- get bh
- (BA ba) <- getByteArray bh (I# l)
- return $! (mkFastSubStringBA# ba 0# l)
+ l <- get bh
+ fp <- mallocForeignPtrBytes l
+ withForeignPtr fp $ \ptr -> do
+ let
+ go n | n == l = mkFastStringForeignPtr ptr fp l
+ | otherwise = do
+ b <- getByte bh
+ pokeElemOff ptr n b
+ go (n+1)
+ --
+ go 0
instance Binary PackageId where
put_ bh pid = put_ bh (packageIdFS pid)
get bh = do { fs <- get bh; return (fsToPackageId fs) }
instance Binary FastString where
- put_ bh f@(FastString id l ba) =
+ put_ bh f@(FastString id l _ fp _) =
case getUserData bh of {
UserData { ud_next = j_r, ud_map = out_r, ud_dict = dict} -> do
out <- readIORef out_r
import Foreign
import IO
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase ( IO(..) )
-import IOExts ( hPutBufFull )
-#else
import GHC.IOBase ( IO(..) )
import System.IO ( hPutBuf )
-#endif
-
-import GLAEXTS ( touch#, byteArrayContents#, Int(..), Int#, Addr# )
+import GHC.Ptr ( Ptr(..) )
-import PrimPacked ( Ptr(..) )
+import GLAEXTS ( Int(..), Int#, Addr# )
-- -----------------------------------------------------------------------------
loop cs (i+1)
bPutFS :: BufHandle -> FastString -> IO ()
-bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len# arr#) = do
- let len = I# len#
+bPutFS b@(BufHandle buf r hdl) fs@(FastString _ len _ fp _) =
+ withForeignPtr fp $ \ptr -> do
i <- readFastMutInt r
if (i + len) >= buf_size
then do hPutBuf hdl buf i
writeFastMutInt r 0
if (len >= buf_size)
- then do
- let a# = byteArrayContents# arr#
- hPutBuf hdl (Ptr a#) len
- touch fs
+ then hPutBuf hdl ptr len
else bPutFS b fs
else do
- let a# = byteArrayContents# arr#
- copyBytes (buf `plusPtr` i) (Ptr a#) len
- touch fs
+ copyBytes (buf `plusPtr` i) ptr len
writeFastMutInt r (i+len)
bPutFS _ _ = panic "bPutFS"
free buf
return ()
-touch r = IO $ \s -> case touch# r s of s -> (# s, () #)
-
#if 0
myPutBuf s hdl buf i =
modifyIOError (\e -> ioeSetErrorString e (ioeGetErrorString e ++ ':':s ++ " (" ++ show buf ++ "," ++ show i ++ ")")) $
--- /dev/null
+{-# OPTIONS_GHC -O #-}
+-- -----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 1997-2003
+--
+-- Character encodings
+--
+-- -----------------------------------------------------------------------------
+
+module Encoding (
+ -- * UTF-8
+ utf8DecodeChar#,
+ utf8PrevChar,
+ utf8CharStart,
+ utf8DecodeChar,
+ utf8DecodeString,
+ utf8EncodeChar,
+ utf8EncodeString,
+ utf8EncodedLength,
+ countUTF8Chars,
+
+ -- * Latin-1
+ latin1DecodeChar,
+ latin1EncodeChar,
+
+ -- * Z-encoding
+ zEncodeString,
+ zDecodeString
+ ) where
+
+#define COMPILING_FAST_STRING
+#include "HsVersions.h"
+import Foreign
+import Data.Char ( ord, chr, isDigit, digitToInt, isHexDigit )
+import Numeric ( showHex )
+
+import GHC.Ptr ( Ptr(..) )
+import GHC.Base
+
+-- -----------------------------------------------------------------------------
+-- Latin-1
+
+latin1DecodeChar ptr = do
+ w <- peek ptr
+ return (unsafeChr (fromIntegral w), ptr `plusPtr` 1)
+
+latin1EncodeChar c ptr = do
+ poke ptr (fromIntegral (ord c))
+ return (ptr `plusPtr` 1)
+
+-- -----------------------------------------------------------------------------
+-- UTF-8
+
+-- We can't write the decoder as efficiently as we'd like without
+-- resorting to unboxed extensions, unfortunately. I tried to write
+-- an IO version of this function, but GHC can't eliminate boxed
+-- results from an IO-returning function.
+--
+-- We assume we can ignore overflow when parsing a multibyte character here.
+-- To make this safe, we add extra sentinel bytes to unparsed UTF-8 sequences
+-- before decoding them (see StringBuffer.hs).
+
+{-# INLINE utf8DecodeChar# #-}
+utf8DecodeChar# :: Addr# -> (# Char#, Addr# #)
+utf8DecodeChar# a# =
+ let ch0 = word2Int# (indexWord8OffAddr# a# 0#) in
+ case () of
+ _ | ch0 <=# 0x7F# -> (# chr# ch0, a# `plusAddr#` 1# #)
+
+ | ch0 >=# 0xC0# && ch0 <=# 0xDF# ->
+ let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+ (# chr# (((ch0 -# 0xC0#) `uncheckedIShiftL#` 6#) +#
+ (ch1 -# 0x80#)),
+ a# `plusAddr#` 2# #)
+
+ | ch0 >=# 0xE0# && ch0 <=# 0xEF# ->
+ let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+ let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+ if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
+ (# chr# (((ch0 -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+ ((ch1 -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ch2 -# 0x80#)),
+ a# `plusAddr#` 3# #)
+
+ | ch0 >=# 0xF0# && ch0 <=# 0xF8# ->
+ let ch1 = word2Int# (indexWord8OffAddr# a# 1#) in
+ if ch1 <# 0x80# || ch1 >=# 0xC0# then fail 1# else
+ let ch2 = word2Int# (indexWord8OffAddr# a# 2#) in
+ if ch2 <# 0x80# || ch2 >=# 0xC0# then fail 2# else
+ let ch3 = word2Int# (indexWord8OffAddr# a# 3#) in
+ if ch3 <# 0x80# || ch3 >=# 0xC0# then fail 3# else
+ (# chr# (((ch0 -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+ ((ch1 -# 0x80#) `uncheckedIShiftL#` 12#) +#
+ ((ch2 -# 0x80#) `uncheckedIShiftL#` 6#) +#
+ (ch3 -# 0x80#)),
+ a# `plusAddr#` 4# #)
+
+ | otherwise -> fail 1#
+ where
+ -- all invalid sequences end up here:
+ fail n = (# '\0'#, a# `plusAddr#` n #)
+ -- '\xFFFD' would be the usual replacement character, but
+ -- that's a valid symbol in Haskell, so will result in a
+ -- confusing parse error later on. Instead we use '\0' which
+ -- will signal a lexer error immediately.
+
+utf8DecodeChar :: Ptr Word8 -> (Char, Ptr Word8)
+utf8DecodeChar (Ptr a#) = ( C# c#, Ptr b# )
+ where (# c#, b# #) = utf8DecodeChar# a#
+
+-- UTF-8 is cleverly designed so that we can always figure out where
+-- the start of the current character is, given any position in a
+-- stream. This function finds the start of the previous character,
+-- assuming there *is* a previous character.
+utf8PrevChar :: Ptr Word8 -> IO (Ptr Word8)
+utf8PrevChar p = utf8CharStart (p `plusPtr` (-1))
+
+utf8CharStart :: Ptr Word8 -> IO (Ptr Word8)
+utf8CharStart p = go p
+ where go p = do w <- peek p
+ if (w .&. 0xC0) == 0x80
+ then go (p `plusPtr` (-1))
+ else return p
+
+utf8DecodeString :: Ptr Word8 -> Int -> IO [Char]
+STRICT2(utf8DecodeString)
+utf8DecodeString (Ptr a#) (I# len#)
+ = unpack a#
+ where
+ end# = addr2Int# (a# `plusAddr#` len#)
+
+ unpack p#
+ | addr2Int# p# >=# end# = return []
+ | otherwise =
+ case utf8DecodeChar# p# of
+ (# c#, q# #) -> do
+ chs <- unpack q#
+ return (C# c# : chs)
+
+countUTF8Chars :: Ptr Word8 -> Int -> IO Int
+countUTF8Chars ptr bytes = go ptr 0
+ where
+ end = ptr `plusPtr` bytes
+
+ STRICT2(go)
+ go ptr n
+ | ptr >= end = return n
+ | otherwise = do
+ case utf8DecodeChar# (unPtr ptr) of
+ (# c, a #) -> go (Ptr a) (n+1)
+
+unPtr (Ptr a) = a
+
+utf8EncodeChar c ptr =
+ let x = ord c in
+ case () of
+ _ | x > 0 && x <= 0x007f -> do
+ poke ptr (fromIntegral x)
+ return (ptr `plusPtr` 1)
+ -- NB. '\0' is encoded as '\xC0\x80', not '\0'. This is so that we
+ -- can have 0-terminated UTF-8 strings (see GHC.Base.unpackCStringUtf8).
+ | x <= 0x07ff -> do
+ poke ptr (fromIntegral (0xC0 .|. ((x `shiftR` 6) .&. 0x1F)))
+ pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+ return (ptr `plusPtr` 2)
+ | x <= 0xffff -> do
+ poke ptr (fromIntegral (0xE0 .|. (x `shiftR` 12) .&. 0x0F))
+ pokeElemOff ptr 1 (fromIntegral (0x80 .|. (x `shiftR` 6) .&. 0x3F))
+ pokeElemOff ptr 2 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+ return (ptr `plusPtr` 3)
+ | otherwise -> do
+ poke ptr (fromIntegral (0xF0 .|. (x `shiftR` 18)))
+ pokeElemOff ptr 1 (fromIntegral (0x80 .|. ((x `shiftR` 12) .&. 0x3F)))
+ pokeElemOff ptr 2 (fromIntegral (0x80 .|. ((x `shiftR` 6) .&. 0x3F)))
+ pokeElemOff ptr 3 (fromIntegral (0x80 .|. (x .&. 0x3F)))
+ return (ptr `plusPtr` 4)
+
+utf8EncodeString :: Ptr Word8 -> String -> IO ()
+utf8EncodeString ptr str = go ptr str
+ where STRICT2(go)
+ go ptr [] = return ()
+ go ptr (c:cs) = do
+ ptr' <- utf8EncodeChar c ptr
+ go ptr' cs
+
+utf8EncodedLength :: String -> Int
+utf8EncodedLength str = go 0 str
+ where STRICT2(go)
+ go n [] = n
+ go n (c:cs)
+ | ord c > 0 && ord c <= 0x007f = go (n+1) cs
+ | ord c <= 0x07ff = go (n+2) cs
+ | ord c <= 0xffff = go (n+3) cs
+ | otherwise = go (n+4) cs
+
+-- -----------------------------------------------------------------------------
+-- The Z-encoding
+
+{-
+This is the main name-encoding and decoding function. It encodes any
+string into a string that is acceptable as a C name. This is the name
+by which things are known right through the compiler.
+
+The basic encoding scheme is this.
+
+* Tuples (,,,) are coded as Z3T
+
+* Alphabetic characters (upper and lower) and digits
+ all translate to themselves;
+ except 'Z', which translates to 'ZZ'
+ and 'z', which translates to 'zz'
+ We need both so that we can preserve the variable/tycon distinction
+
+* Most other printable characters translate to 'zx' or 'Zx' for some
+ alphabetic character x
+
+* The others translate as 'znnnU' where 'nnn' is the decimal number
+ of the character
+
+ Before After
+ --------------------------
+ Trak Trak
+ foo_wib foozuwib
+ > zg
+ >1 zg1
+ foo# foozh
+ foo## foozhzh
+ foo##1 foozhzh1
+ fooZ fooZZ
+ :+ ZCzp
+ () Z0T 0-tuple
+ (,,,,) Z5T 5-tuple
+ (# #) Z1H unboxed 1-tuple (note the space)
+ (#,,,,#) Z5H unboxed 5-tuple
+ (NB: There is no Z1T nor Z0H.)
+-}
+
+type UserString = String -- As the user typed it
+type EncodedString = String -- Encoded form
+
+
+zEncodeString :: UserString -> EncodedString
+zEncodeString cs = case maybe_tuple cs of
+ Just n -> n -- Tuples go to Z2T etc
+ Nothing -> go cs
+ where
+ go [] = []
+ go (c:cs) = encode_ch c ++ go cs
+
+unencodedChar :: Char -> Bool -- True for chars that don't need encoding
+unencodedChar 'Z' = False
+unencodedChar 'z' = False
+unencodedChar c = c >= 'a' && c <= 'z'
+ || c >= 'A' && c <= 'Z'
+ || c >= '0' && c <= '9'
+
+encode_ch :: Char -> EncodedString
+encode_ch c | unencodedChar c = [c] -- Common case first
+
+-- Constructors
+encode_ch '(' = "ZL" -- Needed for things like (,), and (->)
+encode_ch ')' = "ZR" -- For symmetry with (
+encode_ch '[' = "ZM"
+encode_ch ']' = "ZN"
+encode_ch ':' = "ZC"
+encode_ch 'Z' = "ZZ"
+
+-- Variables
+encode_ch 'z' = "zz"
+encode_ch '&' = "za"
+encode_ch '|' = "zb"
+encode_ch '^' = "zc"
+encode_ch '$' = "zd"
+encode_ch '=' = "ze"
+encode_ch '>' = "zg"
+encode_ch '#' = "zh"
+encode_ch '.' = "zi"
+encode_ch '<' = "zl"
+encode_ch '-' = "zm"
+encode_ch '!' = "zn"
+encode_ch '+' = "zp"
+encode_ch '\'' = "zq"
+encode_ch '\\' = "zr"
+encode_ch '/' = "zs"
+encode_ch '*' = "zt"
+encode_ch '_' = "zu"
+encode_ch '%' = "zv"
+encode_ch c = 'z' : if isDigit (head hex_str) then hex_str
+ else '0':hex_str
+ where hex_str = showHex (ord c) "U"
+ -- ToDo: we could improve the encoding here in various ways.
+ -- eg. strings of unicode characters come out as 'z1234Uz5678U', we
+ -- could remove the 'U' in the middle (the 'z' works as a separator).
+
+zDecodeString :: EncodedString -> UserString
+zDecodeString [] = []
+zDecodeString ('Z' : d : rest)
+ | isDigit d = decode_tuple d rest
+ | otherwise = decode_upper d : zDecodeString rest
+zDecodeString ('z' : d : rest)
+ | isDigit d = decode_num_esc d rest
+ | otherwise = decode_lower d : zDecodeString rest
+zDecodeString (c : rest) = c : zDecodeString rest
+
+decode_upper, decode_lower :: Char -> Char
+
+decode_upper 'L' = '('
+decode_upper 'R' = ')'
+decode_upper 'M' = '['
+decode_upper 'N' = ']'
+decode_upper 'C' = ':'
+decode_upper 'Z' = 'Z'
+decode_upper ch = {-pprTrace "decode_upper" (char ch)-} ch
+
+decode_lower 'z' = 'z'
+decode_lower 'a' = '&'
+decode_lower 'b' = '|'
+decode_lower 'c' = '^'
+decode_lower 'd' = '$'
+decode_lower 'e' = '='
+decode_lower 'g' = '>'
+decode_lower 'h' = '#'
+decode_lower 'i' = '.'
+decode_lower 'l' = '<'
+decode_lower 'm' = '-'
+decode_lower 'n' = '!'
+decode_lower 'p' = '+'
+decode_lower 'q' = '\''
+decode_lower 'r' = '\\'
+decode_lower 's' = '/'
+decode_lower 't' = '*'
+decode_lower 'u' = '_'
+decode_lower 'v' = '%'
+decode_lower ch = {-pprTrace "decode_lower" (char ch)-} ch
+
+-- Characters not having a specific code are coded as z224U (in hex)
+decode_num_esc d rest
+ = go (digitToInt d) rest
+ where
+ go n (c : rest) | isHexDigit c = go (16*n + digitToInt c) rest
+ go n ('U' : rest) = chr n : zDecodeString rest
+ go n other = error ("decode_num_esc: " ++ show n ++ ' ':other)
+
+decode_tuple :: Char -> EncodedString -> UserString
+decode_tuple d rest
+ = go (digitToInt d) rest
+ where
+ -- NB. recurse back to zDecodeString after decoding the tuple, because
+ -- the tuple might be embedded in a longer name.
+ go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
+ go 0 ('T':rest) = "()" ++ zDecodeString rest
+ go n ('T':rest) = '(' : replicate (n-1) ',' ++ ")" ++ zDecodeString rest
+ go 1 ('H':rest) = "(# #)" ++ zDecodeString rest
+ go n ('H':rest) = '(' : '#' : replicate (n-1) ',' ++ "#)" ++ zDecodeString rest
+ go n other = error ("decode_tuple: " ++ show n ++ ' ':other)
+
+{-
+Tuples are encoded as
+ Z3T or Z3H
+for 3-tuples or unboxed 3-tuples respectively. No other encoding starts
+ Z<digit>
+
+* "(# #)" is the tycon for an unboxed 1-tuple (not 0-tuple)
+ There are no unboxed 0-tuples.
+
+* "()" is the tycon for a boxed 0-tuple.
+ There are no boxed 1-tuples.
+-}
+
+maybe_tuple :: UserString -> Maybe EncodedString
+
+maybe_tuple "(# #)" = Just("Z1H")
+maybe_tuple ('(' : '#' : cs) = case count_commas (0::Int) cs of
+ (n, '#' : ')' : cs) -> Just ('Z' : shows (n+1) "H")
+ other -> Nothing
+maybe_tuple "()" = Just("Z0T")
+maybe_tuple ('(' : cs) = case count_commas (0::Int) cs of
+ (n, ')' : cs) -> Just ('Z' : shows (n+1) "T")
+ other -> Nothing
+maybe_tuple other = Nothing
+
+count_commas :: Int -> String -> (Int, String)
+count_commas n (',' : cs) = count_commas (n+1) cs
+count_commas n cs = (n,cs)
%
-% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
+% (c) The University of Glasgow, 1997-2006
%
-\section{Fast strings}
+\begin{code}
+{-# OPTIONS -fglasgow-exts -O #-}
+{-
FastString: A compact, hash-consed, representation of character strings.
Comparison is O(1), and you can get a Unique from them.
Generated by the FSLIT macro
Turn into SDoc with Outputable.ptext
Use LitString unless you want the facilities of FastString
-
-\begin{code}
+-}
module FastString
(
+ -- * FastStrings
FastString(..), -- not abstract, for now.
- mkFastString, -- :: String -> FastString
- mkFastStringNarrow, -- :: String -> FastString
- mkFastSubString, -- :: Addr -> Int -> Int -> FastString
-
- mkFastString#, -- :: Addr# -> FastString
- mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
-
- mkFastStringInt, -- :: [Int] -> FastString
-
- uniqueOfFS, -- :: FastString -> Int#
- lengthFS, -- :: FastString -> Int
- nullFastString, -- :: FastString -> Bool
+ -- ** Construction
+ mkFastString,
+ mkFastStringBytes,
+ mkFastStringForeignPtr,
+ mkFastString#,
+ mkZFastString,
+ mkZFastStringBytes,
+ -- ** Deconstruction
unpackFS, -- :: FastString -> String
- unpackIntFS, -- :: FastString -> [Int]
- appendFS, -- :: FastString -> FastString -> FastString
- headFS, -- :: FastString -> Char
- headIntFS, -- :: FastString -> Int
- tailFS, -- :: FastString -> FastString
- concatFS, -- :: [FastString] -> FastString
- consFS, -- :: Char -> FastString -> FastString
- indexFS, -- :: FastString -> Int -> Char
- nilFS, -- :: FastString
-
- hPutFS, -- :: Handle -> FastString -> IO ()
-
+ bytesFS, -- :: FastString -> [Word8]
+
+ -- ** Encoding
+ isZEncoded,
+ zEncodeFS,
+
+ -- ** Operations
+ uniqueOfFS,
+ lengthFS,
+ nullFS,
+ appendFS,
+ headFS,
+ tailFS,
+ concatFS,
+ consFS,
+ nilFS,
+
+ -- ** Outputing
+ hPutFS,
+
+ -- * LitStrings
LitString,
- mkLitString# -- :: Addr# -> LitString
+ mkLitString#,
+ strLength
) where
-- This #define suppresses the "import FastString" that
#define COMPILING_FAST_STRING
#include "HsVersions.h"
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase ( IO(..) )
-#else
-import GHC.IOBase ( IO(..) )
-#endif
+import Encoding
-import PrimPacked
+import Foreign
+import Foreign.C
import GLAEXTS
import UNSAFE_IO ( unsafePerformIO )
import MONAD_ST ( stToIO )
import DATA_IOREF ( IORef, newIORef, readIORef, writeIORef )
+import System.IO ( hPutBuf )
-#if __GLASGOW_HASKELL__ < 503
-import PrelArr ( STArray(..), newSTArray )
-#else
import GHC.Arr ( STArray(..), newSTArray )
-#endif
-
-#if __GLASGOW_HASKELL__ >= 504
-import GHC.IOBase
-import GHC.Handle
-import Foreign.C
-#else
-import IOExts ( hPutBufBAFull )
-#endif
+import GHC.IOBase ( IO(..) )
import IO
-import Char ( chr, ord )
#define hASH_TBL_SIZE 4091
-\end{code}
-@FastString@s are packed representations of strings
-with a unique id for fast comparisons. The unique id
-is assigned when creating the @FastString@, using
-a hash table to map from the character string representation
-to the unique ID.
-\begin{code}
-data FastString
- = FastString -- packed repr. on the heap.
- Int# -- unique id
- -- 0 => string literal, comparison
- -- will
- Int# -- length
- ByteArray# -- stuff
-
- | UnicodeStr -- if contains characters outside '\1'..'\xFF'
- Int# -- unique id
- [Int] -- character numbers
+{-|
+A 'FastString' is an array of bytes, hashed to support fast O(1)
+comparison. It is also associated with a character encoding, so that
+we know how to convert a 'FastString' to the local encoding, or to the
+Z-encoding used by the compiler internally.
-instance Eq FastString where
- -- shortcut for real FastStrings
- (FastString u1 _ _) == (FastString u2 _ _) = u1 ==# u2
- a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False }
+'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
+-}
+
+data FastString = FastString {
+ uniq :: {-# UNPACK #-} !Int, -- unique id
+ n_bytes :: {-# UNPACK #-} !Int, -- number of bytes
+ n_chars :: {-# UNPACK #-} !Int, -- number of chars
+ buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+ enc :: FSEncoding
+ }
+
+data FSEncoding
+ = ZEncoded
+ -- including strings that don't need any encoding
+ | UTF8Encoded {-# UNPACK #-} !(IORef (Maybe FastString))
+ -- A UTF-8 string with a memoized Z-encoding
- (FastString u1 _ _) /= (FastString u2 _ _) = u1 /=# u2
- a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True }
+instance Eq FastString where
+ f1 == f2 = uniq f1 == uniq f2
instance Ord FastString where
-- Compares lexicographically, not by unique
instance Show FastString where
show fs = show (unpackFS fs)
-lengthFS :: FastString -> Int
-lengthFS (FastString _ l# _) = I# l#
-lengthFS (UnicodeStr _ s) = length s
-
-nullFastString :: FastString -> Bool
-nullFastString (FastString _ l# _) = l# ==# 0#
-nullFastString (UnicodeStr _ []) = True
-nullFastString (UnicodeStr _ (_:_)) = False
-
-unpackFS :: FastString -> String
-unpackFS (FastString _ l# ba#) = unpackNBytesBA (BA ba#) (I# l#)
-unpackFS (UnicodeStr _ s) = map chr s
-
-unpackIntFS :: FastString -> [Int]
-unpackIntFS (UnicodeStr _ s) = s
-unpackIntFS fs = map ord (unpackFS fs)
-
-appendFS :: FastString -> FastString -> FastString
-appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
-
-concatFS :: [FastString] -> FastString
-concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
-
-headFS :: FastString -> Char
-headFS (FastString _ l# ba#) =
- if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
-headFS (UnicodeStr _ (c:_)) = chr c
-headFS (UnicodeStr _ []) = error ("headFS: empty FS")
-
-headIntFS :: FastString -> Int
-headIntFS (UnicodeStr _ (c:_)) = c
-headIntFS fs = ord (headFS fs)
-
-indexFS :: FastString -> Int -> Char
-indexFS f i@(I# i#) =
- case f of
- FastString _ l# ba#
- | l# ># 0# && l# ># i# -> C# (indexCharArray# ba# i#)
- | otherwise -> error (msg (I# l#))
- UnicodeStr _ s -> chr (s!!i)
- where
- msg l = "indexFS: out of range: " ++ show (l,i)
-
-tailFS :: FastString -> FastString
-tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
-tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
-
-consFS :: Char -> FastString -> FastString
-consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
+cmpFS :: FastString -> FastString -> Ordering
+cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
+ if u1 == u2 then EQ else
+ let l = if l1 <= l2 then l1 else l2 in
+ inlinePerformIO $
+ withForeignPtr buf1 $ \p1 ->
+ withForeignPtr buf2 $ \p2 -> do
+ res <- memcmp p1 p2 l
+ case () of
+ _ | res < 0 -> return LT
+ | res == 0 -> if l1 == l2 then return EQ
+ else if l1 < l2 then return LT
+ else return GT
+ | otherwise -> return GT
-uniqueOfFS :: FastString -> Int#
-uniqueOfFS (FastString u# _ _) = u#
-uniqueOfFS (UnicodeStr u# _) = u#
+#ifndef __HADDOCK__
+foreign import ccall unsafe "ghc_memcmp"
+ memcmp :: Ptr a -> Ptr b -> Int -> IO Int
+#endif
-nilFS = mkFastString ""
-\end{code}
+-- -----------------------------------------------------------------------------
+-- Construction
+{-
Internally, the compiler will maintain a fast string symbol
table, providing sharing and fast comparison. Creation of
new @FastString@s then covertly does a lookup, re-using the
@FastString@ if there was a hit.
+-}
-Caution: mkFastStringUnicode assumes that if the string is in the
-table, it sits under the UnicodeStr constructor. Other mkFastString
-variants analogously assume the FastString constructor.
-
-\begin{code}
data FastStringTable =
FastStringTable
- Int#
+ {-# UNPACK #-} !Int
(MutableArray# RealWorld [FastString])
-type FastStringTableVar = IORef FastStringTable
-
-string_table :: FastStringTableVar
+string_table :: IORef FastStringTable
string_table =
- unsafePerformIO (
- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
- >>= \ (STArray _ _ arr#) ->
- newIORef (FastStringTable 0# arr#))
-
-lookupTbl :: FastStringTable -> Int# -> IO [FastString]
-lookupTbl (FastStringTable _ arr#) i# =
- IO ( \ s# ->
- readArray# arr# i# s#)
-
-updTbl :: FastStringTableVar -> FastStringTable -> Int# -> [FastString] -> IO ()
-updTbl fs_table_var (FastStringTable uid# arr#) i# ls =
- IO (\ s# -> case writeArray# arr# i# ls s# of { s2# ->
- (# s2#, () #) }) >>
- writeIORef fs_table_var (FastStringTable (uid# +# 1#) arr#)
+ unsafePerformIO $ do
+ (STArray _ _ arr#) <- stToIO (newSTArray (0::Int,hASH_TBL_SIZE) [])
+ newIORef (FastStringTable 0 arr#)
+
+lookupTbl :: FastStringTable -> Int -> IO [FastString]
+lookupTbl (FastStringTable _ arr#) (I# i#) =
+ IO $ \ s# -> readArray# arr# i# s#
+
+updTbl :: IORef FastStringTable -> FastStringTable -> Int -> [FastString] -> IO ()
+updTbl fs_table_var (FastStringTable uid arr#) (I# i#) ls = do
+ (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
+ writeIORef fs_table_var (FastStringTable (uid+1) arr#)
mkFastString# :: Addr# -> FastString
-mkFastString# a# =
- case strLength (Ptr a#) of { (I# len#) -> mkFastStringLen# a# len# }
+mkFastString# a# = mkFastStringBytes ptr (strLength ptr)
+ where ptr = Ptr a#
-mkFastStringLen# :: Addr# -> Int# -> FastString
-mkFastStringLen# a# len# =
- unsafePerformIO (
- readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
+mkFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkFastStringBytes ptr len = unsafePerformIO $ do
+ ft@(FastStringTable uid tbl#) <- readIORef string_table
let
- h = hashStr a# len#
- in
--- _trace ("hashed: "++show (I# h)) $
- lookupTbl ft h >>= \ lookup_result ->
+ h = hashStr ptr len
+ add_it ls = do
+ fs <- copyNewFastString uid ptr len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
+ --
+ lookup_result <- lookupTbl ft h
case lookup_result of
- [] ->
- -- no match, add it to table by copying out the
- -- the string into a ByteArray
- -- _trace "empty bucket" $
- case copyPrefixStr a# (I# len#) of
- BA barr# ->
- let f_str = FastString uid# len# barr# in
- updTbl string_table ft h [f_str] >>
- ({- _trace ("new: " ++ show f_str) $ -} return f_str)
- ls ->
- -- non-empty `bucket', scan the list looking
- -- entry with same length and compare byte by byte.
- -- _trace ("non-empty bucket"++show ls) $
- case bucket_match ls len# a# of
- Nothing ->
- case copyPrefixStr a# (I# len#) of
- BA barr# ->
- let f_str = FastString uid# len# barr# in
- updTbl string_table ft h (f_str:ls) >>
- ( {- _trace ("new: " ++ show f_str) $ -} return f_str)
- Just v -> {- _trace ("re-use: "++show v) $ -} return v)
- where
- bucket_match [] _ _ = Nothing
- bucket_match (v@(FastString _ l# ba#):ls) len# a# =
- if len# ==# l# && eqStrPrefix a# ba# l# then
- Just v
- else
- bucket_match ls len# a#
- bucket_match (UnicodeStr _ _ : ls) len# a# =
- bucket_match ls len# a#
-
-mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
-mkFastSubStringBA# barr# start# len# =
- unsafePerformIO (
- readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
+ [] -> add_it []
+ ls -> do
+ b <- bucket_match ls len ptr
+ case b of
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
+
+mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
+mkZFastStringBytes ptr len = unsafePerformIO $ do
+ ft@(FastStringTable uid tbl#) <- readIORef string_table
let
- h = hashSubStrBA barr# start# len#
- in
--- _trace ("hashed(b): "++show (I# h)) $
- lookupTbl ft h >>= \ lookup_result ->
+ h = hashStr ptr len
+ add_it ls = do
+ fs <- copyNewZFastString uid ptr len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
+ --
+ lookup_result <- lookupTbl ft h
case lookup_result of
- [] ->
- -- no match, add it to table by copying out the
- -- the string into a ByteArray
- -- _trace "empty bucket(b)" $
- case copySubStrBA (BA barr#) (I# start#) (I# len#) of
- BA ba# ->
- let f_str = FastString uid# len# ba# in
- updTbl string_table ft h [f_str] >>
- -- _trace ("new(b): " ++ show f_str) $
- return f_str
- ls ->
- -- non-empty `bucket', scan the list looking
- -- entry with same length and compare byte by byte.
- -- _trace ("non-empty bucket(b)"++show ls) $
- case bucket_match ls start# len# barr# of
- Nothing ->
- case copySubStrBA (BA barr#) (I# start#) (I# len#) of
- BA ba# ->
- let f_str = FastString uid# len# ba# in
- updTbl string_table ft h (f_str:ls) >>
- -- _trace ("new(b): " ++ show f_str) $
- return f_str
- Just v ->
- -- _trace ("re-use(b): "++show v) $
- return v
- )
- where
- bucket_match [] _ _ _ = Nothing
- bucket_match (v:ls) start# len# ba# =
- case v of
- FastString _ l# barr# ->
- if len# ==# l# && eqStrPrefixBA barr# ba# start# len# then
- Just v
- else
- bucket_match ls start# len# ba#
- UnicodeStr _ _ -> bucket_match ls start# len# ba#
-
-mkFastStringUnicode :: [Int] -> FastString
-mkFastStringUnicode s =
- unsafePerformIO (
- readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) ->
+ [] -> add_it []
+ ls -> do
+ b <- bucket_match ls len ptr
+ case b of
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
+
+-- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
+-- between this and 'mkFastStringBytes' is that we don't have to copy
+-- the bytes if the string is new to the table.
+mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkFastStringForeignPtr ptr fp len = do
+ ft@(FastStringTable uid tbl#) <- readIORef string_table
+-- _trace ("hashed: "++show (I# h)) $
let
- h = hashUnicode s 0#
- in
--- _trace ("hashed(b): "++show (I# h)) $
- lookupTbl ft h >>= \ lookup_result ->
+ h = hashStr ptr len
+ add_it ls = do
+ fs <- mkNewFastString uid ptr fp len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
+ --
+ lookup_result <- lookupTbl ft h
+ case lookup_result of
+ [] -> add_it []
+ ls -> do
+ b <- bucket_match ls len ptr
+ case b of
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
+
+mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
+mkZFastStringForeignPtr ptr fp len = do
+ ft@(FastStringTable uid tbl#) <- readIORef string_table
+-- _trace ("hashed: "++show (I# h)) $
+ let
+ h = hashStr ptr len
+ add_it ls = do
+ fs <- mkNewZFastString uid ptr fp len
+ updTbl string_table ft h (fs:ls)
+ {- _trace ("new: " ++ show f_str) $ -}
+ return fs
+ --
+ lookup_result <- lookupTbl ft h
case lookup_result of
- [] ->
- -- no match, add it to table by copying out the
- -- the string into a [Int]
- let f_str = UnicodeStr uid# s in
- updTbl string_table ft h [f_str] >>
- -- _trace ("new(b): " ++ show f_str) $
- return f_str
- ls ->
- -- non-empty `bucket', scan the list looking
- -- entry with same length and compare byte by byte.
- -- _trace ("non-empty bucket(b)"++show ls) $
- case bucket_match ls of
- Nothing ->
- let f_str = UnicodeStr uid# s in
- updTbl string_table ft h (f_str:ls) >>
- -- _trace ("new(b): " ++ show f_str) $
- return f_str
- Just v ->
- -- _trace ("re-use(b): "++show v) $
- return v
- )
- where
- bucket_match [] = Nothing
- bucket_match (v@(UnicodeStr _ s'):ls) =
- if s' == s then Just v else bucket_match ls
- bucket_match (FastString _ _ _ : ls) = bucket_match ls
-
-mkFastStringNarrow :: String -> FastString
-mkFastStringNarrow str =
- case packString str of { (I# len#, BA frozen#) ->
- mkFastSubStringBA# frozen# 0# len#
- }
- {- 0-indexed array, len# == index to one beyond end of string,
- i.e., (0,1) => empty string. -}
+ [] -> add_it []
+ ls -> do
+ b <- bucket_match ls len ptr
+ case b of
+ Nothing -> add_it ls
+ Just v -> {- _trace ("re-use: "++show v) $ -} return v
-mkFastString :: String -> FastString
-mkFastString str = if all good str
- then mkFastStringNarrow str
- else mkFastStringUnicode (map ord str)
- where
- good c = c >= '\1' && c <= '\xFF'
-
-mkFastStringInt :: [Int] -> FastString
-mkFastStringInt str = if all good str
- then mkFastStringNarrow (map chr str)
- else mkFastStringUnicode str
- where
- good c = c >= 1 && c <= 0xFF
-
-mkFastSubString :: Addr# -> Int -> Int -> FastString
-mkFastSubString a# (I# start#) (I# len#) =
- mkFastStringLen# (a# `plusAddr#` start#) len#
-\end{code}
-\begin{code}
-hashStr :: Addr# -> Int# -> Int#
+-- | Creates a UTF-8 encoded 'FastString' from a 'String'
+mkFastString :: String -> FastString
+mkFastString str =
+ inlinePerformIO $ do
+ let l = utf8EncodedLength str
+ buf <- mallocForeignPtrBytes l
+ withForeignPtr buf $ \ptr -> do
+ utf8EncodeString ptr str
+ mkFastStringForeignPtr ptr buf l
+
+
+-- | Creates a Z-encoded 'FastString' from a 'String'
+mkZFastString :: String -> FastString
+mkZFastString str =
+ inlinePerformIO $ do
+ let l = Prelude.length str
+ buf <- mallocForeignPtrBytes l
+ withForeignPtr buf $ \ptr -> do
+ pokeCAString (castPtr ptr) str
+ mkZFastStringForeignPtr ptr buf l
+
+bucket_match [] _ _ = return Nothing
+bucket_match (v@(FastString _ l _ buf _):ls) len ptr
+ | len == l = do
+ b <- cmpStringPrefix ptr buf len
+ if b then return (Just v)
+ else bucket_match ls len ptr
+ | otherwise =
+ bucket_match ls len ptr
+
+mkNewFastString uid ptr fp len = do
+ ref <- newIORef Nothing
+ n_chars <- countUTF8Chars ptr len
+ return (FastString uid len n_chars fp (UTF8Encoded ref))
+
+mkNewZFastString uid ptr fp len = do
+ return (FastString uid len len fp ZEncoded)
+
+
+copyNewFastString uid ptr len = do
+ fp <- copyBytesToForeignPtr ptr len
+ ref <- newIORef Nothing
+ n_chars <- countUTF8Chars ptr len
+ return (FastString uid len n_chars fp (UTF8Encoded ref))
+
+copyNewZFastString uid ptr len = do
+ fp <- copyBytesToForeignPtr ptr len
+ return (FastString uid len len fp ZEncoded)
+
+
+copyBytesToForeignPtr ptr len = do
+ fp <- mallocForeignPtrBytes len
+ withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
+ return fp
+
+cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
+cmpStringPrefix ptr fp len =
+ withForeignPtr fp $ \ptr' -> do
+ r <- memcmp ptr ptr' len
+ return (r == 0)
+
+
+hashStr :: Ptr Word8 -> Int -> Int
-- use the Addr to produce a hash value between 0 & m (inclusive)
-hashStr a# len# = loop 0# 0#
+hashStr (Ptr a#) (I# len#) = loop 0# 0#
where
- loop h n | n ==# len# = h
+ loop h n | n ==# len# = I# h
| otherwise = loop h2 (n +# 1#)
where c = ord# (indexCharOffAddr# a# n)
h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
-hashSubStrBA :: ByteArray# -> Int# -> Int# -> Int#
- -- use the byte array to produce a hash value between 0 & m (inclusive)
-hashSubStrBA ba# start# len# = loop 0# 0#
- where
- loop h n | n ==# len# = h
- | otherwise = loop h2 (n +# 1#)
- where c = ord# (indexCharArray# ba# (start# +# n))
- h2 = (c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#
+-- -----------------------------------------------------------------------------
+-- Operations
-hashUnicode :: [Int] -> Int# -> Int#
-hashUnicode [] h = h
-hashUnicode (I# c : cs) h = hashUnicode cs ((c +# (h *# 128#)) `remInt#` hASH_TBL_SIZE#)
-\end{code}
+-- | Returns the length of the 'FastString' in characters
+lengthFS :: FastString -> Int
+lengthFS f = n_chars f
-\begin{code}
-cmpFS :: FastString -> FastString -> Ordering
-cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
- else compare s1 s2
-cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
-cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
-cmpFS (FastString u1# l1# b1#) (FastString u2# l2# b2#) =
- if u1# ==# u2# then EQ else
- let l# = if l1# <=# l2# then l1# else l2# in
- unsafePerformIO (
- memcmp b1# b2# l# >>= \ (I# res) ->
- return (
- if res <# 0# then LT
- else if res ==# 0# then
- if l1# ==# l2# then EQ
- else if l1# <# l2# then LT else GT
- else GT
- ))
+-- | Returns 'True' if the 'FastString' is Z-encoded
+isZEncoded :: FastString -> Bool
+isZEncoded fs | ZEncoded <- enc fs = True
+ | otherwise = False
-#ifndef __HADDOCK__
-foreign import ccall unsafe "ghc_memcmp"
- memcmp :: ByteArray# -> ByteArray# -> Int# -> IO Int
-#endif
+-- | Returns 'True' if the 'FastString' is empty
+nullFS :: FastString -> Bool
+nullFS f = n_bytes f == 0
+
+-- | unpacks and decodes the FastString
+unpackFS :: FastString -> String
+unpackFS (FastString _ n_bytes _ buf enc) =
+ inlinePerformIO $ withForeignPtr buf $ \ptr ->
+ case enc of
+ ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
+ UTF8Encoded _ -> utf8DecodeString ptr n_bytes
+
+bytesFS :: FastString -> [Word8]
+bytesFS (FastString _ n_bytes _ buf enc) =
+ inlinePerformIO $ withForeignPtr buf $ \ptr ->
+ peekArray n_bytes ptr
+
+-- | returns a Z-encoded version of a 'FastString'. This might be the
+-- original, if it was already Z-encoded. The first time this
+-- function is applied to a particular 'FastString', the results are
+-- memoized.
+--
+zEncodeFS :: FastString -> FastString
+zEncodeFS fs@(FastString uid n_bytes _ fp enc) =
+ case enc of
+ ZEncoded -> fs
+ UTF8Encoded ref ->
+ inlinePerformIO $ do
+ m <- readIORef ref
+ case m of
+ Just fs -> return fs
+ Nothing -> do
+ let efs = mkZFastString (zEncodeString (unpackFS fs))
+ writeIORef ref (Just efs)
+ return efs
+
+appendFS :: FastString -> FastString -> FastString
+appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
+
+concatFS :: [FastString] -> FastString
+concatFS ls = mkFastString (Prelude.concat (map unpackFS ls)) -- ToDo: do better
+
+headFS :: FastString -> Char
+headFS (FastString _ n_bytes _ buf enc) =
+ inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+ case enc of
+ ZEncoded -> do
+ w <- peek (castPtr ptr)
+ return (castCCharToChar w)
+ UTF8Encoded _ ->
+ return (fst (utf8DecodeChar ptr))
+
+tailFS :: FastString -> FastString
+tailFS (FastString _ n_bytes _ buf enc) =
+ inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+ case enc of
+ ZEncoded -> do
+ return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes - 1)
+ UTF8Encoded _ -> do
+ let (_,ptr') = utf8DecodeChar ptr
+ let off = ptr' `minusPtr` ptr
+ return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes - off)
+
+consFS :: Char -> FastString -> FastString
+consFS c fs = mkFastString (c : unpackFS fs)
+
+uniqueOfFS :: FastString -> Int#
+uniqueOfFS (FastString (I# u#) _ _ _ _) = u#
+
+nilFS = mkFastString ""
-- -----------------------------------------------------------------------------
-- Outputting 'FastString's
-#if __GLASGOW_HASKELL__ >= 504
-
--- this is our own version of hPutBuf for FastStrings, because in
--- 5.04+ we don't have mutable byte arrays and therefore hPutBufBA.
--- The closest is hPutArray in Data.Array.IO, but that does some extra
--- range checks that we want to avoid here.
-
-foreign import ccall unsafe "__hscore_memcpy_dst_off"
- memcpy_baoff_ba :: RawBuffer -> Int -> RawBuffer -> CSize -> IO (Ptr ())
-
-hPutFS handle (FastString _ l# ba#)
- | l# ==# 0# = return ()
- | otherwise
- = do wantWritableHandle "hPutFS" handle $
- \ handle_@Handle__{ haFD=fd, haBuffer=ref, haIsStream=stream } -> do
-
- old_buf@Buffer{ bufBuf=old_raw, bufRPtr=r, bufWPtr=w, bufSize=size }
- <- readIORef ref
-
- let count = I# l#
- raw = unsafeCoerce# ba# :: MutableByteArray# RealWorld
-
- -- enough room in handle buffer?
- if (size - w > count)
- -- There's enough room in the buffer:
- -- just copy the data in and update bufWPtr.
- then do memcpy_baoff_ba old_raw w raw (fromIntegral count)
- writeIORef ref old_buf{ bufWPtr = w + count }
- return ()
-
- -- else, we have to flush
- else do flushed_buf <- flushWriteBuffer fd stream old_buf
- writeIORef ref flushed_buf
- let this_buf =
- Buffer{ bufBuf=raw, bufState=WriteBuffer,
- bufRPtr=0, bufWPtr=count, bufSize=count }
- flushWriteBuffer fd stream this_buf
- return ()
-
-#else
-
-hPutFS :: Handle -> FastString -> IO ()
-hPutFS handle (FastString _ l# ba#)
- | l# ==# 0# = return ()
- | otherwise = do mba <- stToIO $ unsafeThawByteArray (ByteArray (bot::Int) bot ba#)
- hPutBufBAFull handle mba (I# l#)
- where
- bot = error "hPutFS.ba"
-
-#endif
+-- |Outputs a 'FastString' with /no decoding at all/, that is, you
+-- get the actual bytes in the 'FastString' written to the 'Handle'.
+hPutFS handle (FastString _ len _ fp _)
+ | len == 0 = return ()
+ | otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
--- ONLY here for debugging the NCG (so -ddump-stix works for string
--- literals); no idea if this is really necessary. JRS, 010131
-hPutFS handle (UnicodeStr _ is)
- = hPutStr handle ("(UnicodeStr " ++ show is ++ ")")
+-- ToDo: we'll probably want an hPutFSLocal, or something, to output
+-- in the current locale's encoding (for error messages and suchlike).
-- -----------------------------------------------------------------------------
-- LitStrings, here for convenience only.
mkLitString# :: Addr# -> LitString
mkLitString# a# = Ptr a#
+
+foreign import ccall unsafe "ghc_strlen"
+ strLength :: Ptr () -> Int
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+
+pokeCAString :: Ptr CChar -> String -> IO ()
+pokeCAString ptr str =
+ let
+ go [] n = pokeElemOff ptr n 0
+ go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
+ in
+ go str 0
+
\end{code}
(+#), (-#), (*#), quotFastInt, negateFastInt,
(==#), (<#), (<=#), (>=#), (>#),
- FastBool, fastBool, isFastTrue, fastOr
+ FastBool, fastBool, isFastTrue, fastOr, fastAnd
) where
#include "HsVersions.h"
import BufWrite
import FastString
-import PrimPacked ( strLength )
import GLAEXTS
import Numeric (fromRat)
import IO
-#if __GLASGOW_HASKELL__ < 503
-import IOExts ( hPutBufFull )
-#else
import System.IO ( hPutBuf )
-#endif
-#if __GLASGOW_HASKELL__ < 503
-import PrelBase ( unpackCString# )
-#else
import GHC.Base ( unpackCString# )
-#endif
-
-import PrimPacked ( Ptr(..) )
+import GHC.Ptr ( Ptr(..) )
-- Don't import Util( assertPanic ) because it makes a loop in the module structure
+++ /dev/null
-%
-% (c) The GRASP/AQUA Project, Glasgow University, 1997-1998
-%
-\section{Basic ops on packed representations}
-
-Some basic operations for working on packed representations of series
-of bytes (character strings). Used by the interface lexer input
-subsystem, mostly.
-
-\begin{code}
-{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}
-
-module PrimPacked (
- Ptr(..), nullPtr, plusAddr#,
- BA(..),
- packString, -- :: String -> (Int, BA)
- unpackNBytesBA, -- :: BA -> Int -> [Char]
- strLength, -- :: Ptr CChar -> Int
- copyPrefixStr, -- :: Addr# -> Int -> BA
- copySubStrBA, -- :: BA -> Int -> Int -> BA
- eqStrPrefix, -- :: Addr# -> ByteArray# -> Int# -> Bool
- eqStrPrefixBA, -- :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool
- ) where
-
--- This #define suppresses the "import FastString" that
--- HsVersions otherwise produces
-#define COMPILING_FAST_STRING
-#include "HsVersions.h"
-
-import GLAEXTS
-import UNSAFE_IO ( unsafePerformIO )
-
-import MONAD_ST
-import Foreign
-
-#if __GLASGOW_HASKELL__ < 503
-import PrelST
-#else
-import GHC.ST
-#endif
-
-#if __GLASGOW_HASKELL__ >= 504
-import GHC.Ptr ( Ptr(..) )
-#elif __GLASGOW_HASKELL__ >= 500
-import Ptr ( Ptr(..) )
-#endif
-
-#if __GLASGOW_HASKELL__ < 504
-import PrelIOBase ( IO(..) )
-#else
-import GHC.IOBase ( IO(..) )
-#endif
-\end{code}
-
-Compatibility: 4.08 didn't have the Ptr type.
-
-\begin{code}
-#if __GLASGOW_HASKELL__ <= 408
-data Ptr a = Ptr Addr# deriving (Eq, Ord)
-
-nullPtr :: Ptr a
-nullPtr = Ptr (int2Addr# 0#)
-#endif
-
-#if __GLASGOW_HASKELL__ <= 500
--- plusAddr# is a primop in GHC > 5.00
-plusAddr# :: Addr# -> Int# -> Addr#
-plusAddr# a# i# = int2Addr# (addr2Int# a# +# i#)
-#endif
-\end{code}
-
-Wrapper types for bytearrays
-
-\begin{code}
-data BA = BA ByteArray#
-data MBA s = MBA (MutableByteArray# s)
-\end{code}
-
-\begin{code}
-packString :: String -> (Int, BA)
-packString str = (l, arr)
- where
- l@(I# length#) = length str
-
- arr = runST (do
- ch_array <- new_ps_array length#
- -- fill in packed string from "str"
- fill_in ch_array 0# str
- -- freeze the puppy:
- freeze_ps_array ch_array length#
- )
-
- fill_in :: MBA s -> Int# -> [Char] -> ST s ()
- fill_in arr_in# idx [] =
- return ()
- fill_in arr_in# idx (C# c : cs) =
- write_ps_array arr_in# idx c >>
- fill_in arr_in# (idx +# 1#) cs
-\end{code}
-
-Unpacking a string
-
-\begin{code}
-unpackNBytesBA :: BA -> Int -> [Char]
-unpackNBytesBA (BA bytes) (I# len)
- = unpack 0#
- where
- unpack nh
- | nh >=# len = []
- | otherwise = C# ch : unpack (nh +# 1#)
- where
- ch = indexCharArray# bytes nh
-\end{code}
-
-Copying a char string prefix into a byte array.
-
-\begin{code}
-copyPrefixStr :: Addr# -> Int -> BA
-copyPrefixStr a# len@(I# length#) = copy' length#
- where
- copy' length# = runST (do
- {- allocate an array that will hold the string
- -}
- ch_array <- new_ps_array length#
- {- Revert back to Haskell-only solution for the moment.
- _ccall_ memcpy ch_array (A# a) len >>= \ () ->
- write_ps_array ch_array length# (chr# 0#) >>
- -}
- -- fill in packed string from "addr"
- fill_in ch_array 0#
- -- freeze the puppy:
- freeze_ps_array ch_array length#
- )
-
- fill_in :: MBA s -> Int# -> ST s ()
- fill_in arr_in# idx
- | idx ==# length#
- = return ()
- | otherwise
- = case (indexCharOffAddr# a# idx) of { ch ->
- write_ps_array arr_in# idx ch >>
- fill_in arr_in# (idx +# 1#) }
-\end{code}
-
-Copying out a substring, assume a 0-indexed string:
-(and positive lengths, thank you).
-
-\begin{code}
-#ifdef UNUSED
-copySubStr :: Addr# -> Int -> Int -> BA
-copySubStr a# (I# start#) length =
- copyPrefixStr (a# `plusAddr#` start#) length
-#endif
-
-copySubStrBA :: BA -> Int -> Int -> BA
-copySubStrBA (BA barr#) (I# start#) len@(I# length#) = ba
- where
- ba = runST (do
- -- allocate an array that will hold the string
- ch_array <- new_ps_array length#
- -- fill in packed string from "addr"
- fill_in ch_array 0#
- -- freeze the puppy:
- freeze_ps_array ch_array length#
- )
-
- fill_in :: MBA s -> Int# -> ST s ()
- fill_in arr_in# idx
- | idx ==# length#
- = return ()
- | otherwise
- = case (indexCharArray# barr# (start# +# idx)) of { ch ->
- write_ps_array arr_in# idx ch >>
- fill_in arr_in# (idx +# 1#) }
-\end{code}
-
-(Very :-) ``Specialised'' versions of some CharArray things...
-[Copied from PackBase; no real reason -- UGH]
-
-\begin{code}
-new_ps_array :: Int# -> ST s (MBA s)
-write_ps_array :: MBA s -> Int# -> Char# -> ST s ()
-freeze_ps_array :: MBA s -> Int# -> ST s BA
-
-#if __GLASGOW_HASKELL__ < 411
-#define NEW_BYTE_ARRAY newCharArray#
-#else
-#define NEW_BYTE_ARRAY newPinnedByteArray#
-#endif
-
-new_ps_array size = ST $ \ s ->
- case (NEW_BYTE_ARRAY size s) of { (# s2#, barr# #) ->
- (# s2#, MBA barr# #) }
-
-write_ps_array (MBA barr#) n ch = ST $ \ s# ->
- case writeCharArray# barr# n ch s# of { s2# ->
- (# s2#, () #) }
-
--- same as unsafeFreezeByteArray
-freeze_ps_array (MBA arr#) len# = ST $ \ s# ->
- case unsafeFreezeByteArray# arr# s# of { (# s2#, frozen# #) ->
- (# s2#, BA frozen# #) }
-\end{code}
-
-
-Compare two equal-length strings for equality:
-
-\begin{code}
-eqStrPrefix :: Addr# -> ByteArray# -> Int# -> Bool
-eqStrPrefix a# barr# len# =
- inlinePerformIO $ do
- x <- memcmp_ba a# barr# (I# len#)
- return (x == 0)
-
-#ifdef UNUSED
-eqCharStrPrefix :: Addr# -> Addr# -> Int# -> Bool
-eqCharStrPrefix a1# a2# len# =
- inlinePerformIO $ do
- x <- memcmp a1# a2# (I# len#)
- return (x == 0)
-#endif
-
-eqStrPrefixBA :: ByteArray# -> ByteArray# -> Int# -> Int# -> Bool
-eqStrPrefixBA b1# b2# start# len# =
- inlinePerformIO $ do
- x <- memcmp_baoff_ba b2# (I# start#) b1# (I# len#)
- return (x == 0)
-
-#ifdef UNUSED
-eqCharStrPrefixBA :: Addr# -> ByteArray# -> Int# -> Int# -> Bool
-eqCharStrPrefixBA a# b2# start# len# =
- inlinePerformIO $ do
- x <- memcmp_baoff b2# (I# start#) a# (I# len#)
- return (x == 0)
-#endif
-\end{code}
-
-\begin{code}
--- Just like unsafePerformIO, but we inline it. This is safe when
--- there are no side effects, and improves performance.
-{-# INLINE inlinePerformIO #-}
-inlinePerformIO :: IO a -> a
-inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-
-#if __GLASGOW_HASKELL__ <= 408
-strLength (Ptr a#) = ghc_strlen a#
-foreign import ccall unsafe "ghc_strlen"
- ghc_strlen :: Addr# -> Int
-#else
-foreign import ccall unsafe "ghc_strlen"
- strLength :: Ptr () -> Int
-#endif
-
-foreign import ccall unsafe "ghc_memcmp"
- memcmp :: Addr# -> Addr# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp"
- memcmp_ba :: Addr# -> ByteArray# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp_off"
- memcmp_baoff :: ByteArray# -> Int -> Addr# -> Int -> IO Int
-
-foreign import ccall unsafe "ghc_memcmp_off"
- memcmp_baoff_ba :: ByteArray# -> Int -> ByteArray# -> Int -> IO Int
-\end{code}
Buffers for scanning string input stored in external arrays.
\begin{code}
+{-# OPTIONS_GHC -O #-}
+-- always optimise this module, it's critical
+
module StringBuffer
(
StringBuffer(..),
-- non-abstract for vs\/HaskellService
-- * Creation\/destruction
- hGetStringBuffer, -- :: FilePath -> IO StringBuffer
- stringToStringBuffer, -- :: String -> IO StringBuffer
+ hGetStringBuffer,
+ stringToStringBuffer,
- -- * Lookup
- currentChar, -- :: StringBuffer -> Char
- prevChar, -- :: StringBuffer -> Char -> Char
- lookAhead, -- :: StringBuffer -> Int -> Char
- atEnd, -- :: StringBuffer -> Bool
+ -- * Inspection
+ nextChar,
+ currentChar,
+ prevChar,
+ atEnd,
- -- * Moving
- stepOn, stepOnBy,
+ -- * Moving and comparison
+ stepOn,
+ offsetBytes,
+ byteDiff,
-- * Conversion
- lexemeToString, -- :: StringBuffer -> Int -> String
- lexemeToFastString, -- :: StringBuffer -> Int -> FastString
+ lexemeToString,
+ lexemeToFastString,
-- * Parsing integers
parseInteger,
#include "HsVersions.h"
-import FastString
-import Panic
+import Encoding
+import FastString (FastString,mkFastString,mkFastStringBytes)
import GLAEXTS
import Foreign
-#if __GLASGOW_HASKELL__ < 503
-import PrelIOBase
-import PrelHandle
-#else
-import GHC.IOBase
-import GHC.IO ( slurpFile )
-#endif
+import GHC.IOBase ( IO(..) )
+import GHC.Base ( unsafeChr )
+
+import System.IO ( hGetBuf )
-import IO ( openFile, hFileSize, IOMode(ReadMode),
+import IO ( hFileSize, IOMode(ReadMode),
hClose )
#if __GLASGOW_HASKELL__ >= 601
import System.IO ( openBinaryFile )
import IOExts ( openFileEx, IOModeEx(..) )
#endif
-#if __GLASGOW_HASKELL__ < 503
-import IArray ( listArray )
-import ArrayBase ( UArray(..) )
-import MutableArray
-import IOExts ( hGetBufBA )
-#else
-import Data.Array.IArray ( listArray )
-import Data.Array.MArray ( unsafeFreeze, newArray_ )
-import Data.Array.Base ( UArray(..) )
-import Data.Array.IO ( IOArray, hGetArray )
-#endif
-
-import Char ( ord )
-
#if __GLASGOW_HASKELL__ < 601
openBinaryFile fp mode = openFileEx fp (BinaryMode mode)
#endif
+
-- -----------------------------------------------------------------------------
-- The StringBuffer type
--- A StringBuffer is a ByteArray# with a pointer into it. We also cache
--- the length of the ByteArray# for speed.
-
+-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
+-- The bytes are intended to be *immutable*. There are pure
+-- operations to read the contents of a StringBuffer.
+--
+-- A StringBuffer may have a finalizer, depending on how it was
+-- obtained.
+--
data StringBuffer
- = StringBuffer
- ByteArray#
- Int# -- length
- Int# -- current pos
+ = StringBuffer {
+ buf :: {-# UNPACK #-} !(ForeignPtr Word8),
+ len :: {-# UNPACK #-} !Int, -- length
+ cur :: {-# UNPACK #-} !Int -- current pos
+ }
+ -- The buffer is assumed to be UTF-8 encoded, and furthermore
+ -- we add three '\0' bytes to the end as sentinels so that the
+ -- decoder doesn't have to check for overflow at every single byte
+ -- of a multibyte sequence.
instance Show StringBuffer where
- showsPrec _ s = showString "<stringbuffer>"
+ showsPrec _ s = showString "<stringbuffer("
+ . shows (len s) . showString "," . shows (cur s)
+ . showString ">"
-- -----------------------------------------------------------------------------
-- Creation / Destruction
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer fname = do
h <- openBinaryFile fname ReadMode
- size <- hFileSize h
- let size_i@(I# sz#) = fromIntegral size
-#if __GLASGOW_HASKELL__ < 503
- arr <- stToIO (newCharArray (0,size_i-1))
- r <- hGetBufBA h arr size_i
-#else
- arr <- newArray_ (0,size_i-1)
- r <- if size_i == 0 then return 0 else hGetArray h arr size_i
-#endif
- hClose h
- if (r /= size_i)
+ size_i <- hFileSize h
+ let size = fromIntegral size_i
+ buf <- mallocForeignPtrArray (size+3)
+ withForeignPtr buf $ \ptr -> do
+ r <- if size == 0 then return 0 else hGetBuf h ptr size
+ hClose h
+ if (r /= size)
then ioError (userError "short read of file")
else do
-#if __GLASGOW_HASKELL__ < 503
- frozen <- stToIO (unsafeFreezeByteArray arr)
- case frozen of
- ByteArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#else
- frozen <- unsafeFreeze arr
- case frozen of
- UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#endif
+ pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ -- sentinels for UTF-8 decoding
+ return (StringBuffer buf size 0)
-#if __GLASGOW_HASKELL__ >= 502
+stringToStringBuffer :: String -> IO StringBuffer
stringToStringBuffer str = do
- let size@(I# sz#) = length str
- arr = listArray (0,size-1) (map (fromIntegral.ord) str)
- :: UArray Int Word8
- case arr of
- UArray _ _ bytearr# -> return (StringBuffer bytearr# sz# 0#)
-#else
-stringToStringBuffer = panic "stringToStringBuffer: not implemented"
-#endif
+ let size = utf8EncodedLength str
+ buf <- mallocForeignPtrArray (size+3)
+ withForeignPtr buf $ \ptr -> do
+ utf8EncodeString ptr str
+ pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
+ -- sentinels for UTF-8 decoding
+ return (StringBuffer buf size 0)
-- -----------------------------------------------------------------------------
--- Lookup
-
-currentChar :: StringBuffer -> Char
-currentChar (StringBuffer arr# l# current#) =
- ASSERT(current# <# l#)
- C# (indexCharArray# arr# current#)
+-- Grab a character
+
+-- Getting our fingers dirty a little here, but this is performance-critical
+{-# INLINE nextChar #-}
+nextChar :: StringBuffer -> (Char,StringBuffer)
+nextChar (StringBuffer buf len (I# cur#)) =
+ inlinePerformIO $ do
+ withForeignPtr buf $ \(Ptr a#) -> do
+ case utf8DecodeChar# (a# `plusAddr#` cur#) of
+ (# c#, b# #) ->
+ let cur' = I# (b# `minusAddr#` a#) in
+ return (C# c#, StringBuffer buf len cur')
+
+currentChar :: StringBuffer -> Char
+currentChar = fst . nextChar
prevChar :: StringBuffer -> Char -> Char
-prevChar (StringBuffer _ _ 0#) deflt = deflt
-prevChar s deflt = lookAhead s (-1)
-
-lookAhead :: StringBuffer -> Int -> Char
-lookAhead (StringBuffer arr# l# c#) (I# i#) =
- ASSERT(off <# l# && off >=# 0#)
- C# (indexCharArray# arr# off)
- where
- off = c# +# i#
+prevChar (StringBuffer buf len 0) deflt = deflt
+prevChar (StringBuffer buf len cur) deflt =
+ inlinePerformIO $ do
+ withForeignPtr buf $ \p -> do
+ p' <- utf8PrevChar (p `plusPtr` cur)
+ return (fst (utf8DecodeChar p'))
-- -----------------------------------------------------------------------------
-- Moving
stepOn :: StringBuffer -> StringBuffer
-stepOn s = stepOnBy 1 s
+stepOn s = snd (nextChar s)
+
+offsetBytes :: Int -> StringBuffer -> StringBuffer
+offsetBytes i s = s { cur = cur s + i }
-stepOnBy :: Int -> StringBuffer -> StringBuffer
-stepOnBy (I# i#) (StringBuffer fo# l# c#) = StringBuffer fo# l# (c# +# i#)
+byteDiff :: StringBuffer -> StringBuffer -> Int
+byteDiff s1 s2 = cur s2 - cur s1
atEnd :: StringBuffer -> Bool
-atEnd (StringBuffer _ l# c#) = l# ==# c#
+atEnd (StringBuffer _ l c) = l == c
-- -----------------------------------------------------------------------------
-- Conversion
-lexemeToString :: StringBuffer -> Int -> String
+lexemeToString :: StringBuffer -> Int {-bytes-} -> String
lexemeToString _ 0 = ""
-lexemeToString (StringBuffer arr# _ current#) (I# len#) = unpack current#
- where
- end = current# +# len#
+lexemeToString (StringBuffer buf _ cur) bytes =
+ inlinePerformIO $
+ withForeignPtr buf $ \ptr ->
+ utf8DecodeString (ptr `plusPtr` cur) bytes
- unpack nh
- | nh >=# end = []
- | otherwise = C# ch : unpack (nh +# 1#)
- where
- ch = indexCharArray# arr# nh
-
-lexemeToFastString :: StringBuffer -> Int -> FastString
+lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
lexemeToFastString _ 0 = mkFastString ""
-lexemeToFastString (StringBuffer fo _ current#) (I# len) =
- mkFastSubStringBA# fo current# len
+lexemeToFastString (StringBuffer buf _ cur) len =
+ inlinePerformIO $
+ withForeignPtr buf $ \ptr ->
+ return $! mkFastStringBytes (ptr `plusPtr` cur) len
-- -----------------------------------------------------------------------------
-- Parsing integer strings in various bases
+byteOff :: StringBuffer -> Int -> Char
+byteOff (StringBuffer buf _ cur) i =
+ inlinePerformIO $ withForeignPtr buf $ \ptr -> do
+ w <- peek (ptr `plusPtr` (cur+i))
+ return (unsafeChr (fromIntegral (w::Word8)))
+
+-- | XXX assumes ASCII digits only
parseInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
parseInteger buf len radix to_int
= go 0 0
where go i x | i == len = x
- | otherwise = go (i+1) (x * radix + toInteger (to_int (lookAhead buf i)))
+ | otherwise = go (i+1) (x * radix + toInteger (to_int (byteOff buf i)))
+
+-- -----------------------------------------------------------------------------
+-- under the carpet
+
+-- Just like unsafePerformIO, but we inline it.
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+
\end{code}
+++ /dev/null
-Various Unicode-related utilities.
-
-\begin{code}
-module UnicodeUtil(
- stringToUtf8, intsToUtf8
- ) where
-
-#include "HsVersions.h"
-
-import Panic ( panic )
-import Char ( chr, ord )
-\end{code}
-
-\begin{code}
-stringToUtf8 :: String -> String
-stringToUtf8 s = intsToUtf8 (map ord s)
-
-intsToUtf8 :: [Int] -> String
-intsToUtf8 [] = ""
-intsToUtf8 (c:s)
- | c >= 1 && c <= 0x7F = chr c : intsToUtf8 s
- | c < 0 = panic ("charToUtf8 ("++show c++")")
- | c <= 0x7FF = chr (0xC0 + c `div` 0x40 ) :
- chr (0x80 + c `mod` 0x40) :
- intsToUtf8 s
- | c <= 0xFFFF = chr (0xE0 + c `div` 0x1000 ) :
- chr (0x80 + c `div` 0x40 `mod` 0x40) :
- chr (0x80 + c `mod` 0x40) :
- intsToUtf8 s
- | c <= 0x10FFFF = chr (0xF0 + c `div` 0x40000 ) :
- chr (0x80 + c `div` 0x1000 `mod` 0x40) :
- chr (0x80 + c `div` 0x40 `mod` 0x40) :
- chr (0x80 + c `mod` 0x40) :
- intsToUtf8 s
- | otherwise = panic ("charToUtf8 "++show c)
-\end{code}