From: Simon Marlow Date: Wed, 9 May 2007 10:38:34 +0000 (+0000) Subject: Store the constructor name in the info table in UTF-8 X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=9c54ee0c9e25617b2a9ad4cdd9d3a6354e2edc0f Store the constructor name in the info table in UTF-8 --- diff --git a/compiler/basicTypes/DataCon.lhs b/compiler/basicTypes/DataCon.lhs index a83d5f8..550be30 100644 --- a/compiler/basicTypes/DataCon.lhs +++ b/compiler/basicTypes/DataCon.lhs @@ -43,6 +43,11 @@ import ListSetOps import Util import Maybes import FastString +import PackageConfig +import Module + +import Data.Char +import Data.Word \end{code} @@ -518,19 +523,6 @@ mk_dict_strict_mark pred | isStrictPred pred = MarkedStrict dataConName :: DataCon -> Name dataConName = dcName --- generate a name in the format: package:Module.OccName --- and the unique identity of the name -dataConIdentity :: DataCon -> String -dataConIdentity dataCon - = prettyName - where - prettyName = pretty packageModule ++ "." ++ pretty occ - nm = getName dataCon - packageModule = nameModule nm - occ = getOccName dataCon - pretty :: Outputable a => a -> String - pretty = showSDoc . ppr - dataConTag :: DataCon -> ConTag dataConTag = dcTag @@ -694,6 +686,19 @@ dataConRepArgTys :: DataCon -> [Type] dataConRepArgTys dc = dcRepArgTys dc \end{code} +The string :. identifying a constructor, which is attached +to its info table and used by the GHCi debugger and the heap profiler. We want +this string to be UTF-8, so we get the bytes directly from the FastStrings. + +\begin{code} +dataConIdentity :: DataCon -> [Word8] +dataConIdentity dc = bytesFS (packageIdFS (modulePackageId mod)) ++ + fromIntegral (ord ':') : bytesFS (moduleNameFS (moduleName mod)) ++ + fromIntegral (ord '.') : bytesFS (occNameFS (nameOccName name)) + where name = dataConName dc + mod = nameModule name +\end{code} + \begin{code} isTupleCon :: DataCon -> Bool diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 0d6925b..4ba4061 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -43,12 +43,18 @@ import Name import DataCon import Unique import StaticFlags +import FastString +import Packages +import Module import Maybes import Constants import Outputable +import Data.Char +import Data.Word + ------------------------------------------------------------------------- -- -- Generating the info table and code for a closure @@ -89,7 +95,7 @@ emitClosureCodeAndInfoTable cl_info args body ; conName <- if is_con - then do cstr <- mkStringCLit $ fromJust conIdentity + then do cstr <- mkByteStringCLit $ fromJust conIdentity return (makeRelativeRefTo info_lbl cstr) else return (mkIntCLit 0) @@ -111,7 +117,8 @@ emitClosureCodeAndInfoTable cl_info args body Just con -> -- Constructors don't have an SRT -- We keep the *zero-indexed* tag in the srt_len -- field of the info table. - (mkIntCLit 0, fromIntegral (dataConTagZ con), Just $ dataConIdentity con) + (mkIntCLit 0, fromIntegral (dataConTagZ con), + Just $ dataConIdentity con) Nothing -> -- Not a constructor let (label, len) = srtLabelAndLength srt info_lbl diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 804aeab..0e8d6c8 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -26,7 +26,7 @@ module CgUtils ( addToMem, addToMemE, mkWordCLit, - mkStringCLit, + mkStringCLit, mkByteStringCLit, packHalfWordsCLit, blankWord ) where diff --git a/compiler/ghci/ByteCodeItbls.lhs b/compiler/ghci/ByteCodeItbls.lhs index 9b2dac0..730e4de 100644 --- a/compiler/ghci/ByteCodeItbls.lhs +++ b/compiler/ghci/ByteCodeItbls.lhs @@ -117,7 +117,7 @@ make_constr_itbls cons , code = code #endif } - qNameCString <- newCString $ dataConIdentity dcon + qNameCString <- newArray0 0 $ dataConIdentity dcon let conInfoTbl = StgConInfoTable { conDesc = qNameCString, infoTable = itbl @@ -273,7 +273,7 @@ type HalfWord = Word16 #endif data StgConInfoTable = StgConInfoTable { - conDesc :: CString, + conDesc :: Ptr Word8, infoTable :: StgInfoTable } diff --git a/compiler/ghci/Linker.lhs b/compiler/ghci/Linker.lhs index afbd3b5..76e9f31 100644 --- a/compiler/ghci/Linker.lhs +++ b/compiler/ghci/Linker.lhs @@ -55,16 +55,15 @@ import DriverPhases import SrcLoc import UniqSet import Constants +import FastString -- Standard libraries import Control.Monad +import Data.Char import Data.IORef import Data.List -import Foreign.Ptr -import Foreign.C.Types -import Foreign.C.String -import Foreign.Storable +import Foreign import System.IO import System.Directory @@ -152,9 +151,10 @@ deleteFromLinkEnv to_remove new_pls = pls { closure_env = new_closure_env } writeIORef v_PersistentLinkerState new_pls --- | Given a data constructor, find its internal name. --- The info tables for data constructors have a field which records the source name --- of the constructor as a CString. The format is: +-- | Given a data constructor in the heap, find its Name. +-- The info tables for data constructors have a field which records +-- the source name of the constructor as a Ptr Word8 (UTF-8 encoded +-- string). The format is: -- -- Package:Module.Name -- @@ -166,11 +166,13 @@ dataConInfoPtrToName x = do theString <- ioToTcRn $ do let ptr = castPtr x :: Ptr StgInfoTable conDescAddress <- getConDescAddress ptr - str <- peekCString conDescAddress - return str + peekArray0 0 conDescAddress let (pkg, mod, occ) = parse theString - occName = mkOccName OccName.dataName occ - modName = mkModule (stringToPackageId pkg) (mkModuleName mod) + pkgFS = mkFastStringByteList pkg + modFS = mkFastStringByteList mod + occFS = mkFastStringByteList occ + occName = mkOccNameFS OccName.dataName occFS + modName = mkModule (fsToPackageId pkgFS) (mkModuleNameFS modFS) lookupOrig modName occName where @@ -215,7 +217,7 @@ dataConInfoPtrToName x = do in the memory location: info_table_ptr + info_table_size -} - getConDescAddress :: Ptr StgInfoTable -> IO (Ptr CChar) + getConDescAddress :: Ptr StgInfoTable -> IO (Ptr Word8) getConDescAddress ptr = do #ifdef GHCI_TABLES_NEXT_TO_CODE offsetToString <- peek $ ptr `plusPtr` (- wORD_SIZE) @@ -231,20 +233,21 @@ dataConInfoPtrToName x = do -- this is not the conventional way of writing Haskell names. We stick with -- convention, even though it makes the parsing code more troublesome. -- Warning: this code assumes that the string is well formed. - parse :: String -> (String, String, String) + parse :: [Word8] -> ([Word8], [Word8], [Word8]) parse input = ASSERT (all (>0) (map length [pkg, mod, occ])) (pkg, mod, occ) where - (pkg, rest1) = break (==':') input + dot = fromIntegral (ord '.') + (pkg, rest1) = break (== fromIntegral (ord ':')) input (mod, occ) - = (concat $ intersperse "." $ reverse modWords, occWord) + = (concat $ intersperse [dot] $ reverse modWords, occWord) where (modWords, occWord) = ASSERT (length rest1 > 0) (parseModOcc [] (tail rest1)) - parseModOcc :: [String] -> String -> ([String], String) + parseModOcc :: [[Word8]] -> [Word8] -> ([[Word8]], [Word8]) parseModOcc acc str - = case break (== '.') str of + = case break (== dot) str of (top, []) -> (acc, top) - (top, '.':bot) -> parseModOcc (top : acc) bot + (top, _:bot) -> parseModOcc (top : acc) bot getHValue :: HscEnv -> Name -> IO HValue diff --git a/compiler/utils/FastString.lhs b/compiler/utils/FastString.lhs index ea30779..26f687c 100644 --- a/compiler/utils/FastString.lhs +++ b/compiler/utils/FastString.lhs @@ -24,6 +24,7 @@ module FastString -- ** Construction mkFastString, mkFastStringBytes, + mkFastStringByteList, mkFastStringForeignPtr, mkFastString#, mkZFastString, @@ -275,6 +276,15 @@ mkFastString str = utf8EncodeString ptr str mkFastStringForeignPtr ptr buf l +-- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@ +mkFastStringByteList :: [Word8] -> FastString +mkFastStringByteList str = + inlinePerformIO $ do + let l = Prelude.length str + buf <- mallocForeignPtrBytes l + withForeignPtr buf $ \ptr -> do + pokeArray (castPtr ptr) str + mkFastStringForeignPtr ptr buf l -- | Creates a Z-encoded 'FastString' from a 'String' mkZFastString :: String -> FastString