From: qrczak Date: Mon, 7 Aug 2000 23:37:24 +0000 (+0000) Subject: [project @ 2000-08-07 23:37:19 by qrczak] X-Git-Tag: Approximately_9120_patches~3905 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=4b17269854ccf10df8b3ca1711410a5ca439ea8a;p=ghc-hetmet.git [project @ 2000-08-07 23:37:19 by qrczak] Now Char, Char#, StgChar have 31 bits (physically 32). "foo"# is still an array of bytes. CharRep represents 32 bits (on a 64-bit arch too). There is also Int8Rep, used in those places where bytes were originally meant. readCharArray, indexCharOffAddr etc. still use bytes. Storable and {I,M}Array use wide Chars. In future perhaps all sized integers should be primitive types. Then some usages of indexing primops scattered through the code could be changed to then-available Int8 ones, and then Char variants of primops could be made wide (other usages that handle text should use conversion that will be provided later). I/O and _ccall_ arguments assume ISO-8859-1. UTF-8 is internally used for string literals (only). Z-encoding is ready for Unicode identifiers. Ranges of intlike and charlike closures are more easily configurable. I've probably broken nativeGen/MachCode.lhs:chrCode for Alpha but I don't know the Alpha assembler to fix it (what is zapnot?). Generally I'm not sure if I've done the NCG changes right. This commit breaks the binary compatibility (of course). TODO: * is* and to{Lower,Upper} in Char (in progress). * Libraries for text conversion (in design / experiments), to be plugged to I/O and a higher level foreign library. * PackedString. * StringBuffer and accepting source in encodings other than ISO-8859-1. --- diff --git a/ghc/compiler/HsVersions.h b/ghc/compiler/HsVersions.h index b90e474..00714cd 100644 --- a/ghc/compiler/HsVersions.h +++ b/ghc/compiler/HsVersions.h @@ -155,13 +155,17 @@ import qualified FastString # define _NIL_ (FastString.mkFastString "") # define _CONS_ FastString.consFS # define _HEAD_ FastString.headFS +# define _HEAD_INT_ FastString.headIntFS # define _TAIL_ FastString.tailFS # define _LENGTH_ FastString.lengthFS # define _PK_ FastString.mkFastString +# define _PK_INT_ FastString.mkFastStringInt # define _UNPK_ FastString.unpackFS +# define _UNPK_INT_ FastString.unpackIntFS # define _APPEND_ `FastString.appendFS` # define _CONCAT_ FastString.concatFS #else +# error I think that FastString is now always used. If not, fix this. # define FAST_STRING String # define SLIT(x) (x) # define _CMP_STRING_ cmpString diff --git a/ghc/compiler/absCSyn/AbsCSyn.lhs b/ghc/compiler/absCSyn/AbsCSyn.lhs index eac8a27..5cf12fc 100644 --- a/ghc/compiler/absCSyn/AbsCSyn.lhs +++ b/ghc/compiler/absCSyn/AbsCSyn.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: AbsCSyn.lhs,v 1.32 2000/08/02 14:13:26 rrt Exp $ +% $Id: AbsCSyn.lhs,v 1.33 2000/08/07 23:37:19 qrczak Exp $ % \section[AbstractC]{Abstract C: the last stop before machine code} @@ -315,7 +315,7 @@ data CAddrMode | CCharLike CAddrMode -- The address of a static char-like closure for -- the specified character. It is guaranteed to be in - -- the range 0..255. + -- the range mIN_CHARLIKE..mAX_CHARLIKE | CIntLike CAddrMode -- The address of a static int-like closure for the -- specified small integer. It is guaranteed to be in diff --git a/ghc/compiler/absCSyn/CStrings.lhs b/ghc/compiler/absCSyn/CStrings.lhs index e231993..6f2a0e3 100644 --- a/ghc/compiler/absCSyn/CStrings.lhs +++ b/ghc/compiler/absCSyn/CStrings.lhs @@ -6,8 +6,7 @@ module CStrings( cSEP, pp_cSEP, - stringToC, charToC, pprFSInCStyle, pprStringInCStyle, - charToEasyHaskell + pprFSInCStyle, pprStringInCStyle ) where #include "HsVersions.h" @@ -36,64 +35,20 @@ pp_cSEP = char '_' \begin{code} pprFSInCStyle :: FAST_STRING -> SDoc -pprFSInCStyle fs = doubleQuotes (text (stringToC (_UNPK_ fs))) +-- Assumes it contains only characters '\0'..'\xFF'! +pprFSInCStyle fs = pprStringInCStyle (_UNPK_ fs) pprStringInCStyle :: String -> SDoc -pprStringInCStyle s = doubleQuotes (text (stringToC s)) - -stringToC :: String -> String --- Convert a string to the form required by C in a C literal string --- Tthe hassle is what to do w/ strings like "ESC 0"... -stringToC "" = "" -stringToC [c] = charToC c -stringToC (c:cs) - -- if we have something "octifiable" in "c", we'd better "octify" - -- the rest of the string, too. - = if (c < ' ' || c > '~') - then (charToC c) ++ (concat (map char_to_C cs)) - else (charToC c) ++ (stringToC cs) - where - char_to_C c | c == '\n' = "\\n" -- use C escapes when we can - | c == '\a' = "\\a" - | c == '\b' = "\\b" -- ToDo: chk some of these... - | c == '\r' = "\\r" - | c == '\t' = "\\t" - | c == '\f' = "\\f" - | c == '\v' = "\\v" - | otherwise = '\\' : (octify (ord c)) +pprStringInCStyle s = doubleQuotes (text (concatMap charToC s)) charToC :: Char -> String --- Convert a character to the form reqd in a C character literal -charToC c = if (c >= ' ' && c <= '~') -- non-portable... - then case c of - '\'' -> "\\'" - '\\' -> "\\\\" - '"' -> "\\\"" - '\n' -> "\\n" - '\a' -> "\\a" - '\b' -> "\\b" - '\r' -> "\\r" - '\t' -> "\\t" - '\f' -> "\\f" - '\v' -> "\\v" - _ -> [c] - else '\\' : (octify (ord c)) - -charToEasyHaskell :: Char -> String --- Convert a character to the form reqd in a Haskell character literal -charToEasyHaskell c - = if (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c >= '0' && c <= '9') - then [c] - else case c of - _ -> '\\' : show (ord c) - -octify :: Int -> String -octify n - = if n < 8 then - [chr (n + ord '0')] - else - octify (n `quot` 8) ++ [chr (n `rem` 8 + ord '0')] +charToC '\"' = "\\\"" +charToC '\'' = "\\\'" +charToC '\\' = "\\\\" +charToC c | c >= ' ' && c <= '~' = [c] + | c > '\xFF' = panic ("charToC "++show c) + | otherwise = ['\\', + chr (ord '0' + ord c `div` 64), + chr (ord '0' + ord c `div` 8 `mod` 8), + chr (ord '0' + ord c `mod` 8)] \end{code} - diff --git a/ghc/compiler/absCSyn/PprAbsC.lhs b/ghc/compiler/absCSyn/PprAbsC.lhs index d98048c..82431ab 100644 --- a/ghc/compiler/absCSyn/PprAbsC.lhs +++ b/ghc/compiler/absCSyn/PprAbsC.lhs @@ -38,7 +38,7 @@ import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros ) import CostCentre ( pprCostCentreDecl, pprCostCentreStackDecl ) import Costs ( costs, addrModeCosts, CostRes(..), Side(..) ) -import CStrings ( stringToC, pprCLabelString ) +import CStrings ( pprStringInCStyle, pprCLabelString ) import FiniteMap ( addToFM, emptyFM, lookupFM, FiniteMap ) import Literal ( Literal(..) ) import TyCon ( tyConDataCons ) @@ -498,8 +498,8 @@ pprAbsC stmt@(CClosureInfoAndCode cl_info slow maybe_fast cl_descr) _ type_str = pprSMRep (closureSMRep cl_info) - pp_descr = hcat [char '"', text (stringToC cl_descr), char '"'] - pp_type = hcat [char '"', text (stringToC (closureTypeDescr cl_info)), char '"'] + pp_descr = pprStringInCStyle cl_descr + pp_type = pprStringInCStyle (closureTypeDescr cl_info) pprAbsC stmt@(CClosureTbl tycon) _ = vcat ( @@ -1289,6 +1289,7 @@ pprUnionTag RetRep = char 'p' pprUnionTag CostCentreRep = panic "pprUnionTag:CostCentre?" pprUnionTag CharRep = char 'c' +pprUnionTag Int8Rep = ptext SLIT("i8") pprUnionTag IntRep = char 'i' pprUnionTag WordRep = char 'w' pprUnionTag AddrRep = char 'a' @@ -1534,9 +1535,8 @@ ppr_decls_Amode (CLit _) = returnTE (Nothing, Nothing) -- CIntLike must be a literal -- no decls ppr_decls_Amode (CIntLike int) = returnTE (Nothing, Nothing) --- CCharLike may have be arbitrary value -- may have decls -ppr_decls_Amode (CCharLike char) - = ppr_decls_Amode char +-- CCharLike too +ppr_decls_Amode (CCharLike char) = returnTE (Nothing, Nothing) -- now, the only place where we actually print temps/externs... ppr_decls_Amode (CTemp uniq kind) diff --git a/ghc/compiler/basicTypes/Literal.lhs b/ghc/compiler/basicTypes/Literal.lhs index b8f495e..d2f6509 100644 --- a/ghc/compiler/basicTypes/Literal.lhs +++ b/ghc/compiler/basicTypes/Literal.lhs @@ -27,7 +27,7 @@ import TysPrim ( charPrimTy, addrPrimTy, floatPrimTy, doublePrimTy, import PrimRep ( PrimRep(..) ) import Type ( Type, typePrimRep ) import PprType ( pprParendType ) -import CStrings ( charToC, charToEasyHaskell, pprFSInCStyle ) +import CStrings ( pprFSInCStyle ) import Outputable import Util ( thenCmp ) @@ -85,7 +85,7 @@ function applications, etc., etc., has not yet been done. data Literal = ------------------ -- First the primitive guys - MachChar Char + MachChar Int -- Char# At least 31 bits | MachStr FAST_STRING | MachAddr Integer -- Whatever this machine thinks is a "pointer" @@ -159,8 +159,8 @@ int2WordLit (MachInt i) | i < 0 = MachWord (1 + tARGET_MAX_WORD + i) -- (-1) ---> tARGET_MAX_WORD | otherwise = MachWord i -char2IntLit (MachChar c) = MachInt (toInteger (ord c)) -int2CharLit (MachInt i) = MachChar (chr (fromInteger i)) +char2IntLit (MachChar c) = MachInt (toInteger c) +int2CharLit (MachInt i) = MachChar (fromInteger i) float2IntLit (MachFloat f) = MachInt (truncate f) int2FloatLit (MachInt i) = MachFloat (fromInteger i) @@ -268,13 +268,13 @@ pprLit lit iface_style = ifaceStyle sty in case lit of - MachChar ch | code_style -> hcat [ptext SLIT("(C_)"), char '\'', - text (charToC ch), char '\''] - | iface_style -> char '\'' <> text (charToEasyHaskell ch) <> char '\'' - | otherwise -> text ['\'', ch, '\''] + MachChar ch | code_style -> hcat [ptext SLIT("(C_)"), text (show ch)] + | otherwise -> pprHsChar ch MachStr s | code_style -> pprFSInCStyle s - | otherwise -> pprFSAsString s + | otherwise -> pprHsString s + -- Warning: printing MachStr in code_style assumes it contains + -- only characters '\0'..'\xFF'! MachInt i | code_style && i == tARGET_MIN_INT -> parens (integer (i+1) <> text "-1") -- Avoid a problem whereby gcc interprets @@ -300,11 +300,11 @@ pprLit lit | otherwise -> ptext SLIT("__addr") <+> integer p MachLabel l | code_style -> ptext SLIT("(&") <> ptext l <> char ')' - | otherwise -> ptext SLIT("__label") <+> pprFSAsString l + | otherwise -> ptext SLIT("__label") <+> pprHsString l MachLitLit s ty | code_style -> ptext s | otherwise -> parens (hsep [ptext SLIT("__litlit"), - pprFSAsString s, + pprHsString s, pprParendType ty]) pprIntVal :: Integer -> SDoc @@ -337,7 +337,7 @@ Hash values should be zero or a positive integer. No negatives please. \begin{code} hashLiteral :: Literal -> Int -hashLiteral (MachChar c) = ord c + 1000 -- Keep it out of range of common ints +hashLiteral (MachChar c) = c + 1000 -- Keep it out of range of common ints hashLiteral (MachStr s) = hashFS s hashLiteral (MachAddr i) = hashInteger i hashLiteral (MachInt i) = hashInteger i diff --git a/ghc/compiler/basicTypes/MkId.lhs b/ghc/compiler/basicTypes/MkId.lhs index 3504caa..ff2f355 100644 --- a/ghc/compiler/basicTypes/MkId.lhs +++ b/ghc/compiler/basicTypes/MkId.lhs @@ -92,6 +92,8 @@ import Unique import Maybe ( isJust ) import Outputable import Util ( assoc ) +import UnicodeUtil ( stringToUtf8 ) +import Char ( ord ) \end{code} @@ -371,7 +373,7 @@ Similarly for newtypes unN = /\a -> \n:N -> coerce (a->a) n \begin{code} -mkRecordSelId tycon field_label unpack_id +mkRecordSelId tycon field_label unpack_id unpackUtf8_id -- Assumes that all fields with the same field label have the same type -- -- Annoyingly, we have to pass in the unpackCString# Id, because @@ -442,7 +444,16 @@ mkRecordSelId tycon field_label unpack_id error_expr = mkApps (Var rEC_SEL_ERROR_ID) [Type (unUsgTy field_tau), err_string] -- preserves invariant that type args are *not* usage-annotated on top. KSW 1999-04. - err_string = App (Var unpack_id) (Lit (MachStr (_PK_ full_msg))) + err_string + | all safeChar full_msg + = App (Var unpack_id) (Lit (MachStr (_PK_ full_msg))) + | otherwise + = App (Var unpackUtf8_id) (Lit (MachStr (_PK_ (stringToUtf8 (map ord full_msg))))) + where + safeChar c = c >= '\1' && c <= '\xFF' + -- TODO: Putting this Unicode stuff here is ugly. Find a better + -- generic place to make string literals. This logic is repeated + -- in DsUtils. full_msg = showSDoc (sep [text "No match in record selector", ppr sel_id]) \end{code} diff --git a/ghc/compiler/basicTypes/OccName.lhs b/ghc/compiler/basicTypes/OccName.lhs index e2da548..5eb623b 100644 --- a/ghc/compiler/basicTypes/OccName.lhs +++ b/ghc/compiler/basicTypes/OccName.lhs @@ -39,7 +39,7 @@ module OccName ( #include "HsVersions.h" -import Char ( isDigit, isAlpha, isUpper, isLower, ISALPHANUM, ord, chr, digitToInt, intToDigit ) +import Char ( isDigit, isAlpha, isUpper, isLower, ISALPHANUM, ord, chr, digitToInt ) import Util ( thenCmp ) import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM ) import Outputable @@ -438,8 +438,8 @@ The basic encoding scheme is this. * Most other printable characters translate to 'zx' or 'Zx' for some alphabetic character x -* The others translate as 'zxdd' where 'dd' is exactly two hexadecimal - digits for the ord of the character +* The others translate as 'znnnU' where 'nnn' is the decimal number + of the character Before After -------------------------- @@ -532,9 +532,7 @@ encode_ch '/' = "zs" encode_ch '*' = "zt" encode_ch '_' = "zu" encode_ch '%' = "zv" -encode_ch c = ['z', 'x', intToDigit hi, intToDigit lo] - where - (hi,lo) = ord c `quotRem` 16 +encode_ch c = 'z' : shows (ord c) "U" \end{code} Decode is used for user printing. @@ -577,14 +575,15 @@ decode_escape ('s' : rest) = '/' : decode rest decode_escape ('t' : rest) = '*' : decode rest decode_escape ('u' : rest) = '_' : decode rest decode_escape ('v' : rest) = '%' : decode rest -decode_escape ('x' : d1 : d2 : rest) = chr (digitToInt d1 * 16 + digitToInt d2) : decode rest -- Tuples are coded as Z23T +-- Characters not having a specific code are coded as z224U decode_escape (c : rest) | isDigit c = go (digitToInt c) rest where go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest go n ('T' : rest) = '(' : replicate n ',' ++ ')' : decode rest + go n ('U' : rest) = chr n : decode rest go n other = pprPanic "decode_escape" (ppr n <+> text (c:rest)) decode_escape (c : rest) = pprTrace "decode_escape" (char c) (decode rest) diff --git a/ghc/compiler/basicTypes/Unique.lhs b/ghc/compiler/basicTypes/Unique.lhs index abe3856..5c76485 100644 --- a/ghc/compiler/basicTypes/Unique.lhs +++ b/ghc/compiler/basicTypes/Unique.lhs @@ -177,7 +177,7 @@ module Unique ( trueDataConKey, unboundKey, unboxedConKey, - unpackCString2IdKey, + unpackCStringUtf8IdKey, unpackCStringAppendIdKey, unpackCStringFoldrIdKey, unpackCStringIdKey, @@ -630,7 +630,7 @@ realWorldPrimIdKey = mkPreludeMiscIdUnique 23 recConErrorIdKey = mkPreludeMiscIdUnique 24 recUpdErrorIdKey = mkPreludeMiscIdUnique 25 traceIdKey = mkPreludeMiscIdUnique 26 -unpackCString2IdKey = mkPreludeMiscIdUnique 27 +unpackCStringUtf8IdKey = mkPreludeMiscIdUnique 27 unpackCStringAppendIdKey = mkPreludeMiscIdUnique 28 unpackCStringFoldrIdKey = mkPreludeMiscIdUnique 29 unpackCStringIdKey = mkPreludeMiscIdUnique 30 diff --git a/ghc/compiler/codeGen/CgCon.lhs b/ghc/compiler/codeGen/CgCon.lhs index e04da6b..f14ecab 100644 --- a/ghc/compiler/codeGen/CgCon.lhs +++ b/ghc/compiler/codeGen/CgCon.lhs @@ -33,7 +33,8 @@ import CgUsages ( getRealSp, getVirtSp, setRealAndVirtualSp, getSpRelOffset ) import CgClosure ( cgTopRhsClosure ) import CgRetConv ( assignRegs ) -import Constants ( mAX_INTLIKE, mIN_INTLIKE, mIN_UPD_SIZE ) +import Constants ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE, + mIN_UPD_SIZE ) import CgHeapery ( allocDynClosure, inPlaceAllocDynClosure ) import CgTailCall ( performReturn, mkStaticAlgReturnCode, doTailCall, mkUnboxedTupleReturnCode ) @@ -143,6 +144,12 @@ buildDynCon binder cc con [] (mkConLFInfo con)) \end{code} +The following three paragraphs about @Char@-like and @Int@-like +closures are obsolete, but I don't understand the details well enough +to properly word them, sorry. I've changed the treatment of @Char@s to +be analogous to @Int@s: only a subset is preallocated, because @Char@ +has now 31 bits. Only literals are handled here. -- Qrczak + Now for @Char@-like closures. We generate an assignment of the address of the closure to a temporary. It would be possible simply to generate no code, and record the addressing mode in the environment, @@ -160,18 +167,22 @@ Because of this, we use can safely return an addressing mode. \begin{code} buildDynCon binder cc con [arg_amode] - - | maybeCharLikeCon con - = absC (CAssign temp_amode (CCharLike arg_amode)) `thenC` - returnFC temp_id_info - | maybeIntLikeCon con && in_range_int_lit arg_amode = returnFC (stableAmodeIdInfo binder (CIntLike arg_amode) (mkConLFInfo con)) where (temp_amode, temp_id_info) = newTempAmodeAndIdInfo binder (mkConLFInfo con) in_range_int_lit (CLit (MachInt val)) = val <= mAX_INTLIKE && val >= mIN_INTLIKE - in_range_int_lit other_amode = False + in_range_int_lit _other_amode = False + +buildDynCon binder cc con [arg_amode] + | maybeCharLikeCon con && in_range_char_lit arg_amode + = returnFC (stableAmodeIdInfo binder (CCharLike arg_amode) (mkConLFInfo con)) + where + (temp_amode, temp_id_info) = newTempAmodeAndIdInfo binder (mkConLFInfo con) + + in_range_char_lit (CLit (MachChar val)) = val <= mAX_CHARLIKE && val >= mIN_CHARLIKE + in_range_char_lit _other_amode = False \end{code} Now the general case. @@ -296,7 +307,6 @@ cgReturnDataCon con amodes -- do update in place... UpdateCode | not (isNullaryDataCon con) -- no nullary constructors, please - && not (maybeCharLikeCon con) -- no chars please (these are all static) && not (any isFollowableRep (map getAmodeRep amodes)) -- no ptrs please (generational gc...) && closureSize closure_info <= mIN_UPD_SIZE diff --git a/ghc/compiler/codeGen/CgRetConv.lhs b/ghc/compiler/codeGen/CgRetConv.lhs index 3b61312..4c0151e 100644 --- a/ghc/compiler/codeGen/CgRetConv.lhs +++ b/ghc/compiler/codeGen/CgRetConv.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP Project, Glasgow University, 1992-1998 % -% $Id: CgRetConv.lhs,v 1.23 2000/07/11 16:03:37 simonmar Exp $ +% $Id: CgRetConv.lhs,v 1.24 2000/08/07 23:37:20 qrczak Exp $ % \section[CgRetConv]{Return conventions for the code generator} @@ -81,6 +81,7 @@ dataReturnConvPrim Int64Rep = LongReg Int64Rep ILIT(1) dataReturnConvPrim Word64Rep = LongReg Word64Rep ILIT(1) dataReturnConvPrim AddrRep = VanillaReg AddrRep ILIT(1) dataReturnConvPrim CharRep = VanillaReg CharRep ILIT(1) +dataReturnConvPrim Int8Rep = VanillaReg Int8Rep ILIT(1) dataReturnConvPrim FloatRep = FloatReg ILIT(1) dataReturnConvPrim DoubleRep = DoubleReg ILIT(1) dataReturnConvPrim VoidRep = VoidReg diff --git a/ghc/compiler/deSugar/DsExpr.lhs b/ghc/compiler/deSugar/DsExpr.lhs index 94e1ec5..7dfb84a 100644 --- a/ghc/compiler/deSugar/DsExpr.lhs +++ b/ghc/compiler/deSugar/DsExpr.lhs @@ -148,7 +148,7 @@ dsExpr (HsLitOut (HsString s) _) | _LENGTH_ s == 1 = let - the_char = mkConApp charDataCon [mkLit (MachChar (_HEAD_ s))] + the_char = mkConApp charDataCon [mkLit (MachChar (_HEAD_INT_ s))] the_nil = mkNilExpr charTy the_cons = mkConsExpr charTy the_char the_nil in diff --git a/ghc/compiler/deSugar/DsUtils.lhs b/ghc/compiler/deSugar/DsUtils.lhs index c96665f..bf63c5f 100644 --- a/ghc/compiler/deSugar/DsUtils.lhs +++ b/ghc/compiler/deSugar/DsUtils.lhs @@ -70,8 +70,9 @@ import TysWiredIn ( nilDataCon, consDataCon, ) import BasicTypes ( Boxity(..) ) import UniqSet ( mkUniqSet, minusUniqSet, isEmptyUniqSet, UniqSet ) -import Unique ( unpackCStringIdKey, unpackCString2IdKey ) +import Unique ( unpackCStringIdKey, unpackCStringUtf8IdKey ) import Outputable +import UnicodeUtil ( stringToUtf8 ) \end{code} @@ -123,7 +124,7 @@ tidyLitPat lit lit_ty default_pat mk_list (HsString s) = foldr (\c pat -> ConPat consDataCon lit_ty [] [] [mk_char_lit c,pat]) - (ConPat nilDataCon lit_ty [] [] []) (_UNPK_ s) + (ConPat nilDataCon lit_ty [] [] []) (_UNPK_INT_ s) mk_char_lit c = ConPat charDataCon charTy [] [] [LitPat (HsCharPrim c) charPrimTy] \end{code} @@ -390,20 +391,19 @@ mkStringLit str = mkStringLitFS (_PK_ str) mkStringLitFS :: FAST_STRING -> DsM CoreExpr mkStringLitFS str - | any is_NUL (_UNPK_ str) - = -- Must cater for NULs in literal string - dsLookupGlobalValue unpackCString2IdKey `thenDs` \ unpack_id -> - returnDs (mkApps (Var unpack_id) - [Lit (MachStr str), - mkIntLitInt (_LENGTH_ str)]) - - | otherwise - = -- No NULs in the string + | all safeChar chars + = dsLookupGlobalValue unpackCStringIdKey `thenDs` \ unpack_id -> returnDs (App (Var unpack_id) (Lit (MachStr str))) + | otherwise + = + dsLookupGlobalValue unpackCStringUtf8IdKey `thenDs` \ unpack_id -> + returnDs (App (Var unpack_id) (Lit (MachStr (_PK_ (stringToUtf8 chars))))) + where - is_NUL c = c == '\0' + chars = _UNPK_INT_ str + safeChar c = c >= 1 && c <= 0xFF \end{code} %************************************************************************ diff --git a/ghc/compiler/hsSyn/HsBasic.lhs b/ghc/compiler/hsSyn/HsBasic.lhs index d2721ae..11558f7 100644 --- a/ghc/compiler/hsSyn/HsBasic.lhs +++ b/ghc/compiler/hsSyn/HsBasic.lhs @@ -21,8 +21,8 @@ import Ratio ( Rational ) \begin{code} data HsLit - = HsChar Char -- characters - | HsCharPrim Char -- unboxed char literals + = HsChar Int -- characters + | HsCharPrim Int -- unboxed char literals | HsString FAST_STRING -- strings | HsStringPrim FAST_STRING -- packed string @@ -57,10 +57,10 @@ negLiteral (HsFrac f) = HsFrac (-f) \begin{code} instance Outputable HsLit where -- Use "show" because it puts in appropriate escapes - ppr (HsChar c) = text (show c) - ppr (HsCharPrim c) = text (show c) <> char '#' - ppr (HsStringPrim s) = pprFSAsString s <> char '#' - ppr (HsString s) = pprFSAsString s + ppr (HsChar c) = pprHsChar c + ppr (HsCharPrim c) = pprHsChar c <> char '#' + ppr (HsString s) = pprHsString s + ppr (HsStringPrim s) = pprHsString s <> char '#' ppr (HsInt i) = integer i ppr (HsFrac f) = rational f ppr (HsFloatPrim f) = rational f <> char '#' diff --git a/ghc/compiler/hsSyn/HsCore.lhs b/ghc/compiler/hsSyn/HsCore.lhs index c21a2d3..e91e601 100644 --- a/ghc/compiler/hsSyn/HsCore.lhs +++ b/ghc/compiler/hsSyn/HsCore.lhs @@ -190,7 +190,7 @@ pprUfExpr :: Outputable name => (SDoc -> SDoc) -> UfExpr name -> SDoc pprUfExpr add_par (UfVar v) = ppr v pprUfExpr add_par (UfLit l) = ppr l -pprUfExpr add_par (UfLitLit l ty) = add_par (hsep [ptext SLIT("__litlit"), pprFSAsString l, pprParendHsType ty]) +pprUfExpr add_par (UfLitLit l ty) = add_par (hsep [ptext SLIT("__litlit"), pprHsString l, pprParendHsType ty]) pprUfExpr add_par (UfCCall cc ty) = braces (pprCCallOp cc <+> ppr ty) pprUfExpr add_par (UfType ty) = char '@' <+> pprParendHsType ty @@ -238,7 +238,7 @@ instance Outputable name => Outputable (UfNote name) where instance Outputable name => Outputable (UfConAlt name) where ppr UfDefault = text "__DEFAULT" ppr (UfLitAlt l) = ppr l - ppr (UfLitLitAlt l ty) = parens (hsep [ptext SLIT("__litlit"), pprFSAsString l, pprParendHsType ty]) + ppr (UfLitLitAlt l ty) = parens (hsep [ptext SLIT("__litlit"), pprHsString l, pprParendHsType ty]) ppr (UfDataAlt d) = ppr d instance Outputable name => Outputable (UfBinder name) where diff --git a/ghc/compiler/javaGen/Java.lhs b/ghc/compiler/javaGen/Java.lhs index de16154..ec2c506 100644 --- a/ghc/compiler/javaGen/Java.lhs +++ b/ghc/compiler/javaGen/Java.lhs @@ -131,7 +131,7 @@ instance Ord Name where data Lit = IntLit Integer -- unboxed - | CharLit Char -- unboxed + | CharLit Int -- unboxed | StringLit String -- java string deriving Show diff --git a/ghc/compiler/javaGen/JavaGen.lhs b/ghc/compiler/javaGen/JavaGen.lhs index 6093a80..6278a70 100644 --- a/ghc/compiler/javaGen/JavaGen.lhs +++ b/ghc/compiler/javaGen/JavaGen.lhs @@ -605,6 +605,7 @@ suffix _ = "" primName :: PrimType -> String primName PrimInt = "int" primName PrimChar = "char" +primName PrimByte = "byte" primName PrimBoolean = "boolean" primName _ = error "unsupported primitive" @@ -803,6 +804,9 @@ inttype = PrimType PrimInt chartype :: Type chartype = PrimType PrimChar +bytetype :: Type +bytetype = PrimType PrimByte + -- This lets you get inside a possible "Value" type, -- to access the internal unboxed object. access :: Expr -> Type -> Expr @@ -811,6 +815,7 @@ access expr other = expr accessPrim expr PrimInt = Call expr (Name "intValue" inttype) [] accessPrim expr PrimChar = Call expr (Name "charValue" chartype) [] +accessPrim expr PrimByte = Call expr (Name "byteValue" bytetype) [] accessPrim expr other = pprPanic "accessPrim" (text (show other)) -- This is where we map from typename to types, @@ -831,6 +836,7 @@ primRepToType ::PrimRep -> Type primRepToType PtrRep = objectType primRepToType IntRep = inttype primRepToType CharRep = chartype +primRepToType Int8Rep = bytetype primRepToType AddrRep = objectType primRepToType other = pprPanic "primRepToType" (ppr other) diff --git a/ghc/compiler/javaGen/PrintJava.lhs b/ghc/compiler/javaGen/PrintJava.lhs index 0db596d..edaf8e5 100644 --- a/ghc/compiler/javaGen/PrintJava.lhs +++ b/ghc/compiler/javaGen/PrintJava.lhs @@ -220,7 +220,7 @@ call e n es = e <> dot <> n <> parens (hsep (punctuate comma (map expr es))) literal = \l -> case l of { IntLit i -> text (show i) - ; CharLit c -> text (show c) + ; CharLit c -> text "(char)" <+> text (show c) ; StringLit s -> text ("\"" ++ s ++ "\"") -- strings are already printable } diff --git a/ghc/compiler/main/Constants.lhs b/ghc/compiler/main/Constants.lhs index 97d6a3a..660b99f 100644 --- a/ghc/compiler/main/Constants.lhs +++ b/ghc/compiler/main/Constants.lhs @@ -57,6 +57,7 @@ module Constants ( oTHER_TAG, mAX_INTLIKE, mIN_INTLIKE, + mAX_CHARLIKE, mIN_CHARLIKE, spRelToInt, @@ -120,6 +121,10 @@ oTHER_TAG = (INFO_OTHER_TAG :: Integer) -- (-1) unevaluated, probably mIN_INTLIKE, mAX_INTLIKE :: Integer -- Only used to compare with (MachInt Integer) mIN_INTLIKE = MIN_INTLIKE mAX_INTLIKE = MAX_INTLIKE + +mIN_CHARLIKE, mAX_CHARLIKE :: Int -- Only used to compare with (MachChar Int) +mIN_CHARLIKE = MIN_CHARLIKE +mAX_CHARLIKE = MAX_CHARLIKE \end{code} A little function that abstracts the stack direction. Note that most diff --git a/ghc/compiler/nativeGen/AbsCStixGen.lhs b/ghc/compiler/nativeGen/AbsCStixGen.lhs index b9a2c8c..7b0cc21 100644 --- a/ghc/compiler/nativeGen/AbsCStixGen.lhs +++ b/ghc/compiler/nativeGen/AbsCStixGen.lhs @@ -146,7 +146,7 @@ Here we handle top-level things, like @CCodeBlock@s and mk_StCLbl_for_SRT :: CLabel -> StixTree mk_StCLbl_for_SRT label | labelDynamic label - = StIndex CharRep (StCLbl label) (StInt 1) + = StIndex Int8Rep (StCLbl label) (StInt 1) | otherwise = StCLbl label @@ -223,7 +223,8 @@ Here we handle top-level things, like @CCodeBlock@s and = StData (promote_to_word (getAmodeRep amode)) [a2stix amode] -- We need to promote any item smaller than a word to a word - promote_to_word CharRep = WordRep + promote_to_word Int8Rep = IntRep + promote_to_word CharRep = IntRep promote_to_word other = other -- always at least one padding word: this is the static link field @@ -473,7 +474,7 @@ be tuned.) \begin{code} intTag :: Literal -> Integer - intTag (MachChar c) = toInteger (ord c) + intTag (MachChar c) = toInteger c intTag (MachInt i) = i intTag (MachWord w) = intTag (word2IntLit (MachWord w)) intTag _ = panic "intTag" diff --git a/ghc/compiler/nativeGen/MachCode.lhs b/ghc/compiler/nativeGen/MachCode.lhs index cf1aef1..57bdc39 100644 --- a/ghc/compiler/nativeGen/MachCode.lhs +++ b/ghc/compiler/nativeGen/MachCode.lhs @@ -160,7 +160,8 @@ mangleIndexTree (StIndex pk base off) ] where shift DoubleRep = 3::Integer - shift CharRep = 0::Integer + shift CharRep = 2::Integer + shift Int8Rep = 0::Integer shift _ = IF_ARCH_alpha(3,2) \end{code} @@ -3249,14 +3250,16 @@ coerceFP2Int x %* * %************************************************************************ -Integer to character conversion. Where applicable, we try to do this -in one step if the original object is in memory. +Integer to character conversion. \begin{code} chrCode :: StixTree -> NatM Register #if alpha_TARGET_ARCH +-- TODO: This is probably wrong, but I don't know Alpha assembler. +-- It should coerce a 64-bit value to a 32-bit value. + chrCode x = getRegister x `thenNat` \ register -> getNewRegNCG IntRep `thenNat` \ reg -> @@ -3273,47 +3276,23 @@ chrCode x chrCode x = getRegister x `thenNat` \ register -> - let - code__2 dst = let - code = registerCode register dst - src = registerName register dst - in code `appOL` - if isFixed register && src /= dst - then toOL [MOV L (OpReg src) (OpReg dst), - AND L (OpImm (ImmInt 255)) (OpReg dst)] - else unitOL (AND L (OpImm (ImmInt 255)) (OpReg src)) - in - returnNat (Any IntRep code__2) + returnNat ( + case register of + Fixed _ reg code -> Fixed IntRep reg code + Any _ code -> Any IntRep code + ) #endif {- i386_TARGET_ARCH -} -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #if sparc_TARGET_ARCH -chrCode (StInd pk mem) - = getAmode mem `thenNat` \ amode -> - let - code = amodeCode amode - src = amodeAddr amode - src_off = addrOffset src 3 - src__2 = case src_off of Just x -> x - code__2 dst = if maybeToBool src_off then - code `snocOL` LD BU src__2 dst - else - code `snocOL` - LD (primRepToSize pk) src dst `snocOL` - AND False dst (RIImm (ImmInt 255)) dst - in - returnNat (Any pk code__2) - chrCode x = getRegister x `thenNat` \ register -> - getNewRegNCG IntRep `thenNat` \ reg -> - let - code = registerCode register reg - src = registerName register reg - code__2 dst = code `snocOL` AND False src (RIImm (ImmInt 255)) dst - in - returnNat (Any IntRep code__2) + returnNat ( + case register of + Fixed _ reg code -> Fixed IntRep reg code + Any _ code -> Any IntRep code + ) #endif {- sparc_TARGET_ARCH -} \end{code} diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs index 4db56ed..8f5c168 100644 --- a/ghc/compiler/nativeGen/MachMisc.lhs +++ b/ghc/compiler/nativeGen/MachMisc.lhs @@ -100,7 +100,7 @@ sizeOf :: PrimRep -> Integer{-in bytes-} -- the result is an Integer only because it's more convenient sizeOf pr = case (primRepToSize pr) of - IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},) + IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2;-} L -> 4; {-SF -> 4;-} _ -> 8},) IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},) IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },) \end{code} @@ -239,7 +239,7 @@ data Size | BU -- | W -- word (2 bytes): UNUSED -- | WU -- : UNUSED --- | L -- longword (4 bytes): UNUSED + | L -- longword (4 bytes) | Q -- quadword (8 bytes) -- | FF -- VAX F-style floating pt: UNUSED -- | GF -- VAX G-style floating pt: UNUSED @@ -274,7 +274,8 @@ primRepToSize CodePtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( primRepToSize DataPtrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize RetRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize CostCentreRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) -primRepToSize CharRep = IF_ARCH_alpha( BU, IF_ARCH_i386( B, IF_ARCH_sparc( BU,))) +primRepToSize CharRep = IF_ARCH_alpha( L, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) +primRepToSize Int8Rep = IF_ARCH_alpha( B, IF_ARCH_i386( B, IF_ARCH_sparc( B ,))) primRepToSize IntRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize WordRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) primRepToSize AddrRep = IF_ARCH_alpha( Q, IF_ARCH_i386( L, IF_ARCH_sparc( W ,))) diff --git a/ghc/compiler/nativeGen/PprMach.lhs b/ghc/compiler/nativeGen/PprMach.lhs index 697785e..5235a5c 100644 --- a/ghc/compiler/nativeGen/PprMach.lhs +++ b/ghc/compiler/nativeGen/PprMach.lhs @@ -165,7 +165,7 @@ pprSize x = ptext (case x of BU -> SLIT("bu") -- W -> SLIT("w") UNUSED -- WU -> SLIT("wu") UNUSED --- L -> SLIT("l") UNUSED + L -> SLIT("l") Q -> SLIT("q") -- FF -> SLIT("f") UNUSED -- DF -> SLIT("d") UNUSED diff --git a/ghc/compiler/nativeGen/StixPrim.lhs b/ghc/compiler/nativeGen/StixPrim.lhs index dc3bee7..a78c4d6 100644 --- a/ghc/compiler/nativeGen/StixPrim.lhs +++ b/ghc/compiler/nativeGen/StixPrim.lhs @@ -18,7 +18,7 @@ import Literal ( Literal(..), word2IntLit ) import PrimOp ( PrimOp(..), CCall(..), CCallTarget(..) ) import PrimRep ( PrimRep(..), isFloatingRep ) import UniqSupply ( returnUs, thenUs, getUniqueUs, UniqSM ) -import Constants ( mIN_INTLIKE, uF_UPDATEE, bLOCK_SIZE ) +import Constants ( mIN_INTLIKE, mIN_CHARLIKE, uF_UPDATEE, bLOCK_SIZE ) import CLabel ( mkIntlikeClosureLabel, mkCharlikeClosureLabel, mkMAP_FROZEN_infoLabel, mkForeignLabel ) import Outputable @@ -180,7 +180,7 @@ primCode [] WriteForeignObjOp [obj, v] returnUs (\xs -> assign : xs) -- NB: indexing in "pk" units, *not* in bytes (WDP 95/09) -primCode ls IndexByteArrayOp_Char rs = primCode_ReadByteArrayOp CharRep ls rs +primCode ls IndexByteArrayOp_Char rs = primCode_ReadByteArrayOp Int8Rep ls rs primCode ls IndexByteArrayOp_Int rs = primCode_ReadByteArrayOp IntRep ls rs primCode ls IndexByteArrayOp_Word rs = primCode_ReadByteArrayOp WordRep ls rs primCode ls IndexByteArrayOp_Addr rs = primCode_ReadByteArrayOp AddrRep ls rs @@ -190,7 +190,7 @@ primCode ls IndexByteArrayOp_StablePtr rs = primCode_ReadByteArrayOp StablePtrRe primCode ls IndexByteArrayOp_Int64 rs = primCode_ReadByteArrayOp Int64Rep ls rs primCode ls IndexByteArrayOp_Word64 rs = primCode_ReadByteArrayOp Word64Rep ls rs -primCode ls ReadByteArrayOp_Char rs = primCode_ReadByteArrayOp CharRep ls rs +primCode ls ReadByteArrayOp_Char rs = primCode_ReadByteArrayOp Int8Rep ls rs primCode ls ReadByteArrayOp_Int rs = primCode_ReadByteArrayOp IntRep ls rs primCode ls ReadByteArrayOp_Word rs = primCode_ReadByteArrayOp WordRep ls rs primCode ls ReadByteArrayOp_Addr rs = primCode_ReadByteArrayOp AddrRep ls rs @@ -200,7 +200,7 @@ primCode ls ReadByteArrayOp_StablePtr rs = primCode_ReadByteArrayOp StablePtrRep primCode ls ReadByteArrayOp_Int64 rs = primCode_ReadByteArrayOp Int64Rep ls rs primCode ls ReadByteArrayOp_Word64 rs = primCode_ReadByteArrayOp Word64Rep ls rs -primCode ls ReadOffAddrOp_Char rs = primCode_IndexOffAddrOp CharRep ls rs +primCode ls ReadOffAddrOp_Char rs = primCode_IndexOffAddrOp Int8Rep ls rs primCode ls ReadOffAddrOp_Int rs = primCode_IndexOffAddrOp IntRep ls rs primCode ls ReadOffAddrOp_Word rs = primCode_IndexOffAddrOp WordRep ls rs primCode ls ReadOffAddrOp_Addr rs = primCode_IndexOffAddrOp AddrRep ls rs @@ -210,7 +210,7 @@ primCode ls ReadOffAddrOp_StablePtr rs = primCode_IndexOffAddrOp StablePtrRep ls primCode ls ReadOffAddrOp_Int64 rs = primCode_IndexOffAddrOp Int64Rep ls rs primCode ls ReadOffAddrOp_Word64 rs = primCode_IndexOffAddrOp Word64Rep ls rs -primCode ls IndexOffAddrOp_Char rs = primCode_IndexOffAddrOp CharRep ls rs +primCode ls IndexOffAddrOp_Char rs = primCode_IndexOffAddrOp Int8Rep ls rs primCode ls IndexOffAddrOp_Int rs = primCode_IndexOffAddrOp IntRep ls rs primCode ls IndexOffAddrOp_Word rs = primCode_IndexOffAddrOp WordRep ls rs primCode ls IndexOffAddrOp_Addr rs = primCode_IndexOffAddrOp AddrRep ls rs @@ -220,7 +220,7 @@ primCode ls IndexOffAddrOp_StablePtr rs = primCode_IndexOffAddrOp StablePtrRep l primCode ls IndexOffAddrOp_Int64 rs = primCode_IndexOffAddrOp Int64Rep ls rs primCode ls IndexOffAddrOp_Word64 rs = primCode_IndexOffAddrOp Word64Rep ls rs -primCode ls IndexOffForeignObjOp_Char rs = primCode_IndexOffForeignObjOp CharRep ls rs +primCode ls IndexOffForeignObjOp_Char rs = primCode_IndexOffForeignObjOp Int8Rep ls rs primCode ls IndexOffForeignObjOp_Int rs = primCode_IndexOffForeignObjOp IntRep ls rs primCode ls IndexOffForeignObjOp_Word rs = primCode_IndexOffForeignObjOp WordRep ls rs primCode ls IndexOffForeignObjOp_Addr rs = primCode_IndexOffForeignObjOp AddrRep ls rs @@ -230,7 +230,7 @@ primCode ls IndexOffForeignObjOp_StablePtr rs = primCode_IndexOffForeignObjOp St primCode ls IndexOffForeignObjOp_Int64 rs = primCode_IndexOffForeignObjOp Int64Rep ls rs primCode ls IndexOffForeignObjOp_Word64 rs = primCode_IndexOffForeignObjOp Word64Rep ls rs -primCode ls WriteOffAddrOp_Char rs = primCode_WriteOffAddrOp CharRep ls rs +primCode ls WriteOffAddrOp_Char rs = primCode_WriteOffAddrOp Int8Rep ls rs primCode ls WriteOffAddrOp_Int rs = primCode_WriteOffAddrOp IntRep ls rs primCode ls WriteOffAddrOp_Word rs = primCode_WriteOffAddrOp WordRep ls rs primCode ls WriteOffAddrOp_Addr rs = primCode_WriteOffAddrOp AddrRep ls rs @@ -240,7 +240,7 @@ primCode ls WriteOffAddrOp_StablePtr rs = primCode_WriteOffAddrOp StablePtrRep l primCode ls WriteOffAddrOp_Int64 rs = primCode_WriteOffAddrOp Int64Rep ls rs primCode ls WriteOffAddrOp_Word64 rs = primCode_WriteOffAddrOp Word64Rep ls rs -primCode ls WriteByteArrayOp_Char rs = primCode_WriteByteArrayOp CharRep ls rs +primCode ls WriteByteArrayOp_Char rs = primCode_WriteByteArrayOp Int8Rep ls rs primCode ls WriteByteArrayOp_Int rs = primCode_WriteByteArrayOp IntRep ls rs primCode ls WriteByteArrayOp_Word rs = primCode_WriteByteArrayOp WordRep ls rs primCode ls WriteByteArrayOp_Addr rs = primCode_WriteByteArrayOp AddrRep ls rs @@ -482,17 +482,15 @@ amodeToStix (CLbl lbl _) = StCLbl lbl -- For CharLike and IntLike, we attempt some trivial constant-folding here. amodeToStix (CCharLike (CLit (MachChar c))) - = StIndex CharRep cHARLIKE_closure (StInt (toInteger off)) + = StIndex Int8Rep cHARLIKE_closure (StInt (toInteger off)) where - off = charLikeSize * ord c + off = charLikeSize * (c - mIN_CHARLIKE) amodeToStix (CCharLike x) - = StIndex CharRep cHARLIKE_closure off - where - off = StPrim IntMulOp [amodeToStix x, StInt (toInteger charLikeSize)] + = panic "CCharLike" amodeToStix (CIntLike (CLit (MachInt i))) - = StIndex CharRep{-yes,really-} iNTLIKE_closure (StInt (toInteger off)) + = StIndex Int8Rep iNTLIKE_closure (StInt (toInteger off)) where off = intLikeSize * (fromInteger (i - mIN_INTLIKE)) @@ -501,7 +499,7 @@ amodeToStix (CIntLike x) amodeToStix (CLit core) = case core of - MachChar c -> StInt (toInteger (ord c)) + MachChar c -> StInt (toInteger c) MachStr s -> StString s MachAddr a -> StInt a MachInt i -> StInt i diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs index dd020e7..88667c4 100644 --- a/ghc/compiler/parser/Lex.lhs +++ b/ghc/compiler/parser/Lex.lhs @@ -52,7 +52,7 @@ import FastString import StringBuffer import GlaExts import Ctype -import Char ( chr ) +import Char ( chr, ord ) import PrelRead ( readRational__ ) -- Glasgow non-std \end{code} @@ -209,12 +209,12 @@ data Token | ITpragma StringBuffer - | ITchar Char + | ITchar Int | ITstring FAST_STRING - | ITinteger Integer + | ITinteger Integer | ITrational Rational - | ITprimchar Char + | ITprimchar Int | ITprimstring FAST_STRING | ITprimint Integer | ITprimfloat Rational @@ -571,7 +571,7 @@ lexToken cont glaexts buf = _ -> (layoutOff `thenP_` cont ITocurly) (incLexeme buf) -- strings/characters ------------------------------------------------- - '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf) + '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf) '\''# -> lex_char (char_end cont) glaexts (incLexeme buf) -- strictness and cpr pragmas and __scc treated specially. @@ -639,9 +639,11 @@ lex_prag cont buf lex_string cont glaexts s buf = case currentChar# buf of '"'#{-"-} -> - let buf' = incLexeme buf; s' = mkFastString (reverse s) in + let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in case currentChar# buf' of - '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf') + '#'# | flag glaexts -> if all (<= 0xFF) s + then cont (ITprimstring s') (incLexeme buf') + else lexError "primitive string literal must contain only characters <= '\xFF'" buf' _ -> cont (ITstring s') buf' -- ignore \& in a string, deal with string gaps @@ -666,11 +668,11 @@ lex_stringgap cont glaexts s buf lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf -lex_char :: (Int# -> Char -> P a) -> Int# -> P a +lex_char :: (Int# -> Int -> P a) -> Int# -> P a lex_char cont glaexts buf = case currentChar# buf of '\\'# -> lex_escape (cont glaexts) (incLexeme buf) - c | is_any c -> cont glaexts (C# c) (incLexeme buf) + c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf) other -> charError buf char_end cont glaexts c buf @@ -685,19 +687,19 @@ char_end cont glaexts c buf lex_escape cont buf = let buf' = incLexeme buf in case currentChar# buf of - 'a'# -> cont '\a' buf' - 'b'# -> cont '\b' buf' - 'f'# -> cont '\f' buf' - 'n'# -> cont '\n' buf' - 'r'# -> cont '\r' buf' - 't'# -> cont '\t' buf' - 'v'# -> cont '\v' buf' - '\\'# -> cont '\\' buf' - '"'# -> cont '\"' buf' - '\''# -> cont '\'' buf' + 'a'# -> cont (ord '\a') buf' + 'b'# -> cont (ord '\b') buf' + 'f'# -> cont (ord '\f') buf' + 'n'# -> cont (ord '\n') buf' + 'r'# -> cont (ord '\r') buf' + 't'# -> cont (ord '\t') buf' + 'v'# -> cont (ord '\v') buf' + '\\'# -> cont (ord '\\') buf' + '"'# -> cont (ord '\"') buf' + '\''# -> cont (ord '\'') buf' '^'# -> let c = currentChar# buf' in if c `geChar#` '@'# && c `leChar#` '_'# - then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf') + then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf') else charError buf' 'x'# -> readNum (after_charnum cont) buf' is_hexdigit 16 hex @@ -707,13 +709,12 @@ lex_escape cont buf _ -> case [ (c,buf2) | (p,c) <- silly_escape_chars, Just buf2 <- [prefixMatch buf p] ] of - (c,buf2):_ -> cont c buf2 + (c,buf2):_ -> cont (ord c) buf2 [] -> charError buf' -after_charnum cont i buf - = let int = fromInteger i in - if i >= 0 && i <= 255 - then cont (chr int) buf +after_charnum cont i buf + = if i >= 0 && i <= 0x7FFFFFFF + then cont (fromInteger i) buf else charError buf readNum cont buf is_digit base conv = read buf 0 @@ -951,7 +952,7 @@ lex_qid cont glaexts mod buf just_a_conid = _ -> just_a_conid '-'# -> case lookAhead# buf 1# of - '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#) + '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#) _ -> lex_id3 cont glaexts mod buf just_a_conid _ -> lex_id3 cont glaexts mod buf just_a_conid diff --git a/ghc/compiler/prelude/PrelInfo.lhs b/ghc/compiler/prelude/PrelInfo.lhs index ae88f95..3a8f5a6 100644 --- a/ghc/compiler/prelude/PrelInfo.lhs +++ b/ghc/compiler/prelude/PrelInfo.lhs @@ -266,9 +266,9 @@ knownKeyNames , (map_RDR, mapIdKey) , (append_RDR, appendIdKey) , (unpackCString_RDR, unpackCStringIdKey) - , (unpackCString2_RDR, unpackCString2IdKey) , (unpackCStringAppend_RDR, unpackCStringAppendIdKey) , (unpackCStringFoldr_RDR, unpackCStringFoldrIdKey) + , (unpackCStringUtf8_RDR, unpackCStringUtf8IdKey) -- List operations , (concat_RDR, concatIdKey) diff --git a/ghc/compiler/prelude/PrelNames.lhs b/ghc/compiler/prelude/PrelNames.lhs index 3c2d26c..d7a86c1 100644 --- a/ghc/compiler/prelude/PrelNames.lhs +++ b/ghc/compiler/prelude/PrelNames.lhs @@ -58,7 +58,8 @@ module PrelNames concat_RDR, filter_RDR, zip_RDR, augment_RDR, otherwiseId_RDR, assert_RDR, runSTRep_RDR, - unpackCString_RDR, unpackCString2_RDR, unpackCStringAppend_RDR, unpackCStringFoldr_RDR, + unpackCString_RDR, unpackCStringAppend_RDR, unpackCStringFoldr_RDR, + unpackCStringUtf8_RDR, numClass_RDR, fractionalClass_RDR, eqClass_RDR, ccallableClass_RDR, creturnableClass_RDR, monadClass_RDR, enumClass_RDR, ordClass_RDR, @@ -209,9 +210,9 @@ augment_RDR = varQual pREL_BASE_Name SLIT("augment") -- Strings unpackCString_RDR = varQual pREL_BASE_Name SLIT("unpackCString#") -unpackCString2_RDR = varQual pREL_BASE_Name SLIT("unpackNBytes#") unpackCStringAppend_RDR = varQual pREL_BASE_Name SLIT("unpackAppendCString#") unpackCStringFoldr_RDR = varQual pREL_BASE_Name SLIT("unpackFoldrCString#") +unpackCStringUtf8_RDR = varQual pREL_BASE_Name SLIT("unpackCStringUtf8#") -- Classes Eq and Ord eqClass_RDR = clsQual pREL_BASE_Name SLIT("Eq") diff --git a/ghc/compiler/prelude/PrimRep.lhs b/ghc/compiler/prelude/PrimRep.lhs index 70bb367..6c479d1 100644 --- a/ghc/compiler/prelude/PrimRep.lhs +++ b/ghc/compiler/prelude/PrimRep.lhs @@ -47,6 +47,7 @@ data PrimRep | CostCentreRep -- Pointer to a cost centre | CharRep -- Machine characters + | Int8Rep -- 8 bit integers | IntRep -- integers (same size as ptr on this arch) | WordRep -- ditto (but *unsigned*) | AddrRep -- addresses ("C pointers") @@ -54,7 +55,13 @@ data PrimRep | DoubleRep -- doubles | Word64Rep -- guaranteed to be 64 bits (no more, no less.) | Int64Rep -- guaranteed to be 64 bits (no more, no less.) - + + -- Perhaps all sized integers and words should be primitive types. + + -- Int8Rep is currently used to simulate some old CharRep usages + -- when Char changed size from 8 to 31 bits. It does not correspond + -- to a Haskell unboxed type, in particular it's not used by Int8. + | WeakPtrRep | ForeignObjRep | BCORep @@ -113,7 +120,7 @@ isFollowableRep ForeignObjRep = True -- '' isFollowableRep StableNameRep = True -- '' isFollowableRep ThreadIdRep = True -- pointer to a TSO -isFollowableRep other = False +isFollowableRep other = False separateByPtrFollowness :: (a -> PrimRep) -> [a] -> ([a], [a]) @@ -171,11 +178,12 @@ retPrimRepSize = getPrimRepSize RetRep -- size in bytes, ToDo: cpp in the right vals. -- (used in some settings to figure out how many bytes -- we have to push onto the stack when calling external --- entry points (e.g., stdcalling on win32)) +-- entry points (e.g., stdcalling on win32) getPrimRepSizeInBytes :: PrimRep -> Int getPrimRepSizeInBytes pr = case pr of - CharRep -> 1 + CharRep -> 4 + Int8Rep -> 1 IntRep -> 4 AddrRep -> 4 FloatRep -> 4 @@ -211,6 +219,7 @@ showPrimRep DataPtrRep = "D_" showPrimRep RetRep = "P_" showPrimRep CostCentreRep = "CostCentre" showPrimRep CharRep = "C_" +showPrimRep Int8Rep = "StgInt8" showPrimRep IntRep = "I_" -- short for StgInt showPrimRep WordRep = "W_" -- short for StgWord showPrimRep Int64Rep = "LI_" -- short for StgLongInt @@ -228,6 +237,7 @@ showPrimRep ForeignObjRep = "StgAddr" showPrimRep VoidRep = "!!VOID_KIND!!" primRepString CharRep = "Char" +primRepString Int8Rep = "Char" -- To have names like newCharArray# primRepString IntRep = "Int" primRepString WordRep = "Word" primRepString Int64Rep = "Int64" diff --git a/ghc/compiler/prelude/TysPrim.lhs b/ghc/compiler/prelude/TysPrim.lhs index ff4e305..4be0716 100644 --- a/ghc/compiler/prelude/TysPrim.lhs +++ b/ghc/compiler/prelude/TysPrim.lhs @@ -324,6 +324,7 @@ primitive TyCon for a given PrimRep. \begin{code} primRepTyCon CharRep = charPrimTyCon +primRepTyCon Int8Rep = charPrimTyCon primRepTyCon IntRep = intPrimTyCon primRepTyCon WordRep = wordPrimTyCon primRepTyCon Int64Rep = int64PrimTyCon diff --git a/ghc/compiler/rename/Rename.lhs b/ghc/compiler/rename/Rename.lhs index 8adfdf3..0f9fe08 100644 --- a/ghc/compiler/rename/Rename.lhs +++ b/ghc/compiler/rename/Rename.lhs @@ -49,7 +49,7 @@ import NameSet import TysWiredIn ( unitTyCon, intTyCon, doubleTyCon, boolTyCon ) import PrelRules ( builtinRules ) import PrelInfo ( mAIN_Name, pREL_MAIN_Name, pRELUDE_Name, - ioTyCon_RDR, unpackCString_RDR, unpackCString2_RDR, unpackCStringFoldr_RDR, + ioTyCon_RDR, unpackCString_RDR, unpackCStringFoldr_RDR, unpackCStringUtf8_RDR, fractionalClassKeys, derivingOccurrences ) import Type ( namesOfType, funTyCon ) @@ -243,7 +243,7 @@ implicitFVs mod_name decls implicit_occs = string_occs ++ foldr ((++) . get) implicit_main decls -- Virtually every program has error messages in it somewhere - string_occs = [unpackCString_RDR, unpackCString2_RDR, unpackCStringFoldr_RDR] + string_occs = [unpackCString_RDR, unpackCStringFoldr_RDR, unpackCStringUtf8_RDR] get (TyClD (TyData _ _ _ _ _ _ (Just deriv_classes) _ _)) = concat (map get_deriv deriv_classes) diff --git a/ghc/compiler/typecheck/TcTyDecls.lhs b/ghc/compiler/typecheck/TcTyDecls.lhs index acb1558..955d812 100644 --- a/ghc/compiler/typecheck/TcTyDecls.lhs +++ b/ghc/compiler/typecheck/TcTyDecls.lhs @@ -50,7 +50,7 @@ import Type ( getTyVar, tyVarsOfTypes, splitFunTy, applyTys, import TysWiredIn ( unitTy ) import Var ( tyVarKind ) import VarSet ( intersectVarSet, isEmptyVarSet ) -import Unique ( unpackCStringIdKey ) +import Unique ( unpackCStringIdKey, unpackCStringUtf8IdKey ) import Util ( equivClasses ) import FiniteMap ( FiniteMap, lookupWithDefaultFM ) \end{code} @@ -282,7 +282,8 @@ mkRecordSelector tycon fields@((first_con, first_field_label) : other_fields) = checkTc (all (== field_ty) other_tys) (fieldTypeMisMatch field_name) `thenTc_` tcLookupValueByKey unpackCStringIdKey `thenTc` \ unpack_id -> - returnTc (mkRecordSelId tycon first_field_label unpack_id) + tcLookupValueByKey unpackCStringUtf8IdKey `thenTc` \ unpackUtf8_id -> + returnTc (mkRecordSelId tycon first_field_label unpack_id unpackUtf8_id) where field_ty = fieldLabelType first_field_label field_name = fieldLabelName first_field_label diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs index f0e7d9c..95ed4a5 100644 --- a/ghc/compiler/utils/FastString.lhs +++ b/ghc/compiler/utils/FastString.lhs @@ -26,16 +26,18 @@ module FastString mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString mkFastSubString#, -- :: Addr# -> Int# -> Int# -> FastString mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString - + + mkFastStringInt, -- :: [Int] -> FastString + uniqueOfFS, -- :: FastString -> Int# lengthFS, -- :: FastString -> Int nullFastString, -- :: FastString -> Bool - getByteArray#, -- :: FastString -> ByteArray# - getByteArray, -- :: FastString -> _ByteArray Int unpackFS, -- :: FastString -> String + unpackIntFS, -- :: FastString -> [Int] appendFS, -- :: FastString -> FastString -> FastString headFS, -- :: FastString -> Char + headIntFS, -- :: FastString -> Int tailFS, -- :: FastString -> FastString concatFS, -- :: [FastString] -> FastString consFS, -- :: Char -> FastString -> FastString @@ -103,6 +105,7 @@ import Foreign ( ForeignObj(..) ) import IOExts ( IORef, newIORef, readIORef, writeIORef ) import IO +import Char ( chr, ord ) #define hASH_TBL_SIZE 993 @@ -130,6 +133,10 @@ data FastString Addr# -- pointer to the (null-terminated) bytes in C land. Int# -- length (cached) + | UnicodeStr -- if contains characters outside '\1'..'\xFF' + Int# -- unique id + [Int] -- character numbers + instance Eq FastString where a == b = case cmpFS a b of { LT -> False; EQ -> True; GT -> False } a /= b = case cmpFS a b of { LT -> True; EQ -> False; GT -> True } @@ -145,23 +152,16 @@ instance Ord FastString where | otherwise = y compare a b = cmpFS a b -getByteArray# :: FastString -> ByteArray# -getByteArray# (FastString _ _ ba#) = ba# - -getByteArray :: FastString -> ByteArray Int -#if __GLASGOW_HASKELL__ < 405 -getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba# -#else -getByteArray (FastString _ l# ba#) = ByteArray 0 (I# l#) ba# -#endif - lengthFS :: FastString -> Int lengthFS (FastString _ l# _) = I# l# lengthFS (CharStr a# l#) = I# l# +lengthFS (UnicodeStr _ s) = length s nullFastString :: FastString -> Bool nullFastString (FastString _ l# _) = l# ==# 0# nullFastString (CharStr _ l#) = l# ==# 0# +nullFastString (UnicodeStr _ []) = True +nullFastString (UnicodeStr _ (_:_)) = False unpackFS :: FastString -> String unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l# @@ -173,18 +173,29 @@ unpackFS (CharStr addr len#) = | otherwise = C# ch : unpack (nh +# 1#) where ch = indexCharOffAddr# addr nh +unpackFS (UnicodeStr _ s) = map chr s + +unpackIntFS :: FastString -> [Int] +unpackIntFS (UnicodeStr _ s) = s +unpackIntFS fs = map ord (unpackFS fs) appendFS :: FastString -> FastString -> FastString -appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2) +appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2) concatFS :: [FastString] -> FastString -concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better +concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better headFS :: FastString -> Char -headFS f@(FastString _ l# ba#) = - if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f) -headFS f@(CharStr a# l#) = - if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f) +headFS (FastString _ l# ba#) = + if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS") +headFS (CharStr a# l#) = + if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS") +headFS (UnicodeStr _ (c:_)) = chr c +headFS (UnicodeStr _ []) = error ("headFS: empty FS") + +headIntFS :: FastString -> Int +headIntFS (UnicodeStr _ (c:_)) = c +headIntFS fs = ord (headFS fs) indexFS :: FastString -> Int -> Char indexFS f i@(I# i#) = @@ -195,14 +206,16 @@ indexFS f i@(I# i#) = CharStr a# l# | l# ># 0# && l# ># i# -> C# (indexCharOffAddr# a# i#) | otherwise -> error (msg (I# l#)) + UnicodeStr _ s -> chr (s!!i) where msg l = "indexFS: out of range: " ++ show (l,i) tailFS :: FastString -> FastString tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#) +tailFS fs = mkFastStringInt (tail (unpackIntFS fs)) consFS :: Char -> FastString -> FastString -consFS c fs = mkFastString (c:unpackFS fs) +consFS c fs = mkFastStringInt (ord c : unpackIntFS fs) uniqueOfFS :: FastString -> Int# uniqueOfFS (FastString u# _ _) = u# @@ -219,6 +232,7 @@ uniqueOfFS (CharStr a# l#) = case mkFastString# a# l# of { FastString u# _ _ works, but causes the CharStr to be looked up in the hash table each time it is accessed.. -} +uniqueOfFS (UnicodeStr u# _) = u# \end{code} Internally, the compiler will maintain a fast string symbol @@ -226,6 +240,10 @@ table, providing sharing and fast comparison. Creation of new @FastString@s then covertly does a lookup, re-using the @FastString@ if there was a hit. +Caution: mkFastStringUnicode assumes that if the string is in the +table, it sits under the UnicodeStr constructor. Other mkFastString +variants analogously assume the FastString constructor. + \begin{code} data FastStringTable = FastStringTable @@ -315,6 +333,8 @@ mkFastString# a# len# = Just v else bucket_match ls len# a# + bucket_match (UnicodeStr _ _ : ls) len# a# = + bucket_match ls len# a# mkFastSubString# :: Addr# -> Int# -> Int# -> FastString mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#) @@ -362,7 +382,8 @@ mkFastSubStringFO# fo# start# len# = Just v else bucket_match ls start# len# fo# - + bucket_match (UnicodeStr _ _ : ls) start# len# fo# = + bucket_match ls start# len# fo# mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString mkFastSubStringBA# barr# start# len# = @@ -421,6 +442,44 @@ mkFastSubStringBA# barr# start# len# = Just v else bucket_match ls start# len# ba# + UnicodeStr _ _ -> bucket_match ls start# len# ba# + +mkFastStringUnicode :: [Int] -> FastString +mkFastStringUnicode s = + unsafePerformIO ( + readIORef string_table >>= \ ft@(FastStringTable uid# tbl#) -> + let + h = hashUnicode s + in +-- _trace ("hashed(b): "++show (I# h)) $ + lookupTbl ft h >>= \ lookup_result -> + case lookup_result of + [] -> + -- no match, add it to table by copying out the + -- the string into a [Int] + let f_str = UnicodeStr uid# s in + updTbl string_table ft h [f_str] >> + -- _trace ("new(b): " ++ show f_str) $ + return f_str + ls -> + -- non-empty `bucket', scan the list looking + -- entry with same length and compare byte by byte. + -- _trace ("non-empty bucket(b)"++show ls) $ + case bucket_match ls of + Nothing -> + let f_str = UnicodeStr uid# s in + updTbl string_table ft h (f_str:ls) >> + -- _trace ("new(b): " ++ show f_str) $ + return f_str + Just v -> + -- _trace ("re-use(b): "++show v) $ + return v + ) + where + bucket_match [] = Nothing + bucket_match (v@(UnicodeStr _ s'):ls) = + if s' == s then Just v else bucket_match ls + bucket_match (FastString _ _ _ : ls) = bucket_match ls mkFastCharString :: Addr -> FastString mkFastCharString a@(A# a#) = @@ -433,8 +492,8 @@ mkFastCharString# a# = mkFastCharString2 :: Addr -> Int -> FastString mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len# -mkFastString :: String -> FastString -mkFastString str = +mkFastStringNarrow :: String -> FastString +mkFastStringNarrow str = case packString str of #if __GLASGOW_HASKELL__ < 405 (ByteArray (_,I# len#) frozen#) -> @@ -445,6 +504,20 @@ mkFastString str = {- 0-indexed array, len# == index to one beyond end of string, i.e., (0,1) => empty string. -} +mkFastString :: String -> FastString +mkFastString str = if all good str + then mkFastStringNarrow str + else mkFastStringUnicode (map ord str) + where + good c = c >= '\1' && c <= '\xFF' + +mkFastStringInt :: [Int] -> FastString +mkFastStringInt str = if all good str + then mkFastStringNarrow (map chr str) + else mkFastStringUnicode str + where + good c = c >= 1 && c <= 0xFF + mkFastSubString :: Addr -> Int -> Int -> FastString mkFastSubString (A# a#) (I# start#) (I# len#) = mkFastString# (addrOffset# a# start#) len# @@ -505,10 +578,26 @@ hashSubStrBA ba# start# len# = -- c1 = indexCharArray# ba# 1# -- c2 = indexCharArray# ba# 2# +hashUnicode :: [Int] -> Int# + -- use the Addr to produce a hash value between 0 & m (inclusive) +hashUnicode [] = 0# +hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE# +hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE# +hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE# + where + I# len# = length s + I# c0 = s !! 0 + I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#)) + I# c2 = s !! (I# (len# -# 1#)) + \end{code} \begin{code} cmpFS :: FastString -> FastString -> Ordering +cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ + else compare s1 s2 +cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2) +cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2 cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars if u1# ==# u2# then EQ diff --git a/ghc/compiler/utils/Outputable.lhs b/ghc/compiler/utils/Outputable.lhs index 46cb734..5f38e9b8 100644 --- a/ghc/compiler/utils/Outputable.lhs +++ b/ghc/compiler/utils/Outputable.lhs @@ -34,11 +34,11 @@ module Outputable ( hang, punctuate, speakNth, speakNTimes, - printSDoc, printErrs, printDump, + printSDoc, printErrs, printDump, printForC, printForAsm, printForIface, printForUser, pprCode, pprCols, - showSDoc, showSDocDebug, showSDocIface, showsPrecSDoc, - pprFSAsString, + showSDoc, showSDocDebug, showSDocIface, showsPrecSDoc, + pprHsChar, pprHsString, -- error handling @@ -57,6 +57,7 @@ import Pretty ( Doc, Mode(..), TextDetails(..), fullRender ) import Panic import ST ( runST ) import Foreign +import Char ( chr, ord, isDigit ) \end{code} @@ -317,8 +318,43 @@ instance Outputable FastString where ppr fs = text (unpackFS fs) -- Prints an unadorned string, -- no double quotes or anything -pprFSAsString :: FastString -> SDoc -- The Char instance of Show prints -pprFSAsString fs = text (showList (unpackFS fs) "") -- strings with double quotes and escapes +#if __GLASGOW_HASKELL__ < 410 +-- Assume we have only 8-bit Chars. + +pprHsChar :: Int -> SDoc +pprHsChar c = char '\'' <> text (showCharLit c "") <> char '\'' + +pprHsString :: FAST_STRING -> SDoc +pprHsString fs = doubleQuotes (text (foldr showCharLit "" (_UNPK_INT_ fs))) + +showCharLit :: Int -> String -> String +showCharLit c rest + | c == ord '\"' = "\\\"" ++ rest + | c == ord '\'' = "\\\'" ++ rest + | c == ord '\\' = "\\\\" ++ rest + | c >= 0x20 && c <= 0x7E = chr c : rest + | c == ord '\a' = "\\a" ++ rest + | c == ord '\b' = "\\b" ++ rest + | c == ord '\f' = "\\f" ++ rest + | c == ord '\n' = "\\n" ++ rest + | c == ord '\r' = "\\r" ++ rest + | c == ord '\t' = "\\t" ++ rest + | c == ord '\v' = "\\v" ++ rest + | otherwise = ('\\':) $ shows c $ case rest of + d:_ | isDigit d -> "\\&" ++ rest + _ -> rest + +#else +-- We have 31-bit Chars and will simply use Show instances +-- of Char and String. + +pprHsChar :: Int -> SDoc +pprHsChar c = text (show (chr c)) + +pprHsString :: FastString -> SDoc +pprHsString fs = text (show (unpackFS fs)) + +#endif instance Show FastString where showsPrec p fs = showsPrecSDoc p (ppr fs) diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs new file mode 100644 index 0000000..0123e67 --- /dev/null +++ b/ghc/compiler/utils/UnicodeUtil.lhs @@ -0,0 +1,46 @@ +Various Unicode-related utilities. + +\begin{code} +module UnicodeUtil( + stringToUtf8 + ) where + +#include "HsVersions.h" + +import Panic (panic) +import Char (chr, ord) +\end{code} + +\begin{code} +stringToUtf8 :: [Int] -> String +stringToUtf8 [] = "" +stringToUtf8 (c:s) + | c >= 1 && c <= 0x7F = chr c : stringToUtf8 s + | c < 0 = panic ("charToUtf8 ("++show c++")") + | c <= 0x7FF = chr (0xC0 + c `div` 0x40 ) : + chr (0x80 + c `mod` 0x40) : + stringToUtf8 s + | c <= 0xFFFF = chr (0xE0 + c `div` 0x1000 ) : + chr (0x80 + c `div` 0x40 `mod` 0x40) : + chr (0x80 + c `mod` 0x40) : + stringToUtf8 s + | c <= 0x1FFFFF = chr (0xF0 + c `div` 0x40000 ) : + chr (0x80 + c `div` 0x1000 `mod` 0x40) : + chr (0x80 + c `div` 0x40 `mod` 0x40) : + chr (0x80 + c `mod` 0x40) : + stringToUtf8 s + | c <= 0x3FFFFFF = chr (0xF8 + c `div` 0x1000000 ) : + chr (0x80 + c `div` 0x40000 `mod` 0x40) : + chr (0x80 + c `div` 0x1000 `mod` 0x40) : + chr (0x80 + c `div` 0x40 `mod` 0x40) : + chr (0x80 + c `mod` 0x40) : + stringToUtf8 s + | c <= 0x7FFFFFFF = chr (0xFC + c `div` 0x40000000 ) : + chr (0x80 + c `div` 0x1000000 `mod` 0x40) : + chr (0x80 + c `div` 0x40000 `mod` 0x40) : + chr (0x80 + c `div` 0x1000 `mod` 0x40) : + chr (0x80 + c `div` 0x40 `mod` 0x40) : + chr (0x80 + c `mod` 0x40) : + stringToUtf8 s + | otherwise = panic ("charToUtf8 "++show c) +\end{code} diff --git a/ghc/docs/users_guide/glasgow_exts.sgml b/ghc/docs/users_guide/glasgow_exts.sgml index c962f87..aa1c884 100644 --- a/ghc/docs/users_guide/glasgow_exts.sgml +++ b/ghc/docs/users_guide/glasgow_exts.sgml @@ -364,8 +364,8 @@ Literals for these types may be written as follows: 1# an Int# 1.2# a Float# 1.34## a Double# -'a'# a Char#; for weird characters, use '\o<octal>'# -"a"# an Addr# (a `char *') +'a'# a Char#; for weird characters, use e.g. '\o<octal>'# +"a"# an Addr# (a `char *'); only characters '\0'..'\255' allowed literals, primitive diff --git a/ghc/docs/users_guide/vs_haskell.sgml b/ghc/docs/users_guide/vs_haskell.sgml index 1f98fe8..38d1472 100644 --- a/ghc/docs/users_guide/vs_haskell.sgml +++ b/ghc/docs/users_guide/vs_haskell.sgml @@ -122,7 +122,7 @@ numeric types if this stuff keeps you awake at night. This code fragment should elicit a fatal error, but it does not: -main = print (array (1,1) [ 1:=2, 1:=3 ]) +main = print (array (1,1) [(1,2), (1,3)]) @@ -158,15 +158,6 @@ stuck on them. - -Unicode character set: - - -Haskell 98 embraces the Unicode character set, but GHC doesn't -handle it. Yet. - - - diff --git a/ghc/includes/ClosureMacros.h b/ghc/includes/ClosureMacros.h index c8baeee..6e3922e 100644 --- a/ghc/includes/ClosureMacros.h +++ b/ghc/includes/ClosureMacros.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: ClosureMacros.h,v 1.22 2000/05/26 10:14:33 sewardj Exp $ + * $Id: ClosureMacros.h,v 1.23 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-1999 * @@ -142,8 +142,14 @@ extern void* DATA_SECTION_END_MARKER_DECL; #define LOOKS_LIKE_STATIC(r) (!(HEAP_ALLOCED(r))) /* Tiresome predicates needed to check for pointers into the closure tables */ -#define IS_CHARLIKE_CLOSURE(p) ( (P_)(p) >= (P_)CHARLIKE_closure && (char*)(p) <= ((char*)CHARLIKE_closure + 255 * sizeof(StgIntCharlikeClosure)) ) -#define IS_INTLIKE_CLOSURE(p) ( (P_)(p) >= (P_)INTLIKE_closure && (char*)(p) <= ((char*)INTLIKE_closure + 32 * sizeof(StgIntCharlikeClosure)) ) +#define IS_CHARLIKE_CLOSURE(p) \ + ( (P_)(p) >= (P_)CHARLIKE_closure && \ + (char*)(p) <= ((char*)CHARLIKE_closure + \ + (MAX_CHARLIKE-MIN_CHARLIKE) * sizeof(StgIntCharlikeClosure)) ) +#define IS_INTLIKE_CLOSURE(p) \ + ( (P_)(p) >= (P_)INTLIKE_closure && \ + (char*)(p) <= ((char*)INTLIKE_closure + \ + (MAX_INTLIKE-MIN_INTLIKE) * sizeof(StgIntCharlikeClosure)) ) #define LOOKS_LIKE_STATIC_CLOSURE(r) (((*(((unsigned long *)(r))-1)) == 0) || IS_CHARLIKE_CLOSURE(r) || IS_INTLIKE_CLOSURE(r)) #else @@ -332,7 +338,7 @@ static __inline__ StgWord tso_sizeW ( StgTSO *tso ) INTLIKE and CHARLIKE closures. -------------------------------------------------------------------------- */ -#define CHARLIKE_CLOSURE(n) ((P_)&CHARLIKE_closure[n]) +#define CHARLIKE_CLOSURE(n) ((P_)&CHARLIKE_closure[(n)-MIN_CHARLIKE]) #define INTLIKE_CLOSURE(n) ((P_)&INTLIKE_closure[(n)-MIN_INTLIKE]) /* ----------------------------------------------------------------------------- diff --git a/ghc/includes/Constants.h b/ghc/includes/Constants.h index bc6b162..9792a0e 100644 --- a/ghc/includes/Constants.h +++ b/ghc/includes/Constants.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: Constants.h,v 1.13 2000/08/03 11:28:35 simonmar Exp $ + * $Id: Constants.h,v 1.14 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-1999 * @@ -166,11 +166,17 @@ #define MAX_VECTORED_RTN 8 -/*---- Range of built-in table of static small int-like closures. */ +/*---- Range of built-in table of static small int-like and char-like closures. */ -#define MAX_INTLIKE (16) +#define MAX_INTLIKE 16 #define MIN_INTLIKE (-16) +#define MAX_CHARLIKE 255 +#define MIN_CHARLIKE 0 + +/* You can change these constants (I hope) but be sure to modify + rts/StgMiscClosures.hs accordingly. */ + /*---- Minimum number of words left in heap after GC to carry on */ #define HEAP_HWM_WORDS 1024 diff --git a/ghc/includes/HsFFI.h b/ghc/includes/HsFFI.h index 17486b5..46a2776 100644 --- a/ghc/includes/HsFFI.h +++ b/ghc/includes/HsFFI.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: HsFFI.h,v 1.3 2000/04/13 15:37:11 panne Exp $ + * $Id: HsFFI.h,v 1.4 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 2000 * @@ -63,8 +63,8 @@ typedef void* HsForeignObj; /* ... and this StgForeignPtr */ typedef void* HsStablePtr; /* this should correspond to the type of StgChar in StgTypes.h */ -#define HS_CHAR_MIN (0) -#define HS_CHAR_MAX UINT8_MAX +#define HS_CHAR_MIN 0 +#define HS_CHAR_MAX INT32_MAX /* this mirrors the distinction of cases in StgTypes.h */ #if SIZEOF_VOID_P == 8 diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h index 1e14fd6..3565707 100644 --- a/ghc/includes/PrimOps.h +++ b/ghc/includes/PrimOps.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: PrimOps.h,v 1.59 2000/07/21 09:11:19 rrt Exp $ + * $Id: PrimOps.h,v 1.60 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-1999 * @@ -221,7 +221,8 @@ typedef union { #define int2Addrzh(r,a) r=(A_)(a) #define addr2Intzh(r,a) r=(I_)(a) -#define readCharOffAddrzh(r,a,i) r= ((C_ *)(a))[i] +#define readCharOffAddrzh(r,a,i) r= ((unsigned char *)(a))[i] +/* unsigned char is for compatibility: the index is still in bytes. */ #define readIntOffAddrzh(r,a,i) r= ((I_ *)(a))[i] #define readWordOffAddrzh(r,a,i) r= ((W_ *)(a))[i] #define readAddrOffAddrzh(r,a,i) r= ((PP_)(a))[i] @@ -233,7 +234,8 @@ typedef union { #define readWord64OffAddrzh(r,a,i) r= ((LW_ *)(a))[i] #endif -#define writeCharOffAddrzh(a,i,v) ((C_ *)(a))[i] = (v) +#define writeCharOffAddrzh(a,i,v) ((unsigned char *)(a))[i] = (unsigned char)(v) +/* unsigned char is for compatibility: the index is still in bytes. */ #define writeIntOffAddrzh(a,i,v) ((I_ *)(a))[i] = (v) #define writeWordOffAddrzh(a,i,v) ((W_ *)(a))[i] = (v) #define writeAddrOffAddrzh(a,i,v) ((PP_)(a))[i] = (v) @@ -246,7 +248,8 @@ typedef union { #define writeWord64OffAddrzh(a,i,v) ((LW_ *)(a))[i] = (v) #endif -#define indexCharOffAddrzh(r,a,i) r= ((C_ *)(a))[i] +#define indexCharOffAddrzh(r,a,i) r= ((unsigned char *)(a))[i] +/* unsigned char is for compatibility: the index is still in bytes. */ #define indexIntOffAddrzh(r,a,i) r= ((I_ *)(a))[i] #define indexWordOffAddrzh(r,a,i) r= ((W_ *)(a))[i] #define indexAddrOffAddrzh(r,a,i) r= ((PP_)(a))[i] @@ -542,7 +545,8 @@ extern I_ resetGenSymZh(void); /* result ("r") arg ignored in write macros! */ #define writeArrayzh(a,i,v) ((PP_) PTRS_ARR_CTS(a))[(i)]=(v) -#define writeCharArrayzh(a,i,v) ((C_ *)(BYTE_ARR_CTS(a)))[i] = (v) +#define writeCharArrayzh(a,i,v) ((unsigned char *)(BYTE_ARR_CTS(a)))[i] = (unsigned char)(v) +/* unsigned char is for compatibility: the index is still in bytes. */ #define writeIntArrayzh(a,i,v) ((I_ *)(BYTE_ARR_CTS(a)))[i] = (v) #define writeWordArrayzh(a,i,v) ((W_ *)(BYTE_ARR_CTS(a)))[i] = (v) #define writeAddrArrayzh(a,i,v) ((PP_)(BYTE_ARR_CTS(a)))[i] = (v) diff --git a/ghc/includes/RtsAPI.h b/ghc/includes/RtsAPI.h index 35a8df0..bddb06e 100644 --- a/ghc/includes/RtsAPI.h +++ b/ghc/includes/RtsAPI.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: RtsAPI.h,v 1.15 2000/06/27 09:18:04 sewardj Exp $ + * $Id: RtsAPI.h,v 1.16 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-1999 * @@ -48,7 +48,7 @@ extern void getProgArgv ( int *argc, char **argv[] ); /* ---------------------------------------------------------------------------- Building Haskell objects from C datatypes. ------------------------------------------------------------------------- */ -HaskellObj rts_mkChar ( char c ); +HaskellObj rts_mkChar ( unsigned int c ); HaskellObj rts_mkInt ( int i ); HaskellObj rts_mkInt8 ( int i ); HaskellObj rts_mkInt16 ( int i ); @@ -71,7 +71,7 @@ HaskellObj rts_apply ( HaskellObj, HaskellObj ); /* ---------------------------------------------------------------------------- Deconstructing Haskell objects ------------------------------------------------------------------------- */ -char rts_getChar ( HaskellObj ); +unsigned int rts_getChar ( HaskellObj ); int rts_getInt ( HaskellObj ); int rts_getInt32 ( HaskellObj ); unsigned int rts_getWord ( HaskellObj ); diff --git a/ghc/includes/StgTicky.h b/ghc/includes/StgTicky.h index b508e8a..ee9d5d7 100644 --- a/ghc/includes/StgTicky.h +++ b/ghc/includes/StgTicky.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: StgTicky.h,v 1.9 1999/11/11 17:50:47 simonpj Exp $ + * $Id: StgTicky.h,v 1.10 2000/08/07 23:37:23 qrczak Exp $ * * (c) The AQUA project, Glasgow University, 1994-1997 * (c) The GHC Team, 1998-1999 @@ -142,8 +142,8 @@ typedef struct _StgEntCounter { arity:16, /* arity (static info) */ stk_args:16; /* # of args off stack */ /* (rest of args are in registers) */ - StgChar *str; /* name of the thing */ - StgChar *arg_kinds; /* info about the args types */ + char *str; /* name of the thing */ + char *arg_kinds; /* info about the args types */ I_ entry_count; /* Trips to fast entry code */ I_ slow_entry_count; /* Trips to slow entry code */ I_ allocs; /* number of allocations by this fun */ diff --git a/ghc/includes/StgTypes.h b/ghc/includes/StgTypes.h index e6b87f9..d0a8104 100644 --- a/ghc/includes/StgTypes.h +++ b/ghc/includes/StgTypes.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgTypes.h,v 1.12 2000/06/12 11:04:12 simonmar Exp $ + * $Id: StgTypes.h,v 1.13 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-1999 * @@ -89,7 +89,7 @@ typedef void* StgAddr; * Other commonly-used STG datatypes. */ -typedef StgWord8 StgChar; +typedef StgWord32 StgChar; typedef int StgBool; /* * If a double fits in an StgWord, don't bother using floats. @@ -160,7 +160,8 @@ typedef StgFunPtr StgFun(void); typedef union { StgWord w; StgAddr a; - StgWord c; + StgChar c; + StgInt8 i8; StgFloat f; StgInt i; StgPtr p; diff --git a/ghc/lib/std/PrelBase.lhs b/ghc/lib/std/PrelBase.lhs index 235c41b..f79e788 100644 --- a/ghc/lib/std/PrelBase.lhs +++ b/ghc/lib/std/PrelBase.lhs @@ -1,5 +1,5 @@ % ----------------------------------------------------------------------------- -% $Id: PrelBase.lhs,v 1.34 2000/08/02 14:13:27 rrt Exp $ +% $Id: PrelBase.lhs,v 1.35 2000/08/07 23:37:23 qrczak Exp $ % % (c) The University of Glasgow, 1992-2000 % @@ -124,14 +124,14 @@ otherwise = True build = error "urk" foldr = error "urk" -unpackCString# :: Addr# -> [Char] -unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a +unpackCString# :: Addr# -> [Char] +unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a unpackAppendCString# :: Addr# -> [Char] -> [Char] -unpackNBytes# :: Addr# -> Int# -> [Char] -unpackNBytes# a b = error "urk" +unpackCStringUtf8# :: Addr# -> [Char] unpackCString# a = error "urk" unpackFoldrCString# a = error "urk" unpackAppendCString# a = error "urk" +unpackCStringUtf8# a = error "urk" -} \end{code} @@ -185,7 +185,7 @@ class (Eq a) => Ord a where \begin{code} class Functor f where - fmap :: (a -> b) -> f a -> f b + fmap :: (a -> b) -> f a -> f b class Monad m where (>>=) :: m a -> (a -> m b) -> m b @@ -440,7 +440,11 @@ instance Ord Char where (C# c1) < (C# c2) = c1 `ltChar#` c2 chr :: Int -> Char -chr (I# i) | i >=# 0# && i <=# 255# = C# (chr# i) +chr (I# i) | i >=# 0# +#if INT_SIZE_IN_BYTES > 4 + && i <=# 0x7FFFFFFF# +#endif + = C# (chr# i) | otherwise = error ("Prelude.chr: bad argument") unsafeChr :: Int -> Char @@ -623,10 +627,10 @@ This code is needed for virtually all programs, since it's used for unpacking the strings of error messages. \begin{code} -unpackCString# :: Addr# -> [Char] +unpackCString# :: Addr# -> [Char] unpackCString# a = unpackCStringList# a -unpackCStringList# :: Addr# -> [Char] +unpackCStringList# :: Addr# -> [Char] unpackCStringList# addr = unpack 0# where @@ -646,7 +650,7 @@ unpackAppendCString# addr rest where ch = indexCharOffAddr# addr nh -unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a +unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a unpackFoldrCString# addr f z = unpack 0# where @@ -656,11 +660,42 @@ unpackFoldrCString# addr f z where ch = indexCharOffAddr# addr nh -unpackNBytes# :: Addr# -> Int# -> [Char] - -- This one is called by the compiler to unpack literal - -- strings with NULs in them; rare. It's strict! - -- We don't try to do list deforestation for this one +unpackCStringUtf8# :: Addr# -> [Char] +unpackCStringUtf8# addr + = unpack 0# + where + unpack nh + | ch `eqChar#` '\0'# = [] + | ch `leChar#` '\x7F'# = C# ch : unpack (nh +# 1#) + | ch `leChar#` '\xDF'# = C# (chr# ((ord# ch `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 1#))) -# 0x3080#)) + : unpack (nh +# 2#) + | ch `leChar#` '\xEF'# = C# (chr# ((ord# ch `iShiftL#` 12#) +# + (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 2#))) -# 0xE2080#)) + : unpack (nh +# 3#) + | ch `leChar#` '\xF7'# = C# (chr# ((ord# ch `iShiftL#` 18#) +# + (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 12#) +# + (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 3#))) -# 0x3C82080#)) + : unpack (nh +# 4#) + | ch `leChar#` '\xFB'# = C# (chr# ((ord# ch `iShiftL#` 24#) +# + (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 18#) +# + (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 12#) +# + (ord# (indexCharOffAddr# addr (nh +# 3#)) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 4#))) -# 0xFA082080#)) + : unpack (nh +# 5#) + | otherwise = C# (chr# (((ord# ch -# 0xFC#) `iShiftL#` 30#) +# + (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 24#) +# + (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 18#) +# + (ord# (indexCharOffAddr# addr (nh +# 3#)) `iShiftL#` 12#) +# + (ord# (indexCharOffAddr# addr (nh +# 4#)) `iShiftL#` 6#) +# + (ord# (indexCharOffAddr# addr (nh +# 5#))) -# 0x82082080#)) + : unpack (nh +# 6#) + where + ch = indexCharOffAddr# addr nh +unpackNBytes# :: Addr# -> Int# -> [Char] unpackNBytes# _addr 0# = [] unpackNBytes# addr len# = unpack [] (len# -# 1#) where diff --git a/ghc/lib/std/PrelEnum.lhs b/ghc/lib/std/PrelEnum.lhs index c941e86..62f3167 100644 --- a/ghc/lib/std/PrelEnum.lhs +++ b/ghc/lib/std/PrelEnum.lhs @@ -1,5 +1,5 @@ % ----------------------------------------------------------------------------- -% $Id: PrelEnum.lhs,v 1.11 2000/06/30 13:39:35 simonmar Exp $ +% $Id: PrelEnum.lhs,v 1.12 2000/08/07 23:37:23 qrczak Exp $ % % (c) The University of Glasgow, 1992-2000 % @@ -180,11 +180,11 @@ instance Enum Ordering where \begin{code} instance Bounded Char where minBound = '\0' - maxBound = '\255' + maxBound = '\x7FFFFFFF' instance Enum Char where succ (C# c#) - | not (ord# c# ==# 255#) = C# (chr# (ord# c# +# 1#)) + | not (ord# c# ==# 0x7FFFFFFF#) = C# (chr# (ord# c# +# 1#)) | otherwise = error ("Prelude.Enum.Char.succ: bad argument") pred (C# c#) | not (ord# c# ==# 0#) = C# (chr# (ord# c# -# 1#)) @@ -194,7 +194,7 @@ instance Enum Char where fromEnum = ord {-# INLINE enumFrom #-} - enumFrom (C# x) = eftChar (ord# x) 255# + enumFrom (C# x) = eftChar (ord# x) 0x7FFFFFFF# -- Blarg: technically I guess enumFrom isn't strict! {-# INLINE enumFromTo #-} @@ -235,13 +235,13 @@ eftCharList x y | x ># y = [] -- For enumFromThenTo we give up on inlining efdCharFB c n x1 x2 - | delta >=# 0# = go_up_char_fb c n x1 delta 255# + | delta >=# 0# = go_up_char_fb c n x1 delta 0x7FFFFFFF# | otherwise = go_dn_char_fb c n x1 delta 0# where delta = x2 -# x1 efdCharList x1 x2 - | delta >=# 0# = go_up_char_list x1 delta 255# + | delta >=# 0# = go_up_char_list x1 delta 0x7FFFFFFF# | otherwise = go_dn_char_list x1 delta 0# where delta = x2 -# x1 diff --git a/ghc/lib/std/PrelRead.lhs b/ghc/lib/std/PrelRead.lhs index ac96ecb..a3bea26 100644 --- a/ghc/lib/std/PrelRead.lhs +++ b/ghc/lib/std/PrelRead.lhs @@ -1,5 +1,5 @@ % ------------------------------------------------------------------------------ -% $Id: PrelRead.lhs,v 1.15 2000/06/30 13:39:36 simonmar Exp $ +% $Id: PrelRead.lhs,v 1.16 2000/08/07 23:37:23 qrczak Exp $ % % (c) The University of Glasgow, 1994-2000 % @@ -14,7 +14,7 @@ Instances of the Read class. module PrelRead where import PrelErr ( error ) -import PrelEnum ( Enum(..) ) +import PrelEnum ( Enum(..), maxBound ) import PrelNum import PrelReal import PrelFloat @@ -272,7 +272,7 @@ lexLitChar ('\\':s) = do fromAsciiLab (x:y:ls) | isUpper y && [x,y] `elem` asciiEscTab = return ([x,y], ls) fromAsciiLab _ = mzero - + asciiEscTab = "DEL" : asciiTab {- @@ -284,8 +284,7 @@ lexLitChar ('\\':s) = do -} checkSize base f str = do (num, res) <- f str - -- Note: this is assumes that a Char is 8 bits long. - if (toAnInt base num) > 255 then + if toAnInteger base num > toInteger (ord maxBound) then mzero else case base of @@ -293,7 +292,7 @@ lexLitChar ('\\':s) = do 16 -> return ('x':num, res) _ -> return (num, res) - toAnInt base xs = foldl (\ acc n -> acc*base + n) 0 (map digitToInt xs) + toAnInteger base = foldl (\ acc n -> acc*base + toInteger (digitToInt n)) 0 lexLitChar (c:s) = return ([c],s) diff --git a/ghc/lib/std/cbits/filePutc.c b/ghc/lib/std/cbits/filePutc.c index f9bacd6..9ca8083 100644 --- a/ghc/lib/std/cbits/filePutc.c +++ b/ghc/lib/std/cbits/filePutc.c @@ -1,7 +1,7 @@ /* * (c) The GRASP/AQUA Project, Glasgow University, 1994-1998 * - * $Id: filePutc.c,v 1.11 1999/12/08 15:47:07 simonmar Exp $ + * $Id: filePutc.c,v 1.12 2000/08/07 23:37:23 qrczak Exp $ * * hPutChar Runtime Support */ @@ -24,6 +24,7 @@ filePutc(StgForeignPtr ptr, StgChar c) { IOFileObject* fo = (IOFileObject*)ptr; int rc = 0; + unsigned char byte = (unsigned char) c; /* What filePutc needs to do: @@ -46,6 +47,9 @@ filePutc(StgForeignPtr ptr, StgChar c) flush(i.e., empty) the buffer first. (We could be smarter about this, but aren't!) + Only the lower 8 bits of a character are written. The data are supposed + to be already converted to the stream's 8-bit encoding. + */ if ( FILEOBJ_READABLE(fo) && FILEOBJ_JUST_READ(fo) ) { @@ -60,7 +64,7 @@ filePutc(StgForeignPtr ptr, StgChar c) ; } else { /* We're buffered, add it to the pack */ - ((char*)fo->buf)[fo->bufWPtr] = (char)c; + ((unsigned char*)fo->buf)[fo->bufWPtr] = byte; fo->bufWPtr++; /* If the buffer filled up as a result, *or* the added character terminated a line @@ -79,10 +83,10 @@ filePutc(StgForeignPtr ptr, StgChar c) while ((rc = ( #ifdef USE_WINSOCK fo->flags & FILEOBJ_WINSOCK ? - send(fo->fd, &c, 1, 0) : - write(fo->fd, &c, 1))) <= 0) { + send(fo->fd, &byte, 1, 0) : + write(fo->fd, &byte, 1))) <= 0) { #else - write(fo->fd, &c, 1))) <= 0) { + write(fo->fd, &byte, 1))) <= 0) { #endif if ( rc == -1 && errno == EAGAIN) { diff --git a/ghc/rts/ForeignCall.c b/ghc/rts/ForeignCall.c index a6a4646..5b1a0c6 100644 --- a/ghc/rts/ForeignCall.c +++ b/ghc/rts/ForeignCall.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- - * $Id: ForeignCall.c,v 1.17 2000/05/26 10:14:34 sewardj Exp $ + * $Id: ForeignCall.c,v 1.18 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team 1994-1999. * @@ -431,7 +431,7 @@ unpackArgsAndCallHaskell_x86_nocallconv_wrk ( StgStablePtr stableptr, while (*argp) { switch (*argp) { case CHAR_REP: - node = rts_apply ( node, rts_mkChar ( *(char*)args ) ); + node = rts_apply ( node, rts_mkChar ( *(unsigned int*)args ) ); args += 4; break; case INT_REP: diff --git a/ghc/rts/Prelude.c b/ghc/rts/Prelude.c index 154b046..5f8ed6d 100644 --- a/ghc/rts/Prelude.c +++ b/ghc/rts/Prelude.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- - * $Id: Prelude.c,v 1.8 2000/06/15 13:23:52 daan Exp $ + * $Id: Prelude.c,v 1.9 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-2000 * @@ -193,10 +193,10 @@ void fixupRTStoPreludeRefs ( void*(*ask_hugs_dynamic_linker)(char*) ) { int i; - for(i=0;i<=255;i++) + for(i=0; i<=MAX_CHARLIKE-MIN_CHARLIKE; i++) (CHARLIKE_closure[i]).header.info = Czh_static_info; - for(i=0;i<=32;i++) + for(i=0; i<=MAX_INTLIKE-MIN_INTLIKE; i++) (INTLIKE_closure[i]).header.info = Izh_static_info; } #endif diff --git a/ghc/rts/PrimOps.hc b/ghc/rts/PrimOps.hc index e288b32..015e34a 100644 --- a/ghc/rts/PrimOps.hc +++ b/ghc/rts/PrimOps.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: PrimOps.hc,v 1.52 2000/05/10 11:02:00 simonmar Exp $ + * $Id: PrimOps.hc,v 1.53 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-2000 * @@ -263,13 +263,14 @@ W_ GHC_ZCCReturnable_static_info[0]; FE_ \ } -newByteArray(Char, sizeof(C_)) -newByteArray(Int, sizeof(I_)); -newByteArray(Word, sizeof(W_)); -newByteArray(Addr, sizeof(P_)); -newByteArray(Float, sizeof(StgFloat)); -newByteArray(Double, sizeof(StgDouble)); -newByteArray(StablePtr, sizeof(StgStablePtr)); +newByteArray(Char, 1) +/* Char arrays really contain only 8-bit bytes for compatibility. */ +newByteArray(Int, sizeof(I_)) +newByteArray(Word, sizeof(W_)) +newByteArray(Addr, sizeof(P_)) +newByteArray(Float, sizeof(StgFloat)) +newByteArray(Double, sizeof(StgDouble)) +newByteArray(StablePtr, sizeof(StgStablePtr)) FN_(newArrayzh_fast) { diff --git a/ghc/rts/RtsAPI.c b/ghc/rts/RtsAPI.c index 992be89..f22ec7e 100644 --- a/ghc/rts/RtsAPI.c +++ b/ghc/rts/RtsAPI.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: RtsAPI.c,v 1.17 2000/04/26 10:17:41 simonmar Exp $ + * $Id: RtsAPI.c,v 1.18 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-2000 * @@ -19,11 +19,11 @@ Building Haskell objects from C datatypes. ------------------------------------------------------------------------- */ HaskellObj -rts_mkChar (char c) +rts_mkChar (unsigned int c) { StgClosure *p = (StgClosure *)allocate(CONSTR_sizeW(0,1)); p->header.info = Czh_con_info; - p->payload[0] = (StgClosure *)((StgInt)c); + p->payload[0] = (StgClosure *)(StgChar)c; return p; } @@ -207,12 +207,12 @@ rts_apply (HaskellObj f, HaskellObj arg) Deconstructing Haskell objects ------------------------------------------------------------------------- */ -char +unsigned int rts_getChar (HaskellObj p) { if ( p->header.info == Czh_con_info || p->header.info == Czh_static_info) { - return (char)(StgWord)(p->payload[0]); + return (StgChar)(StgWord)(p->payload[0]); } else { barf("getChar: not a Char"); } diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c index 09a8016..ad567da 100644 --- a/ghc/rts/StgCRun.c +++ b/ghc/rts/StgCRun.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgCRun.c,v 1.18 2000/04/17 14:46:31 sewardj Exp $ + * $Id: StgCRun.c,v 1.19 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1998-2000 * @@ -238,7 +238,7 @@ EXTFUN(StgReturn) StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg) { - StgChar space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ]; + unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ]; StgThreadReturnCode r; __asm__ volatile ( @@ -321,7 +321,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { StgThreadReturnCode StgRun(StgFunPtr f, StgRegTable *basereg) { - StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)]; + unsigned char space[RESERVED_C_STACK_BYTES+sizeof(void *)]; register void *i7 __asm__("%i7"); ((void **)(space))[100] = i7; f(); diff --git a/ghc/rts/StrHash.c b/ghc/rts/StrHash.c index 231eaa7..7b6e66b 100644 --- a/ghc/rts/StrHash.c +++ b/ghc/rts/StrHash.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StrHash.c,v 1.1 2000/04/05 15:32:08 simonmar Exp $ + * $Id: StrHash.c,v 1.2 2000/08/07 23:37:23 qrczak Exp $ * * (c) The GHC Team, 1994-2000 * @@ -15,39 +15,39 @@ #include "Rts.h" #include "StrHash.h" -static const StgChar auxTable[] = { -(StgChar)0x01, (StgChar)0x57, (StgChar)0x31, (StgChar)0x0c, (StgChar)0xb0, (StgChar)0xb2, (StgChar)0x66, (StgChar)0xa6, -(StgChar)0x79, (StgChar)0xc1, (StgChar)0x06, (StgChar)0x54, (StgChar)0xf9, (StgChar)0xe6, (StgChar)0x2c, (StgChar)0xa3, -(StgChar)0x0e, (StgChar)0xc5, (StgChar)0xd5, (StgChar)0xb5, (StgChar)0xa1, (StgChar)0x55, (StgChar)0xda, (StgChar)0x50, -(StgChar)0x40, (StgChar)0xef, (StgChar)0x18, (StgChar)0xe2, (StgChar)0xec, (StgChar)0x8e, (StgChar)0x26, (StgChar)0xc8, -(StgChar)0x6e, (StgChar)0xb1, (StgChar)0x68, (StgChar)0x67, (StgChar)0x8d, (StgChar)0xfd, (StgChar)0xff, (StgChar)0x32, -(StgChar)0x4d, (StgChar)0x65, (StgChar)0x51, (StgChar)0x12, (StgChar)0x2d, (StgChar)0x60, (StgChar)0x1f, (StgChar)0xde, -(StgChar)0x19, (StgChar)0x6b, (StgChar)0xbe, (StgChar)0x46, (StgChar)0x56, (StgChar)0xed, (StgChar)0xf0, (StgChar)0x22, -(StgChar)0x48, (StgChar)0xf2, (StgChar)0x14, (StgChar)0xd6, (StgChar)0xf4, (StgChar)0xe3, (StgChar)0x95, (StgChar)0xeb, -(StgChar)0x61, (StgChar)0xea, (StgChar)0x39, (StgChar)0x16, (StgChar)0x3c, (StgChar)0xfa, (StgChar)0x52, (StgChar)0xaf, -(StgChar)0xd0, (StgChar)0x05, (StgChar)0x7f, (StgChar)0xc7, (StgChar)0x6f, (StgChar)0x3e, (StgChar)0x87, (StgChar)0xf8, -(StgChar)0xae, (StgChar)0xa9, (StgChar)0xd3, (StgChar)0x3a, (StgChar)0x42, (StgChar)0x9a, (StgChar)0x6a, (StgChar)0xc3, -(StgChar)0xf5, (StgChar)0xab, (StgChar)0x11, (StgChar)0xbb, (StgChar)0xb6, (StgChar)0xb3, (StgChar)0x00, (StgChar)0xf3, -(StgChar)0x84, (StgChar)0x38, (StgChar)0x94, (StgChar)0x4b, (StgChar)0x80, (StgChar)0x85, (StgChar)0x9e, (StgChar)0x64, -(StgChar)0x82, (StgChar)0x7e, (StgChar)0x5b, (StgChar)0x0d, (StgChar)0x99, (StgChar)0xf6, (StgChar)0xd8, (StgChar)0xdb, -(StgChar)0x77, (StgChar)0x44, (StgChar)0xdf, (StgChar)0x4e, (StgChar)0x53, (StgChar)0x58, (StgChar)0xc9, (StgChar)0x63, -(StgChar)0x7a, (StgChar)0x0b, (StgChar)0x5c, (StgChar)0x20, (StgChar)0x88, (StgChar)0x72, (StgChar)0x34, (StgChar)0x0a, -(StgChar)0x8a, (StgChar)0x1e, (StgChar)0x30, (StgChar)0xb7, (StgChar)0x9c, (StgChar)0x23, (StgChar)0x3d, (StgChar)0x1a, -(StgChar)0x8f, (StgChar)0x4a, (StgChar)0xfb, (StgChar)0x5e, (StgChar)0x81, (StgChar)0xa2, (StgChar)0x3f, (StgChar)0x98, -(StgChar)0xaa, (StgChar)0x07, (StgChar)0x73, (StgChar)0xa7, (StgChar)0xf1, (StgChar)0xce, (StgChar)0x03, (StgChar)0x96, -(StgChar)0x37, (StgChar)0x3b, (StgChar)0x97, (StgChar)0xdc, (StgChar)0x5a, (StgChar)0x35, (StgChar)0x17, (StgChar)0x83, -(StgChar)0x7d, (StgChar)0xad, (StgChar)0x0f, (StgChar)0xee, (StgChar)0x4f, (StgChar)0x5f, (StgChar)0x59, (StgChar)0x10, -(StgChar)0x69, (StgChar)0x89, (StgChar)0xe1, (StgChar)0xe0, (StgChar)0xd9, (StgChar)0xa0, (StgChar)0x25, (StgChar)0x7b, -(StgChar)0x76, (StgChar)0x49, (StgChar)0x02, (StgChar)0x9d, (StgChar)0x2e, (StgChar)0x74, (StgChar)0x09, (StgChar)0x91, -(StgChar)0x86, (StgChar)0xe4, (StgChar)0xcf, (StgChar)0xd4, (StgChar)0xca, (StgChar)0xd7, (StgChar)0x45, (StgChar)0xe5, -(StgChar)0x1b, (StgChar)0xbc, (StgChar)0x43, (StgChar)0x7c, (StgChar)0xa8, (StgChar)0xfc, (StgChar)0x2a, (StgChar)0x04, -(StgChar)0x1d, (StgChar)0x6c, (StgChar)0x15, (StgChar)0xf7, (StgChar)0x13, (StgChar)0xcd, (StgChar)0x27, (StgChar)0xcb, -(StgChar)0xe9, (StgChar)0x28, (StgChar)0xba, (StgChar)0x93, (StgChar)0xc6, (StgChar)0xc0, (StgChar)0x9b, (StgChar)0x21, -(StgChar)0xa4, (StgChar)0xbf, (StgChar)0x62, (StgChar)0xcc, (StgChar)0xa5, (StgChar)0xb4, (StgChar)0x75, (StgChar)0x4c, -(StgChar)0x8c, (StgChar)0x24, (StgChar)0xd2, (StgChar)0xac, (StgChar)0x29, (StgChar)0x36, (StgChar)0x9f, (StgChar)0x08, -(StgChar)0xb9, (StgChar)0xe8, (StgChar)0x71, (StgChar)0xc4, (StgChar)0xe7, (StgChar)0x2f, (StgChar)0x92, (StgChar)0x78, -(StgChar)0x33, (StgChar)0x41, (StgChar)0x1c, (StgChar)0x90, (StgChar)0xfe, (StgChar)0xdd, (StgChar)0x5d, (StgChar)0xbd, -(StgChar)0xc2, (StgChar)0x8b, (StgChar)0x70, (StgChar)0x2b, (StgChar)0x47, (StgChar)0x6d, (StgChar)0xb8, (StgChar)0xd1}; +static const unsigned char auxTable[] = { +0x01, 0x57, 0x31, 0x0c, 0xb0, 0xb2, 0x66, 0xa6, +0x79, 0xc1, 0x06, 0x54, 0xf9, 0xe6, 0x2c, 0xa3, +0x0e, 0xc5, 0xd5, 0xb5, 0xa1, 0x55, 0xda, 0x50, +0x40, 0xef, 0x18, 0xe2, 0xec, 0x8e, 0x26, 0xc8, +0x6e, 0xb1, 0x68, 0x67, 0x8d, 0xfd, 0xff, 0x32, +0x4d, 0x65, 0x51, 0x12, 0x2d, 0x60, 0x1f, 0xde, +0x19, 0x6b, 0xbe, 0x46, 0x56, 0xed, 0xf0, 0x22, +0x48, 0xf2, 0x14, 0xd6, 0xf4, 0xe3, 0x95, 0xeb, +0x61, 0xea, 0x39, 0x16, 0x3c, 0xfa, 0x52, 0xaf, +0xd0, 0x05, 0x7f, 0xc7, 0x6f, 0x3e, 0x87, 0xf8, +0xae, 0xa9, 0xd3, 0x3a, 0x42, 0x9a, 0x6a, 0xc3, +0xf5, 0xab, 0x11, 0xbb, 0xb6, 0xb3, 0x00, 0xf3, +0x84, 0x38, 0x94, 0x4b, 0x80, 0x85, 0x9e, 0x64, +0x82, 0x7e, 0x5b, 0x0d, 0x99, 0xf6, 0xd8, 0xdb, +0x77, 0x44, 0xdf, 0x4e, 0x53, 0x58, 0xc9, 0x63, +0x7a, 0x0b, 0x5c, 0x20, 0x88, 0x72, 0x34, 0x0a, +0x8a, 0x1e, 0x30, 0xb7, 0x9c, 0x23, 0x3d, 0x1a, +0x8f, 0x4a, 0xfb, 0x5e, 0x81, 0xa2, 0x3f, 0x98, +0xaa, 0x07, 0x73, 0xa7, 0xf1, 0xce, 0x03, 0x96, +0x37, 0x3b, 0x97, 0xdc, 0x5a, 0x35, 0x17, 0x83, +0x7d, 0xad, 0x0f, 0xee, 0x4f, 0x5f, 0x59, 0x10, +0x69, 0x89, 0xe1, 0xe0, 0xd9, 0xa0, 0x25, 0x7b, +0x76, 0x49, 0x02, 0x9d, 0x2e, 0x74, 0x09, 0x91, +0x86, 0xe4, 0xcf, 0xd4, 0xca, 0xd7, 0x45, 0xe5, +0x1b, 0xbc, 0x43, 0x7c, 0xa8, 0xfc, 0x2a, 0x04, +0x1d, 0x6c, 0x15, 0xf7, 0x13, 0xcd, 0x27, 0xcb, +0xe9, 0x28, 0xba, 0x93, 0xc6, 0xc0, 0x9b, 0x21, +0xa4, 0xbf, 0x62, 0xcc, 0xa5, 0xb4, 0x75, 0x4c, +0x8c, 0x24, 0xd2, 0xac, 0x29, 0x36, 0x9f, 0x08, +0xb9, 0xe8, 0x71, 0xc4, 0xe7, 0x2f, 0x92, 0x78, +0x33, 0x41, 0x1c, 0x90, 0xfe, 0xdd, 0x5d, 0xbd, +0xc2, 0x8b, 0x70, 0x2b, 0x47, 0x6d, 0xb8, 0xd1}; hash_t hash_str(char *str) diff --git a/ghc/rts/parallel/Pack.c b/ghc/rts/parallel/Pack.c index 6f6ad59..72c66bf 100644 --- a/ghc/rts/parallel/Pack.c +++ b/ghc/rts/parallel/Pack.c @@ -1,6 +1,6 @@ /* Time-stamp: - $Id: Pack.c,v 1.4 2000/03/31 03:09:37 hwloidl Exp $ + $Id: Pack.c,v 1.5 2000/08/07 23:37:24 qrczak Exp $ Graph packing and unpacking code for sending it to another processor and retrieving the original graph structure from the packet. @@ -722,12 +722,21 @@ StgClosure *closure; switch (info->type) { case CONSTR_CHARLIKE: - IF_PAR_DEBUG(pack, - belch("*>^^ Packing a charlike closure %d", - ((StgIntCharlikeClosure*)closure)->data)); - - PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data)); - return; + { + StgChar val = ((StgIntCharlikeClosure*)closure)->data; + + if ((val <= MAX_CHARLIKE) && (val >= MIN_CHARLIKE)) { + IF_PAR_DEBUG(pack, + belch("*>^^ Packing a small charlike %d as a PLC", val)); + PackPLC((StgPtr)CHARLIKE_CLOSURE(val)); + } else { + IF_PAR_DEBUG(pack, + belch("*>^^ Packing a big charlike %d as a normal closure", + val)); + PackGeneric(closure); + } + return; + } case CONSTR_INTLIKE: { @@ -737,14 +746,13 @@ StgClosure *closure; IF_PAR_DEBUG(pack, belch("*>^^ Packing a small intlike %d as a PLC", val)); PackPLC((StgPtr)INTLIKE_CLOSURE(val)); - return; } else { IF_PAR_DEBUG(pack, belch("*>^^ Packing a big intlike %d as a normal closure", val)); PackGeneric(closure); - return; } + return; } case CONSTR: @@ -1200,13 +1208,23 @@ PackPAP(StgPAP *pap) { /* distinguish static closure (PLC) from other closures (FM) */ switch (get_itbl((StgClosure*)q)->type) { case CONSTR_CHARLIKE: - IF_PAR_DEBUG(pack, - belch("*>** PackPAP: packing a charlike closure %d", - ((StgIntCharlikeClosure*)q)->data)); - - PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)q)->data)); - p++; - break; + { + StgChar val = ((StgIntCharlikeClosure*)q)->data; + + if ((val <= MAX_CHARLIKE) && (val >= MIN_CHARLIKE)) { + IF_PAR_DEBUG(pack, + belch("*>** PackPAP: Packing ptr to a small charlike %d as a PLC", val)); + PackPLC((StgPtr)CHARLIKE_CLOSURE(val)); + } else { + IF_PAR_DEBUG(pack, + belch("*>** PackPAP: Packing a ptr to a big charlike %d as a FM", + val)); + Pack((StgWord)(ARGTAG_MAX+1)); + PackFetchMe((StgClosure *)q); + } + p++; + break; + } case CONSTR_INTLIKE: { @@ -1216,17 +1234,15 @@ PackPAP(StgPAP *pap) { IF_PAR_DEBUG(pack, belch("*>** PackPAP: Packing ptr to a small intlike %d as a PLC", val)); PackPLC((StgPtr)INTLIKE_CLOSURE(val)); - p++; - break; } else { IF_PAR_DEBUG(pack, belch("*>** PackPAP: Packing a ptr to a big intlike %d as a FM", val)); Pack((StgWord)(ARGTAG_MAX+1)); PackFetchMe((StgClosure *)q); - p++; - break; } + p++; + break; } case THUNK_STATIC: // ToDo: check whether that's ok case FUN_STATIC: // ToDo: check whether that's ok