[project @ 2000-08-07 23:37:19 by qrczak]

author qrczak <unknown>

Mon, 7 Aug 2000 23:37:24 +0000 (23:37 +0000)

committer qrczak <unknown>

Mon, 7 Aug 2000 23:37:24 +0000 (23:37 +0000)
author qrczak <unknown>
Mon, 7 Aug 2000 23:37:24 +0000 (23:37 +0000)
committer qrczak <unknown>
Mon, 7 Aug 2000 23:37:24 +0000 (23:37 +0000)
diff --git a/ghc/compiler/HsVersions.h b/ghc/compiler/HsVersions.h

index b90e474..00714cd 100644 (file)
--- a/ghc/compiler/HsVersions.h
+++ b/ghc/compiler/HsVersions.h
@@ -155,13 +155,17 @@ import qualified FastString
  # define _NIL_         (FastString.mkFastString "")
  # define _CONS_                FastString.consFS
  # define _HEAD_                FastString.headFS
+# define _HEAD_INT_    FastString.headIntFS
  # define _TAIL_                FastString.tailFS
  # define _LENGTH_      FastString.lengthFS
  # define _PK_          FastString.mkFastString
+# define _PK_INT_      FastString.mkFastStringInt
  # define _UNPK_                FastString.unpackFS
+# define _UNPK_INT_    FastString.unpackIntFS
  # define _APPEND_      `FastString.appendFS`
  # define _CONCAT_      FastString.concatFS
  #else
+# error I think that FastString is now always used. If not, fix this.
  # define FAST_STRING String
  # define SLIT(x)      (x)
  # define _CMP_STRING_ cmpString
diff --git a/ghc/compiler/absCSyn/AbsCSyn.lhs b/ghc/compiler/absCSyn/AbsCSyn.lhs

index eac8a27..5cf12fc 100644 (file)
--- a/ghc/compiler/absCSyn/AbsCSyn.lhs
+++ b/ghc/compiler/absCSyn/AbsCSyn.lhs
@@ -1,7 +1,7 @@
  %
  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
  %
-% $Id: AbsCSyn.lhs,v 1.32 2000/08/02 14:13:26 rrt Exp $
+% $Id: AbsCSyn.lhs,v 1.33 2000/08/07 23:37:19 qrczak Exp $
  %
  \section[AbstractC]{Abstract C: the last stop before machine code}
  
@@ -315,7 +315,7 @@ data CAddrMode
  
    | CCharLike CAddrMode        -- The address of a static char-like closure for
                         -- the specified character.  It is guaranteed to be in
-                       -- the range 0..255.
+                       -- the range mIN_CHARLIKE..mAX_CHARLIKE
  
    | CIntLike CAddrMode -- The address of a static int-like closure for the
                         -- specified small integer.  It is guaranteed to be in
diff --git a/ghc/compiler/absCSyn/CStrings.lhs b/ghc/compiler/absCSyn/CStrings.lhs

index e231993..6f2a0e3 100644 (file)
--- a/ghc/compiler/absCSyn/CStrings.lhs
+++ b/ghc/compiler/absCSyn/CStrings.lhs
@@ -6,8 +6,7 @@ module CStrings(
  
         cSEP, pp_cSEP,
  
-       stringToC, charToC, pprFSInCStyle, pprStringInCStyle,
-       charToEasyHaskell
+       pprFSInCStyle, pprStringInCStyle
    ) where
  
  #include "HsVersions.h"
@@ -36,64 +35,20 @@ pp_cSEP = char '_'
  
  \begin{code}
  pprFSInCStyle :: FAST_STRING -> SDoc
-pprFSInCStyle fs = doubleQuotes (text (stringToC (_UNPK_ fs)))
+-- Assumes it contains only characters '\0'..'\xFF'!
+pprFSInCStyle fs = pprStringInCStyle (_UNPK_ fs)
  
  pprStringInCStyle :: String -> SDoc
-pprStringInCStyle s = doubleQuotes (text (stringToC s))
-
-stringToC   :: String -> String
--- Convert a string to the form required by C in a C literal string
--- Tthe hassle is what to do w/ strings like "ESC 0"...
-stringToC ""  = ""
-stringToC [c] = charToC c
-stringToC (c:cs)
-    -- if we have something "octifiable" in "c", we'd better "octify"
-    -- the rest of the string, too.
-  = if (c < ' ' || c > '~')
-    then (charToC c) ++ (concat (map char_to_C cs))
-    else (charToC c) ++ (stringToC cs)
-  where
-    char_to_C c | c == '\n' = "\\n"    -- use C escapes when we can
-               | c == '\a' = "\\a"
-               | c == '\b' = "\\b"     -- ToDo: chk some of these...
-               | c == '\r' = "\\r"
-               | c == '\t' = "\\t"
-               | c == '\f' = "\\f"
-               | c == '\v' = "\\v"
-               | otherwise = '\\' : (octify (ord c))
+pprStringInCStyle s = doubleQuotes (text (concatMap charToC s))
  
  charToC :: Char -> String
--- Convert a character to the form reqd in a C character literal
-charToC c = if (c >= ' ' && c <= '~')  -- non-portable...
-           then case c of
-                 '\'' -> "\\'"
-                 '\\' -> "\\\\"
-                 '"'  -> "\\\""
-                 '\n' -> "\\n"
-                 '\a' -> "\\a"
-                 '\b' -> "\\b"
-                 '\r' -> "\\r"
-                 '\t' -> "\\t"
-                 '\f' -> "\\f"
-                 '\v' -> "\\v"
-                 _    -> [c]
-           else '\\' : (octify (ord c))
-
-charToEasyHaskell :: Char -> String
--- Convert a character to the form reqd in a Haskell character literal
-charToEasyHaskell c
-  = if (c >= 'a' && c <= 'z')
-    || (c >= 'A' && c <= 'Z')
-    || (c >= '0' && c <= '9')
-    then [c]
-    else case c of
-         _    -> '\\' : show (ord c)
-
-octify :: Int -> String
-octify n
-  = if n < 8 then
-       [chr (n + ord '0')]
-    else
-       octify (n `quot` 8) ++ [chr (n `rem` 8 + ord '0')]
+charToC '\"' = "\\\""
+charToC '\'' = "\\\'"
+charToC '\\' = "\\\\"
+charToC c | c >= ' ' && c <= '~' = [c]
+          | c > '\xFF' = panic ("charToC "++show c)
+          | otherwise = ['\\',
+                         chr (ord '0' + ord c `div` 64),
+                         chr (ord '0' + ord c `div` 8 `mod` 8),
+                         chr (ord '0' + ord c         `mod` 8)]
  \end{code}
-
diff --git a/ghc/compiler/absCSyn/PprAbsC.lhs b/ghc/compiler/absCSyn/PprAbsC.lhs

index d98048c..82431ab 100644 (file)
--- a/ghc/compiler/absCSyn/PprAbsC.lhs
+++ b/ghc/compiler/absCSyn/PprAbsC.lhs
@@ -38,7 +38,7 @@ import CmdLineOpts    ( opt_SccProfilingOn, opt_GranMacros )
  import CostCentre      ( pprCostCentreDecl, pprCostCentreStackDecl )
  
  import Costs           ( costs, addrModeCosts, CostRes(..), Side(..) )
-import CStrings                ( stringToC, pprCLabelString )
+import CStrings                ( pprStringInCStyle, pprCLabelString )
  import FiniteMap       ( addToFM, emptyFM, lookupFM, FiniteMap )
  import Literal         ( Literal(..) )
  import TyCon           ( tyConDataCons )
@@ -498,8 +498,8 @@ pprAbsC stmt@(CClosureInfoAndCode cl_info slow maybe_fast cl_descr) _
  
      type_str = pprSMRep (closureSMRep cl_info)
  
-    pp_descr = hcat [char '"', text (stringToC cl_descr), char '"']
-    pp_type  = hcat [char '"', text (stringToC (closureTypeDescr cl_info)), char '"']
+    pp_descr = pprStringInCStyle cl_descr
+    pp_type  = pprStringInCStyle (closureTypeDescr cl_info)
  
  pprAbsC stmt@(CClosureTbl tycon) _
    = vcat (
@@ -1289,6 +1289,7 @@ pprUnionTag RetRep                = char 'p'
  pprUnionTag CostCentreRep      = panic "pprUnionTag:CostCentre?"
  
  pprUnionTag CharRep            = char 'c'
+pprUnionTag Int8Rep            = ptext SLIT("i8")
  pprUnionTag IntRep             = char 'i'
  pprUnionTag WordRep            = char 'w'
  pprUnionTag AddrRep            = char 'a'
@@ -1534,9 +1535,8 @@ ppr_decls_Amode (CLit _)  = returnTE (Nothing, Nothing)
  -- CIntLike must be a literal -- no decls
  ppr_decls_Amode (CIntLike int) = returnTE (Nothing, Nothing)
  
--- CCharLike may have be arbitrary value -- may have decls
-ppr_decls_Amode (CCharLike char)
-  = ppr_decls_Amode char
+-- CCharLike too
+ppr_decls_Amode (CCharLike char) = returnTE (Nothing, Nothing)
  
  -- now, the only place where we actually print temps/externs...
  ppr_decls_Amode (CTemp uniq kind)
diff --git a/ghc/compiler/basicTypes/Literal.lhs b/ghc/compiler/basicTypes/Literal.lhs

index b8f495e..d2f6509 100644 (file)
--- a/ghc/compiler/basicTypes/Literal.lhs
+++ b/ghc/compiler/basicTypes/Literal.lhs
@@ -27,7 +27,7 @@ import TysPrim                ( charPrimTy, addrPrimTy, floatPrimTy, doublePrimTy,
  import PrimRep         ( PrimRep(..) )
  import Type            ( Type, typePrimRep )
  import PprType         ( pprParendType )
-import CStrings                ( charToC, charToEasyHaskell, pprFSInCStyle )
+import CStrings                ( pprFSInCStyle )
  
  import Outputable
  import Util            ( thenCmp )
@@ -85,7 +85,7 @@ function applications, etc., etc., has not yet been done.
  data Literal
    =    ------------------
         -- First the primitive guys
-    MachChar   Char
+    MachChar   Int             -- Char#        At least 31 bits
    | MachStr    FAST_STRING
  
    | MachAddr   Integer -- Whatever this machine thinks is a "pointer"
@@ -159,8 +159,8 @@ int2WordLit (MachInt i)
    | i < 0     = MachWord (1 + tARGET_MAX_WORD + i)     -- (-1)  --->  tARGET_MAX_WORD
    | otherwise = MachWord i
  
-char2IntLit (MachChar c) = MachInt  (toInteger (ord c))
-int2CharLit (MachInt  i) = MachChar (chr (fromInteger i))
+char2IntLit (MachChar c) = MachInt  (toInteger c)
+int2CharLit (MachInt  i) = MachChar (fromInteger i)
  
  float2IntLit (MachFloat f) = MachInt   (truncate    f)
  int2FloatLit (MachInt   i) = MachFloat (fromInteger i)
@@ -268,13 +268,13 @@ pprLit lit
        iface_style = ifaceStyle sty
      in
      case lit of
-      MachChar ch | code_style  -> hcat [ptext SLIT("(C_)"), char '\'', 
-                                        text (charToC ch), char '\'']
-                 | iface_style -> char '\'' <> text (charToEasyHaskell ch) <> char '\''
-                 | otherwise   -> text ['\'', ch, '\'']
+      MachChar ch | code_style -> hcat [ptext SLIT("(C_)"), text (show ch)]
+                 | otherwise  -> pprHsChar ch
  
        MachStr s | code_style -> pprFSInCStyle s
-               | otherwise  -> pprFSAsString s
+               | otherwise  -> pprHsString s
+      -- Warning: printing MachStr in code_style assumes it contains
+      -- only characters '\0'..'\xFF'!
  
        MachInt i | code_style && i == tARGET_MIN_INT -> parens (integer (i+1) <> text "-1")
                                 -- Avoid a problem whereby gcc interprets
@@ -300,11 +300,11 @@ pprLit lit
                  | otherwise  -> ptext SLIT("__addr") <+> integer p
  
        MachLabel l | code_style -> ptext SLIT("(&") <> ptext l <> char ')'
-                 | otherwise  -> ptext SLIT("__label") <+> pprFSAsString l
+                 | otherwise  -> ptext SLIT("__label") <+> pprHsString l
  
        MachLitLit s ty | code_style  -> ptext s
                       | otherwise   -> parens (hsep [ptext SLIT("__litlit"), 
-                                                    pprFSAsString s,
+                                                    pprHsString s,
                                                      pprParendType ty])
  
  pprIntVal :: Integer -> SDoc
@@ -337,7 +337,7 @@ Hash values should be zero or a positive integer.  No negatives please.
  
  \begin{code}
  hashLiteral :: Literal -> Int
-hashLiteral (MachChar c)       = ord c + 1000  -- Keep it out of range of common ints
+hashLiteral (MachChar c)       = c + 1000      -- Keep it out of range of common ints
  hashLiteral (MachStr s)        = hashFS s
  hashLiteral (MachAddr i)       = hashInteger i
  hashLiteral (MachInt i)        = hashInteger i
diff --git a/ghc/compiler/basicTypes/MkId.lhs b/ghc/compiler/basicTypes/MkId.lhs

index 3504caa..ff2f355 100644 (file)
--- a/ghc/compiler/basicTypes/MkId.lhs
+++ b/ghc/compiler/basicTypes/MkId.lhs
@@ -92,6 +92,8 @@ import Unique
  import Maybe            ( isJust )
  import Outputable
  import Util            ( assoc )
+import UnicodeUtil      ( stringToUtf8 )
+import Char             ( ord )
  \end{code}             
  
  
@@ -371,7 +373,7 @@ Similarly for newtypes
         unN = /\a -> \n:N -> coerce (a->a) n
  
  \begin{code}
-mkRecordSelId tycon field_label unpack_id
+mkRecordSelId tycon field_label unpack_id unpackUtf8_id
         -- Assumes that all fields with the same field label have the same type
         --
         -- Annoyingly, we have to pass in the unpackCString# Id, because
@@ -442,7 +444,16 @@ mkRecordSelId tycon field_label unpack_id
  
      error_expr = mkApps (Var rEC_SEL_ERROR_ID) [Type (unUsgTy field_tau), err_string]
         -- preserves invariant that type args are *not* usage-annotated on top.  KSW 1999-04.
-    err_string = App (Var unpack_id) (Lit (MachStr (_PK_ full_msg)))
+    err_string
+        | all safeChar full_msg
+            = App (Var unpack_id) (Lit (MachStr (_PK_ full_msg)))
+        | otherwise
+            = App (Var unpackUtf8_id) (Lit (MachStr (_PK_ (stringToUtf8 (map ord full_msg)))))
+        where
+        safeChar c = c >= '\1' && c <= '\xFF'
+        -- TODO: Putting this Unicode stuff here is ugly. Find a better
+        -- generic place to make string literals. This logic is repeated
+        -- in DsUtils.
      full_msg   = showSDoc (sep [text "No match in record selector", ppr sel_id]) 
  \end{code}
  
diff --git a/ghc/compiler/basicTypes/OccName.lhs b/ghc/compiler/basicTypes/OccName.lhs

index e2da548..5eb623b 100644 (file)
--- a/ghc/compiler/basicTypes/OccName.lhs
+++ b/ghc/compiler/basicTypes/OccName.lhs
@@ -39,7 +39,7 @@ module OccName (
  
  #include "HsVersions.h"
  
-import Char    ( isDigit, isAlpha, isUpper, isLower, ISALPHANUM, ord, chr, digitToInt, intToDigit )
+import Char    ( isDigit, isAlpha, isUpper, isLower, ISALPHANUM, ord, chr, digitToInt )
  import Util    ( thenCmp )
  import FiniteMap ( FiniteMap, emptyFM, lookupFM, addToFM, elemFM )
  import Outputable
@@ -438,8 +438,8 @@ The basic encoding scheme is this.
  * Most other printable characters translate to 'zx' or 'Zx' for some
         alphabetic character x
  
-* The others translate as 'zxdd' where 'dd' is exactly two hexadecimal
-       digits for the ord of the character
+* The others translate as 'znnnU' where 'nnn' is the decimal number
+        of the character
  
         Before          After
         --------------------------
@@ -532,9 +532,7 @@ encode_ch '/'  = "zs"
  encode_ch '*'  = "zt"
  encode_ch '_'  = "zu"
  encode_ch '%'  = "zv"
-encode_ch c    = ['z', 'x', intToDigit hi, intToDigit lo]
-              where
-                (hi,lo) = ord c `quotRem` 16
+encode_ch c    = 'z' : shows (ord c) "U"
  \end{code}
  
  Decode is used for user printing.
@@ -577,14 +575,15 @@ decode_escape ('s' : rest) = '/' : decode rest
  decode_escape ('t' : rest) = '*' : decode rest
  decode_escape ('u' : rest) = '_' : decode rest
  decode_escape ('v' : rest) = '%' : decode rest
-decode_escape ('x' : d1 : d2 : rest) = chr (digitToInt d1 * 16 + digitToInt d2)  : decode rest
  
  -- Tuples are coded as Z23T
+-- Characters not having a specific code are coded as z224U
  decode_escape (c : rest)
    | isDigit c = go (digitToInt c) rest
    where
      go n (c : rest) | isDigit c = go (10*n + digitToInt c) rest
      go n ('T' : rest)          = '(' : replicate n ',' ++ ')' : decode rest
+    go n ('U' : rest)           = chr n : decode rest
      go n other = pprPanic "decode_escape" (ppr n <+> text (c:rest))
  
  decode_escape (c : rest) = pprTrace "decode_escape" (char c) (decode rest)
diff --git a/ghc/compiler/basicTypes/Unique.lhs b/ghc/compiler/basicTypes/Unique.lhs

index abe3856..5c76485 100644 (file)
--- a/ghc/compiler/basicTypes/Unique.lhs
+++ b/ghc/compiler/basicTypes/Unique.lhs
@@ -177,7 +177,7 @@ module Unique (
         trueDataConKey,
         unboundKey,
         unboxedConKey,
-       unpackCString2IdKey,
+       unpackCStringUtf8IdKey,
         unpackCStringAppendIdKey,
         unpackCStringFoldrIdKey,
         unpackCStringIdKey,
@@ -630,7 +630,7 @@ realWorldPrimIdKey        = mkPreludeMiscIdUnique 23
  recConErrorIdKey             = mkPreludeMiscIdUnique 24
  recUpdErrorIdKey             = mkPreludeMiscIdUnique 25
  traceIdKey                   = mkPreludeMiscIdUnique 26
-unpackCString2IdKey          = mkPreludeMiscIdUnique 27
+unpackCStringUtf8IdKey       = mkPreludeMiscIdUnique 27
  unpackCStringAppendIdKey      = mkPreludeMiscIdUnique 28
  unpackCStringFoldrIdKey              = mkPreludeMiscIdUnique 29
  unpackCStringIdKey           = mkPreludeMiscIdUnique 30
diff --git a/ghc/compiler/codeGen/CgCon.lhs b/ghc/compiler/codeGen/CgCon.lhs

index e04da6b..f14ecab 100644 (file)
--- a/ghc/compiler/codeGen/CgCon.lhs
+++ b/ghc/compiler/codeGen/CgCon.lhs
@@ -33,7 +33,8 @@ import CgUsages               ( getRealSp, getVirtSp, setRealAndVirtualSp,
                           getSpRelOffset )
  import CgClosure       ( cgTopRhsClosure )
  import CgRetConv       ( assignRegs )
-import Constants       ( mAX_INTLIKE, mIN_INTLIKE, mIN_UPD_SIZE )
+import Constants       ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE,
+                         mIN_UPD_SIZE )
  import CgHeapery       ( allocDynClosure, inPlaceAllocDynClosure )
  import CgTailCall      ( performReturn, mkStaticAlgReturnCode, doTailCall,
                           mkUnboxedTupleReturnCode )
@@ -143,6 +144,12 @@ buildDynCon binder cc con []
                                 (mkConLFInfo con))
  \end{code}
  
+The following three paragraphs about @Char@-like and @Int@-like
+closures are obsolete, but I don't understand the details well enough
+to properly word them, sorry. I've changed the treatment of @Char@s to
+be analogous to @Int@s: only a subset is preallocated, because @Char@
+has now 31 bits. Only literals are handled here. -- Qrczak
+
  Now for @Char@-like closures.  We generate an assignment of the
  address of the closure to a temporary.  It would be possible simply to
  generate no code, and record the addressing mode in the environment,
@@ -160,18 +167,22 @@ Because of this, we use can safely return an addressing mode.
  
  \begin{code}
  buildDynCon binder cc con [arg_amode]
-
-  | maybeCharLikeCon con
-  = absC (CAssign temp_amode (CCharLike arg_amode))    `thenC`
-    returnFC temp_id_info
-
    | maybeIntLikeCon con && in_range_int_lit arg_amode
    = returnFC (stableAmodeIdInfo binder (CIntLike arg_amode) (mkConLFInfo con))
    where
      (temp_amode, temp_id_info) = newTempAmodeAndIdInfo binder (mkConLFInfo con)
  
      in_range_int_lit (CLit (MachInt val)) = val <= mAX_INTLIKE && val >= mIN_INTLIKE
-    in_range_int_lit other_amode         = False
+    in_range_int_lit _other_amode        = False
+
+buildDynCon binder cc con [arg_amode]
+  | maybeCharLikeCon con && in_range_char_lit arg_amode
+  = returnFC (stableAmodeIdInfo binder (CCharLike arg_amode) (mkConLFInfo con))
+  where
+    (temp_amode, temp_id_info) = newTempAmodeAndIdInfo binder (mkConLFInfo con)
+
+    in_range_char_lit (CLit (MachChar val)) = val <= mAX_CHARLIKE && val >= mIN_CHARLIKE
+    in_range_char_lit _other_amode         = False
  \end{code}
  
  Now the general case.
@@ -296,7 +307,6 @@ cgReturnDataCon con amodes
         -- do update in place...
        UpdateCode
         |  not (isNullaryDataCon con)  -- no nullary constructors, please
-       && not (maybeCharLikeCon con)  -- no chars please (these are all static)
         && not (any isFollowableRep (map getAmodeRep amodes))
                                         -- no ptrs please (generational gc...)
         && closureSize closure_info <= mIN_UPD_SIZE
diff --git a/ghc/compiler/codeGen/CgRetConv.lhs b/ghc/compiler/codeGen/CgRetConv.lhs

index 3b61312..4c0151e 100644 (file)
--- a/ghc/compiler/codeGen/CgRetConv.lhs
+++ b/ghc/compiler/codeGen/CgRetConv.lhs
@@ -1,7 +1,7 @@
  %
  % (c) The GRASP Project, Glasgow University, 1992-1998
  %
-% $Id: CgRetConv.lhs,v 1.23 2000/07/11 16:03:37 simonmar Exp $
+% $Id: CgRetConv.lhs,v 1.24 2000/08/07 23:37:20 qrczak Exp $
  %
  \section[CgRetConv]{Return conventions for the code generator}
  
@@ -81,6 +81,7 @@ dataReturnConvPrim Int64Rep   = LongReg Int64Rep  ILIT(1)
  dataReturnConvPrim Word64Rep   = LongReg Word64Rep ILIT(1)
  dataReturnConvPrim AddrRep     = VanillaReg AddrRep ILIT(1)
  dataReturnConvPrim CharRep     = VanillaReg CharRep ILIT(1)
+dataReturnConvPrim Int8Rep     = VanillaReg Int8Rep ILIT(1)
  dataReturnConvPrim FloatRep    = FloatReg  ILIT(1)
  dataReturnConvPrim DoubleRep   = DoubleReg ILIT(1)
  dataReturnConvPrim VoidRep     = VoidReg
diff --git a/ghc/compiler/deSugar/DsExpr.lhs b/ghc/compiler/deSugar/DsExpr.lhs

index 94e1ec5..7dfb84a 100644 (file)
--- a/ghc/compiler/deSugar/DsExpr.lhs
+++ b/ghc/compiler/deSugar/DsExpr.lhs
@@ -148,7 +148,7 @@ dsExpr (HsLitOut (HsString s) _)
  
    | _LENGTH_ s == 1
    = let
-       the_char = mkConApp charDataCon [mkLit (MachChar (_HEAD_ s))]
+       the_char = mkConApp charDataCon [mkLit (MachChar (_HEAD_INT_ s))]
         the_nil  = mkNilExpr charTy
         the_cons = mkConsExpr charTy the_char the_nil
      in
diff --git a/ghc/compiler/deSugar/DsUtils.lhs b/ghc/compiler/deSugar/DsUtils.lhs

index c96665f..bf63c5f 100644 (file)
--- a/ghc/compiler/deSugar/DsUtils.lhs
+++ b/ghc/compiler/deSugar/DsUtils.lhs
@@ -70,8 +70,9 @@ import TysWiredIn     ( nilDataCon, consDataCon,
                         )
  import BasicTypes      ( Boxity(..) )
  import UniqSet         ( mkUniqSet, minusUniqSet, isEmptyUniqSet, UniqSet )
-import Unique          ( unpackCStringIdKey, unpackCString2IdKey )
+import Unique          ( unpackCStringIdKey, unpackCStringUtf8IdKey )
  import Outputable
+import UnicodeUtil      ( stringToUtf8 )
  \end{code}
  
  
@@ -123,7 +124,7 @@ tidyLitPat lit lit_ty default_pat
  
      mk_list (HsString s)     = foldr
         (\c pat -> ConPat consDataCon lit_ty [] [] [mk_char_lit c,pat])
-       (ConPat nilDataCon lit_ty [] [] []) (_UNPK_ s)
+       (ConPat nilDataCon lit_ty [] [] []) (_UNPK_INT_ s)
  
      mk_char_lit c            = ConPat charDataCon charTy [] [] [LitPat (HsCharPrim c) charPrimTy]
  \end{code}
@@ -390,20 +391,19 @@ mkStringLit str   = mkStringLitFS (_PK_ str)
  
  mkStringLitFS :: FAST_STRING  -> DsM CoreExpr
  mkStringLitFS str
-  | any is_NUL (_UNPK_ str)
-  =     -- Must cater for NULs in literal string
-    dsLookupGlobalValue unpackCString2IdKey    `thenDs` \ unpack_id ->
-    returnDs (mkApps (Var unpack_id)
-                    [Lit (MachStr str),
-                    mkIntLitInt (_LENGTH_ str)])
-
-  | otherwise
-  =    -- No NULs in the string
+  | all safeChar chars
+  =
      dsLookupGlobalValue unpackCStringIdKey     `thenDs` \ unpack_id ->
      returnDs (App (Var unpack_id) (Lit (MachStr str)))
  
+  | otherwise
+  =
+    dsLookupGlobalValue unpackCStringUtf8IdKey `thenDs` \ unpack_id ->
+    returnDs (App (Var unpack_id) (Lit (MachStr (_PK_ (stringToUtf8 chars)))))
+
    where
-    is_NUL c = c == '\0'
+    chars = _UNPK_INT_ str
+    safeChar c = c >= 1 && c <= 0xFF
  \end{code}
  
  %************************************************************************
diff --git a/ghc/compiler/hsSyn/HsBasic.lhs b/ghc/compiler/hsSyn/HsBasic.lhs

index d2721ae..11558f7 100644 (file)
--- a/ghc/compiler/hsSyn/HsBasic.lhs
+++ b/ghc/compiler/hsSyn/HsBasic.lhs
@@ -21,8 +21,8 @@ import Ratio  ( Rational )
  
  \begin{code}
  data HsLit
-  = HsChar         Char        -- characters
-  | HsCharPrim     Char        -- unboxed char literals
+  = HsChar         Int         -- characters
+  | HsCharPrim     Int         -- unboxed char literals
    | HsString       FAST_STRING -- strings
    | HsStringPrim    FAST_STRING        -- packed string
  
@@ -57,10 +57,10 @@ negLiteral (HsFrac f) = HsFrac (-f)
  \begin{code}
  instance Outputable HsLit where
         -- Use "show" because it puts in appropriate escapes
-    ppr (HsChar c)      = text (show c)
-    ppr (HsCharPrim c)  = text (show c) <> char '#'
-    ppr (HsStringPrim s) = pprFSAsString s <> char '#'
-    ppr (HsString s)    = pprFSAsString s
+    ppr (HsChar c)      = pprHsChar c
+    ppr (HsCharPrim c)  = pprHsChar c <> char '#'
+    ppr (HsString s)    = pprHsString s
+    ppr (HsStringPrim s) = pprHsString s <> char '#'
      ppr (HsInt i)       = integer i
      ppr (HsFrac f)      = rational f
      ppr (HsFloatPrim f)         = rational f <> char '#'
diff --git a/ghc/compiler/hsSyn/HsCore.lhs b/ghc/compiler/hsSyn/HsCore.lhs

index c21a2d3..e91e601 100644 (file)
--- a/ghc/compiler/hsSyn/HsCore.lhs
+++ b/ghc/compiler/hsSyn/HsCore.lhs
@@ -190,7 +190,7 @@ pprUfExpr :: Outputable name => (SDoc -> SDoc) -> UfExpr name -> SDoc
  
  pprUfExpr add_par (UfVar v)       = ppr v
  pprUfExpr add_par (UfLit l)       = ppr l
-pprUfExpr add_par (UfLitLit l ty) = add_par (hsep [ptext SLIT("__litlit"), pprFSAsString l, pprParendHsType ty])
+pprUfExpr add_par (UfLitLit l ty) = add_par (hsep [ptext SLIT("__litlit"), pprHsString l, pprParendHsType ty])
  pprUfExpr add_par (UfCCall cc ty) = braces (pprCCallOp cc <+> ppr ty)
  pprUfExpr add_par (UfType ty)     = char '@' <+> pprParendHsType ty
  
@@ -238,7 +238,7 @@ instance Outputable name => Outputable (UfNote name) where
  instance Outputable name => Outputable (UfConAlt name) where
      ppr UfDefault         = text "__DEFAULT"
      ppr (UfLitAlt l)       = ppr l
-    ppr (UfLitLitAlt l ty) = parens (hsep [ptext SLIT("__litlit"), pprFSAsString l, pprParendHsType ty])
+    ppr (UfLitLitAlt l ty) = parens (hsep [ptext SLIT("__litlit"), pprHsString l, pprParendHsType ty])
      ppr (UfDataAlt d)     = ppr d
  
  instance Outputable name => Outputable (UfBinder name) where
diff --git a/ghc/compiler/javaGen/Java.lhs b/ghc/compiler/javaGen/Java.lhs

index de16154..ec2c506 100644 (file)
--- a/ghc/compiler/javaGen/Java.lhs
+++ b/ghc/compiler/javaGen/Java.lhs
@@ -131,7 +131,7 @@ instance Ord Name where
  
  data Lit
    = IntLit Integer     -- unboxed
-  | CharLit Char       -- unboxed
+  | CharLit Int        -- unboxed
    | StringLit String   -- java string
    deriving Show
  
diff --git a/ghc/compiler/javaGen/JavaGen.lhs b/ghc/compiler/javaGen/JavaGen.lhs

index 6093a80..6278a70 100644 (file)
--- a/ghc/compiler/javaGen/JavaGen.lhs
+++ b/ghc/compiler/javaGen/JavaGen.lhs
@@ -605,6 +605,7 @@ suffix _            = ""
  primName :: PrimType -> String
  primName PrimInt       = "int"
  primName PrimChar      = "char"
+primName PrimByte      = "byte"
  primName PrimBoolean   = "boolean"
  primName _             = error "unsupported primitive"
  
@@ -803,6 +804,9 @@ inttype = PrimType PrimInt
  chartype :: Type
  chartype = PrimType PrimChar
  
+bytetype :: Type
+bytetype = PrimType PrimByte
+
  -- This lets you get inside a possible "Value" type,
  -- to access the internal unboxed object.
  access :: Expr -> Type -> Expr
@@ -811,6 +815,7 @@ access expr other           = expr
  
  accessPrim expr PrimInt  = Call expr (Name "intValue" inttype) []
  accessPrim expr PrimChar = Call expr (Name "charValue" chartype) []
+accessPrim expr PrimByte = Call expr (Name "byteValue" bytetype) []
  accessPrim expr other    = pprPanic "accessPrim" (text (show other))
  
  -- This is where we map from typename to types,
@@ -831,6 +836,7 @@ primRepToType ::PrimRep -> Type
  primRepToType PtrRep  = objectType
  primRepToType IntRep  = inttype
  primRepToType CharRep = chartype
+primRepToType Int8Rep = bytetype
  primRepToType AddrRep = objectType
  primRepToType other   = pprPanic "primRepToType" (ppr other)
  
diff --git a/ghc/compiler/javaGen/PrintJava.lhs b/ghc/compiler/javaGen/PrintJava.lhs

index 0db596d..edaf8e5 100644 (file)
--- a/ghc/compiler/javaGen/PrintJava.lhs
+++ b/ghc/compiler/javaGen/PrintJava.lhs
@@ -220,7 +220,7 @@ call e n es = e <> dot <> n <> parens (hsep (punctuate comma (map expr es)))
  literal = \l ->
    case l of
      { IntLit i    -> text (show i)
-    ; CharLit c   -> text (show c)
+    ; CharLit c   -> text "(char)" <+> text (show c)
      ; StringLit s -> text ("\"" ++ s ++ "\"")  -- strings are already printable
      }
  
diff --git a/ghc/compiler/main/Constants.lhs b/ghc/compiler/main/Constants.lhs

index 97d6a3a..660b99f 100644 (file)
--- a/ghc/compiler/main/Constants.lhs
+++ b/ghc/compiler/main/Constants.lhs
@@ -57,6 +57,7 @@ module Constants (
         oTHER_TAG,
  
         mAX_INTLIKE, mIN_INTLIKE,
+       mAX_CHARLIKE, mIN_CHARLIKE,
  
         spRelToInt,
  
@@ -120,6 +121,10 @@ oTHER_TAG = (INFO_OTHER_TAG :: Integer)    -- (-1) unevaluated, probably
  mIN_INTLIKE, mAX_INTLIKE :: Integer    -- Only used to compare with (MachInt Integer)
  mIN_INTLIKE = MIN_INTLIKE
  mAX_INTLIKE = MAX_INTLIKE
+
+mIN_CHARLIKE, mAX_CHARLIKE :: Int      -- Only used to compare with (MachChar Int)
+mIN_CHARLIKE = MIN_CHARLIKE
+mAX_CHARLIKE = MAX_CHARLIKE
  \end{code}
  
  A little function that abstracts the stack direction.  Note that most
diff --git a/ghc/compiler/nativeGen/AbsCStixGen.lhs b/ghc/compiler/nativeGen/AbsCStixGen.lhs

index b9a2c8c..7b0cc21 100644 (file)
--- a/ghc/compiler/nativeGen/AbsCStixGen.lhs
+++ b/ghc/compiler/nativeGen/AbsCStixGen.lhs
@@ -146,7 +146,7 @@ Here we handle top-level things, like @CCodeBlock@s and
         mk_StCLbl_for_SRT :: CLabel -> StixTree
         mk_StCLbl_for_SRT label
            | labelDynamic label
-          = StIndex CharRep (StCLbl label) (StInt 1)
+          = StIndex Int8Rep (StCLbl label) (StInt 1)
            | otherwise
            = StCLbl label
  
@@ -223,7 +223,8 @@ Here we handle top-level things, like @CCodeBlock@s and
         = StData (promote_to_word (getAmodeRep amode)) [a2stix amode]
  
      -- We need to promote any item smaller than a word to a word
-    promote_to_word CharRep = WordRep
+    promote_to_word Int8Rep = IntRep
+    promote_to_word CharRep = IntRep
      promote_to_word other   = other
  
      -- always at least one padding word: this is the static link field
@@ -473,7 +474,7 @@ be tuned.)
  \begin{code}
  
   intTag :: Literal -> Integer
- intTag (MachChar c)  = toInteger (ord c)
+ intTag (MachChar c)  = toInteger c
   intTag (MachInt i)   = i
   intTag (MachWord w)  = intTag (word2IntLit (MachWord w))
   intTag _             = panic "intTag"
diff --git a/ghc/compiler/nativeGen/MachCode.lhs b/ghc/compiler/nativeGen/MachCode.lhs

index cf1aef1..57bdc39 100644 (file)
--- a/ghc/compiler/nativeGen/MachCode.lhs
+++ b/ghc/compiler/nativeGen/MachCode.lhs
@@ -160,7 +160,8 @@ mangleIndexTree (StIndex pk base off)
        ]
    where
      shift DoubleRep    = 3::Integer
-    shift CharRep       = 0::Integer
+    shift CharRep       = 2::Integer
+    shift Int8Rep       = 0::Integer
      shift _            = IF_ARCH_alpha(3,2)
  \end{code}
  
@@ -3249,14 +3250,16 @@ coerceFP2Int x
  %*                                                                     *
  %************************************************************************
  
-Integer to character conversion.  Where applicable, we try to do this
-in one step if the original object is in memory.
+Integer to character conversion.
  
  \begin{code}
  chrCode :: StixTree -> NatM Register
  
  #if alpha_TARGET_ARCH
  
+-- TODO: This is probably wrong, but I don't know Alpha assembler.
+-- It should coerce a 64-bit value to a 32-bit value.
+
  chrCode x
    = getRegister x              `thenNat` \ register ->
      getNewRegNCG IntRep                `thenNat` \ reg ->
@@ -3273,47 +3276,23 @@ chrCode x
  
  chrCode x
    = getRegister x              `thenNat` \ register ->
-    let
-       code__2 dst = let
-                         code = registerCode register dst
-                         src  = registerName register dst
-                     in code `appOL`
-                        if   isFixed register && src /= dst
-                        then toOL [MOV L (OpReg src) (OpReg dst),
-                                   AND L (OpImm (ImmInt 255)) (OpReg dst)]
-                        else unitOL (AND L (OpImm (ImmInt 255)) (OpReg src))
-    in
-    returnNat (Any IntRep code__2)
+    returnNat (
+    case register of
+       Fixed _ reg code -> Fixed IntRep reg code
+       Any   _ code     -> Any   IntRep code
+    )
  
  #endif {- i386_TARGET_ARCH -}
  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #if sparc_TARGET_ARCH
  
-chrCode (StInd pk mem)
-  = getAmode mem               `thenNat` \ amode ->
-    let
-       code    = amodeCode amode
-       src     = amodeAddr amode
-       src_off = addrOffset src 3
-       src__2  = case src_off of Just x -> x
-       code__2 dst = if maybeToBool src_off then
-                       code `snocOL` LD BU src__2 dst
-                   else
-                       code `snocOL`
-                       LD (primRepToSize pk) src dst  `snocOL`
-                       AND False dst (RIImm (ImmInt 255)) dst
-    in
-    returnNat (Any pk code__2)
-
  chrCode x
    = getRegister x              `thenNat` \ register ->
-    getNewRegNCG IntRep                `thenNat` \ reg ->
-    let
-       code = registerCode register reg
-       src  = registerName register reg
-       code__2 dst = code `snocOL` AND False src (RIImm (ImmInt 255)) dst
-    in
-    returnNat (Any IntRep code__2)
+    returnNat (
+    case register of
+       Fixed _ reg code -> Fixed IntRep reg code
+       Any   _ code     -> Any   IntRep code
+    )
  
  #endif {- sparc_TARGET_ARCH -}
  \end{code}
diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs

index 4db56ed..8f5c168 100644 (file)
--- a/ghc/compiler/nativeGen/MachMisc.lhs
+++ b/ghc/compiler/nativeGen/MachMisc.lhs
@@ -100,7 +100,7 @@ sizeOf :: PrimRep -> Integer{-in bytes-}
      -- the result is an Integer only because it's more convenient
  
  sizeOf pr = case (primRepToSize pr) of
-  IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2; L -> 4; SF -> 4;-} _ -> 8},)
+  IF_ARCH_alpha({B -> 1; BU -> 1; {-W -> 2; WU -> 2;-} L -> 4; {-SF -> 4;-} _ -> 8},)
    IF_ARCH_sparc({B -> 1; BU -> 1; {-HW -> 2; HWU -> 2;-} W -> 4; {-D -> 8;-} F -> 4; DF -> 8},)
    IF_ARCH_i386( {B -> 1; {-S -> 2;-} L -> 4; F -> 4; DF -> 8 },)
  \end{code}
@@ -239,7 +239,7 @@ data Size
      | BU
  --  | W            -- word (2 bytes): UNUSED
  --  | WU    -- : UNUSED
---  | L            -- longword (4 bytes): UNUSED
+    | L            -- longword (4 bytes)
      | Q            -- quadword (8 bytes)
  --  | FF    -- VAX F-style floating pt: UNUSED
  --  | GF    -- VAX G-style floating pt: UNUSED
@@ -274,7 +274,8 @@ primRepToSize CodePtrRep    = IF_ARCH_alpha( Q,      IF_ARCH_i386( L, IF_ARCH_sparc(
  primRepToSize DataPtrRep    = IF_ARCH_alpha( Q,         IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
  primRepToSize RetRep       = IF_ARCH_alpha( Q,  IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
  primRepToSize CostCentreRep = IF_ARCH_alpha( Q,         IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
-primRepToSize CharRep      = IF_ARCH_alpha( BU, IF_ARCH_i386( B, IF_ARCH_sparc( BU,)))
+primRepToSize CharRep      = IF_ARCH_alpha( L,  IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
+primRepToSize Int8Rep      = IF_ARCH_alpha( B,  IF_ARCH_i386( B, IF_ARCH_sparc( B ,)))
  primRepToSize IntRep       = IF_ARCH_alpha( Q,  IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
  primRepToSize WordRep      = IF_ARCH_alpha( Q,  IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
  primRepToSize AddrRep      = IF_ARCH_alpha( Q,  IF_ARCH_i386( L, IF_ARCH_sparc( W ,)))
diff --git a/ghc/compiler/nativeGen/PprMach.lhs b/ghc/compiler/nativeGen/PprMach.lhs

index 697785e..5235a5c 100644 (file)
--- a/ghc/compiler/nativeGen/PprMach.lhs
+++ b/ghc/compiler/nativeGen/PprMach.lhs
@@ -165,7 +165,7 @@ pprSize x = ptext (case x of
          BU -> SLIT("bu")
  --      W  -> SLIT("w") UNUSED
  --      WU -> SLIT("wu") UNUSED
---      L  -> SLIT("l") UNUSED
+        L  -> SLIT("l")
          Q  -> SLIT("q")
  --      FF -> SLIT("f") UNUSED
  --      DF -> SLIT("d") UNUSED
diff --git a/ghc/compiler/nativeGen/StixPrim.lhs b/ghc/compiler/nativeGen/StixPrim.lhs

index dc3bee7..a78c4d6 100644 (file)
--- a/ghc/compiler/nativeGen/StixPrim.lhs
+++ b/ghc/compiler/nativeGen/StixPrim.lhs
@@ -18,7 +18,7 @@ import Literal                ( Literal(..), word2IntLit )
  import PrimOp          ( PrimOp(..), CCall(..), CCallTarget(..) )
  import PrimRep         ( PrimRep(..), isFloatingRep )
  import UniqSupply      ( returnUs, thenUs, getUniqueUs, UniqSM )
-import Constants       ( mIN_INTLIKE, uF_UPDATEE, bLOCK_SIZE )
+import Constants       ( mIN_INTLIKE, mIN_CHARLIKE, uF_UPDATEE, bLOCK_SIZE )
  import CLabel          ( mkIntlikeClosureLabel, mkCharlikeClosureLabel,
                           mkMAP_FROZEN_infoLabel, mkForeignLabel )
  import Outputable
@@ -180,7 +180,7 @@ primCode [] WriteForeignObjOp [obj, v]
      returnUs (\xs -> assign : xs)
  
  -- NB: indexing in "pk" units, *not* in bytes (WDP 95/09)
-primCode ls IndexByteArrayOp_Char      rs = primCode_ReadByteArrayOp CharRep      ls rs
+primCode ls IndexByteArrayOp_Char      rs = primCode_ReadByteArrayOp Int8Rep      ls rs
  primCode ls IndexByteArrayOp_Int       rs = primCode_ReadByteArrayOp IntRep       ls rs
  primCode ls IndexByteArrayOp_Word      rs = primCode_ReadByteArrayOp WordRep      ls rs
  primCode ls IndexByteArrayOp_Addr      rs = primCode_ReadByteArrayOp AddrRep      ls rs
@@ -190,7 +190,7 @@ primCode ls IndexByteArrayOp_StablePtr rs = primCode_ReadByteArrayOp StablePtrRe
  primCode ls IndexByteArrayOp_Int64     rs = primCode_ReadByteArrayOp Int64Rep     ls rs
  primCode ls IndexByteArrayOp_Word64    rs = primCode_ReadByteArrayOp Word64Rep    ls rs
  
-primCode ls ReadByteArrayOp_Char      rs = primCode_ReadByteArrayOp CharRep      ls rs
+primCode ls ReadByteArrayOp_Char      rs = primCode_ReadByteArrayOp Int8Rep      ls rs
  primCode ls ReadByteArrayOp_Int       rs = primCode_ReadByteArrayOp IntRep       ls rs
  primCode ls ReadByteArrayOp_Word      rs = primCode_ReadByteArrayOp WordRep      ls rs
  primCode ls ReadByteArrayOp_Addr      rs = primCode_ReadByteArrayOp AddrRep      ls rs
@@ -200,7 +200,7 @@ primCode ls ReadByteArrayOp_StablePtr rs = primCode_ReadByteArrayOp StablePtrRep
  primCode ls ReadByteArrayOp_Int64     rs = primCode_ReadByteArrayOp Int64Rep     ls rs
  primCode ls ReadByteArrayOp_Word64    rs = primCode_ReadByteArrayOp Word64Rep    ls rs
  
-primCode ls ReadOffAddrOp_Char      rs = primCode_IndexOffAddrOp CharRep      ls rs
+primCode ls ReadOffAddrOp_Char      rs = primCode_IndexOffAddrOp Int8Rep      ls rs
  primCode ls ReadOffAddrOp_Int       rs = primCode_IndexOffAddrOp IntRep       ls rs
  primCode ls ReadOffAddrOp_Word      rs = primCode_IndexOffAddrOp WordRep      ls rs
  primCode ls ReadOffAddrOp_Addr      rs = primCode_IndexOffAddrOp AddrRep      ls rs
@@ -210,7 +210,7 @@ primCode ls ReadOffAddrOp_StablePtr rs = primCode_IndexOffAddrOp StablePtrRep ls
  primCode ls ReadOffAddrOp_Int64     rs = primCode_IndexOffAddrOp Int64Rep     ls rs
  primCode ls ReadOffAddrOp_Word64    rs = primCode_IndexOffAddrOp Word64Rep    ls rs
  
-primCode ls IndexOffAddrOp_Char      rs = primCode_IndexOffAddrOp CharRep      ls rs
+primCode ls IndexOffAddrOp_Char      rs = primCode_IndexOffAddrOp Int8Rep      ls rs
  primCode ls IndexOffAddrOp_Int       rs = primCode_IndexOffAddrOp IntRep       ls rs
  primCode ls IndexOffAddrOp_Word      rs = primCode_IndexOffAddrOp WordRep      ls rs
  primCode ls IndexOffAddrOp_Addr      rs = primCode_IndexOffAddrOp AddrRep      ls rs
@@ -220,7 +220,7 @@ primCode ls IndexOffAddrOp_StablePtr rs = primCode_IndexOffAddrOp StablePtrRep l
  primCode ls IndexOffAddrOp_Int64     rs = primCode_IndexOffAddrOp Int64Rep     ls rs
  primCode ls IndexOffAddrOp_Word64    rs = primCode_IndexOffAddrOp Word64Rep    ls rs
  
-primCode ls IndexOffForeignObjOp_Char      rs = primCode_IndexOffForeignObjOp CharRep      ls rs
+primCode ls IndexOffForeignObjOp_Char      rs = primCode_IndexOffForeignObjOp Int8Rep      ls rs
  primCode ls IndexOffForeignObjOp_Int       rs = primCode_IndexOffForeignObjOp IntRep       ls rs
  primCode ls IndexOffForeignObjOp_Word      rs = primCode_IndexOffForeignObjOp WordRep      ls rs
  primCode ls IndexOffForeignObjOp_Addr      rs = primCode_IndexOffForeignObjOp AddrRep      ls rs
@@ -230,7 +230,7 @@ primCode ls IndexOffForeignObjOp_StablePtr rs = primCode_IndexOffForeignObjOp St
  primCode ls IndexOffForeignObjOp_Int64     rs = primCode_IndexOffForeignObjOp Int64Rep     ls rs
  primCode ls IndexOffForeignObjOp_Word64    rs = primCode_IndexOffForeignObjOp Word64Rep    ls rs
  
-primCode ls WriteOffAddrOp_Char      rs = primCode_WriteOffAddrOp CharRep      ls rs
+primCode ls WriteOffAddrOp_Char      rs = primCode_WriteOffAddrOp Int8Rep      ls rs
  primCode ls WriteOffAddrOp_Int       rs = primCode_WriteOffAddrOp IntRep       ls rs
  primCode ls WriteOffAddrOp_Word      rs = primCode_WriteOffAddrOp WordRep      ls rs
  primCode ls WriteOffAddrOp_Addr      rs = primCode_WriteOffAddrOp AddrRep      ls rs
@@ -240,7 +240,7 @@ primCode ls WriteOffAddrOp_StablePtr rs = primCode_WriteOffAddrOp StablePtrRep l
  primCode ls WriteOffAddrOp_Int64     rs = primCode_WriteOffAddrOp Int64Rep     ls rs
  primCode ls WriteOffAddrOp_Word64    rs = primCode_WriteOffAddrOp Word64Rep    ls rs
  
-primCode ls WriteByteArrayOp_Char      rs = primCode_WriteByteArrayOp CharRep      ls rs
+primCode ls WriteByteArrayOp_Char      rs = primCode_WriteByteArrayOp Int8Rep      ls rs
  primCode ls WriteByteArrayOp_Int       rs = primCode_WriteByteArrayOp IntRep       ls rs
  primCode ls WriteByteArrayOp_Word      rs = primCode_WriteByteArrayOp WordRep      ls rs
  primCode ls WriteByteArrayOp_Addr      rs = primCode_WriteByteArrayOp AddrRep      ls rs
@@ -482,17 +482,15 @@ amodeToStix (CLbl      lbl _) = StCLbl lbl
   -- For CharLike and IntLike, we attempt some trivial constant-folding here.
  
  amodeToStix (CCharLike (CLit (MachChar c)))
-  = StIndex CharRep cHARLIKE_closure (StInt (toInteger off))
+  = StIndex Int8Rep cHARLIKE_closure (StInt (toInteger off))
    where
-    off = charLikeSize * ord c
+    off = charLikeSize * (c - mIN_CHARLIKE)
  
  amodeToStix (CCharLike x)
-  = StIndex CharRep cHARLIKE_closure off
-  where
-    off = StPrim IntMulOp [amodeToStix x, StInt (toInteger charLikeSize)]
+  = panic "CCharLike"
  
  amodeToStix (CIntLike (CLit (MachInt i)))
-  = StIndex CharRep{-yes,really-} iNTLIKE_closure (StInt (toInteger off))
+  = StIndex Int8Rep iNTLIKE_closure (StInt (toInteger off))
    where
      off = intLikeSize * (fromInteger (i - mIN_INTLIKE))
  
@@ -501,7 +499,7 @@ amodeToStix (CIntLike x)
  
  amodeToStix (CLit core)
    = case core of
-      MachChar c     -> StInt (toInteger (ord c))
+      MachChar c     -> StInt (toInteger c)
        MachStr s             -> StString s
        MachAddr a     -> StInt a
        MachInt i      -> StInt i
diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs

index dd020e7..88667c4 100644 (file)
--- a/ghc/compiler/parser/Lex.lhs
+++ b/ghc/compiler/parser/Lex.lhs
@@ -52,7 +52,7 @@ import FastString
  import StringBuffer
  import GlaExts
  import Ctype
-import Char            ( chr )
+import Char            ( chr, ord )
  import PrelRead        ( readRational__ ) -- Glasgow non-std
  \end{code}
  
@@ -209,12 +209,12 @@ data Token
  
    | ITpragma StringBuffer
  
-  | ITchar       Char 
+  | ITchar       Int
    | ITstring     FAST_STRING
-  | ITinteger    Integer 
+  | ITinteger    Integer
    | ITrational   Rational
  
-  | ITprimchar   Char
+  | ITprimchar   Int
    | ITprimstring FAST_STRING
    | ITprimint    Integer
    | ITprimfloat  Rational
@@ -571,7 +571,7 @@ lexToken cont glaexts buf =
            _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
  
      -- strings/characters -------------------------------------------------
-    '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
+    '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
      '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
  
      -- strictness and cpr pragmas and __scc treated specially.
@@ -639,9 +639,11 @@ lex_prag cont buf
  lex_string cont glaexts s buf
    = case currentChar# buf of
         '"'#{-"-} -> 
-          let buf' = incLexeme buf; s' = mkFastString (reverse s) in
+          let buf' = incLexeme buf; s' = mkFastStringInt (reverse s) in
            case currentChar# buf' of
-               '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
+               '#'# | flag glaexts -> if all (<= 0xFF) s
+                    then cont (ITprimstring s') (incLexeme buf')
+                    else lexError "primitive string literal must contain only characters <= '\xFF'" buf'
                 _                   -> cont (ITstring s') buf'
  
         -- ignore \& in a string, deal with string gaps
@@ -666,11 +668,11 @@ lex_stringgap cont glaexts s buf
  
  lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
  
-lex_char :: (Int# -> Char -> P a) -> Int# -> P a
+lex_char :: (Int# -> Int -> P a) -> Int# -> P a
  lex_char cont glaexts buf
    = case currentChar# buf of
         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
-       c | is_any c -> cont glaexts (C# c) (incLexeme buf)
+       c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
         other -> charError buf
  
  char_end cont glaexts c buf
@@ -685,19 +687,19 @@ char_end cont glaexts c buf
  lex_escape cont buf
    = let buf' = incLexeme buf in
      case currentChar# buf of
-       'a'#       -> cont '\a' buf'
-       'b'#       -> cont '\b' buf'
-       'f'#       -> cont '\f' buf'
-       'n'#       -> cont '\n' buf'
-       'r'#       -> cont '\r' buf'
-       't'#       -> cont '\t' buf'
-       'v'#       -> cont '\v' buf'
-       '\\'#      -> cont '\\' buf'
-       '"'#       -> cont '\"' buf'
-       '\''#      -> cont '\'' buf'
+       'a'#       -> cont (ord '\a') buf'
+       'b'#       -> cont (ord '\b') buf'
+       'f'#       -> cont (ord '\f') buf'
+       'n'#       -> cont (ord '\n') buf'
+       'r'#       -> cont (ord '\r') buf'
+       't'#       -> cont (ord '\t') buf'
+       'v'#       -> cont (ord '\v') buf'
+       '\\'#      -> cont (ord '\\') buf'
+       '"'#       -> cont (ord '\"') buf'
+       '\''#      -> cont (ord '\'') buf'
         '^'#       -> let c = currentChar# buf' in
                       if c `geChar#` '@'# && c `leChar#` '_'#
-                       then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
+                       then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
                         else charError buf'
  
         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
@@ -707,13 +709,12 @@ lex_escape cont buf
  
         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
                                        Just buf2 <- [prefixMatch buf p] ] of
-                           (c,buf2):_ -> cont c buf2
+                           (c,buf2):_ -> cont (ord c) buf2
                             [] -> charError buf'
  
-after_charnum cont i buf 
-  = let int = fromInteger i in
-    if i >= 0 && i <= 255 
-       then cont (chr int) buf
+after_charnum cont i buf
+  = if i >= 0 && i <= 0x7FFFFFFF
+       then cont (fromInteger i) buf
         else charError buf
  
  readNum cont buf is_digit base conv = read buf 0
@@ -951,7 +952,7 @@ lex_qid cont glaexts mod buf just_a_conid =
       _    -> just_a_conid
  
    '-'# -> case lookAhead# buf 1# of
-            '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
+            '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
              _    -> lex_id3 cont glaexts mod buf just_a_conid
    _    -> lex_id3 cont glaexts mod buf just_a_conid
  
diff --git a/ghc/compiler/prelude/PrelInfo.lhs b/ghc/compiler/prelude/PrelInfo.lhs

index ae88f95..3a8f5a6 100644 (file)
--- a/ghc/compiler/prelude/PrelInfo.lhs
+++ b/ghc/compiler/prelude/PrelInfo.lhs
@@ -266,9 +266,9 @@ knownKeyNames
      , (map_RDR,                        mapIdKey)
      , (append_RDR,             appendIdKey)
      , (unpackCString_RDR,      unpackCStringIdKey)
-    , (unpackCString2_RDR,     unpackCString2IdKey)
      , (unpackCStringAppend_RDR,        unpackCStringAppendIdKey)
      , (unpackCStringFoldr_RDR, unpackCStringFoldrIdKey)
+    , (unpackCStringUtf8_RDR,          unpackCStringUtf8IdKey)
  
         -- List operations
      , (concat_RDR,             concatIdKey)
diff --git a/ghc/compiler/prelude/PrelNames.lhs b/ghc/compiler/prelude/PrelNames.lhs

index 3c2d26c..d7a86c1 100644 (file)
--- a/ghc/compiler/prelude/PrelNames.lhs
+++ b/ghc/compiler/prelude/PrelNames.lhs
@@ -58,7 +58,8 @@ module PrelNames
         concat_RDR, filter_RDR, zip_RDR, augment_RDR,
         otherwiseId_RDR, assert_RDR, runSTRep_RDR,
  
-       unpackCString_RDR, unpackCString2_RDR, unpackCStringAppend_RDR, unpackCStringFoldr_RDR,
+       unpackCString_RDR, unpackCStringAppend_RDR, unpackCStringFoldr_RDR,
+        unpackCStringUtf8_RDR,
         numClass_RDR, fractionalClass_RDR, eqClass_RDR, 
         ccallableClass_RDR, creturnableClass_RDR,
         monadClass_RDR, enumClass_RDR, ordClass_RDR,
@@ -209,9 +210,9 @@ augment_RDR    = varQual pREL_BASE_Name SLIT("augment")
  
  -- Strings
  unpackCString_RDR       = varQual pREL_BASE_Name SLIT("unpackCString#")
-unpackCString2_RDR      = varQual pREL_BASE_Name SLIT("unpackNBytes#")
  unpackCStringAppend_RDR = varQual pREL_BASE_Name SLIT("unpackAppendCString#")
  unpackCStringFoldr_RDR  = varQual pREL_BASE_Name SLIT("unpackFoldrCString#")
+unpackCStringUtf8_RDR   = varQual pREL_BASE_Name SLIT("unpackCStringUtf8#")
  
  -- Classes Eq and Ord
  eqClass_RDR            = clsQual pREL_BASE_Name SLIT("Eq")
diff --git a/ghc/compiler/prelude/PrimRep.lhs b/ghc/compiler/prelude/PrimRep.lhs

index 70bb367..6c479d1 100644 (file)
--- a/ghc/compiler/prelude/PrimRep.lhs
+++ b/ghc/compiler/prelude/PrimRep.lhs
@@ -47,6 +47,7 @@ data PrimRep
    | CostCentreRep      -- Pointer to a cost centre
  
    | CharRep            -- Machine characters
+  | Int8Rep             --         8 bit integers
    | IntRep             --         integers (same size as ptr on this arch)
    | WordRep            --         ditto (but *unsigned*)
    | AddrRep            --         addresses ("C pointers")
@@ -54,7 +55,13 @@ data PrimRep
    | DoubleRep          --         doubles
    | Word64Rep          --    guaranteed to be 64 bits (no more, no less.)
    | Int64Rep           --    guaranteed to be 64 bits (no more, no less.)
-
+  
+  -- Perhaps all sized integers and words should be primitive types.
+  
+  -- Int8Rep is currently used to simulate some old CharRep usages
+  -- when Char changed size from 8 to 31 bits. It does not correspond
+  -- to a Haskell unboxed type, in particular it's not used by Int8.
+  
    | WeakPtrRep
    | ForeignObjRep      
    | BCORep
@@ -113,7 +120,7 @@ isFollowableRep ForeignObjRep = True        --      ''
  isFollowableRep StableNameRep = True    --      ''
  isFollowableRep ThreadIdRep   = True   -- pointer to a TSO
  
-isFollowableRep other          = False
+isFollowableRep other        = False
  
  separateByPtrFollowness :: (a -> PrimRep) -> [a] -> ([a], [a])
  
@@ -171,11 +178,12 @@ retPrimRepSize = getPrimRepSize RetRep
  -- size in bytes, ToDo: cpp in the right vals.
  -- (used in some settings to figure out how many bytes
  -- we have to push onto the stack when calling external
--- entry points (e.g., stdcalling on win32))
+-- entry points (e.g., stdcalling on win32)
  getPrimRepSizeInBytes :: PrimRep -> Int
  getPrimRepSizeInBytes pr =
   case pr of
-    CharRep        ->    1
+    CharRep        ->    4
+    Int8Rep        ->    1
      IntRep         ->    4
      AddrRep        ->    4
      FloatRep       ->    4
@@ -211,6 +219,7 @@ showPrimRep DataPtrRep     = "D_"
  showPrimRep RetRep         = "P_"
  showPrimRep CostCentreRep  = "CostCentre"
  showPrimRep CharRep       = "C_"
+showPrimRep Int8Rep       = "StgInt8"
  showPrimRep IntRep        = "I_"       -- short for StgInt
  showPrimRep WordRep       = "W_"       -- short for StgWord
  showPrimRep Int64Rep       = "LI_"       -- short for StgLongInt
@@ -228,6 +237,7 @@ showPrimRep ForeignObjRep  = "StgAddr"
  showPrimRep VoidRep       = "!!VOID_KIND!!"
  
  primRepString CharRep          = "Char"
+primRepString Int8Rep          = "Char" -- To have names like newCharArray#
  primRepString IntRep           = "Int"
  primRepString WordRep          = "Word"
  primRepString Int64Rep         = "Int64"
diff --git a/ghc/compiler/prelude/TysPrim.lhs b/ghc/compiler/prelude/TysPrim.lhs

index ff4e305..4be0716 100644 (file)
--- a/ghc/compiler/prelude/TysPrim.lhs
+++ b/ghc/compiler/prelude/TysPrim.lhs
@@ -324,6 +324,7 @@ primitive TyCon for a given PrimRep.
  
  \begin{code}
  primRepTyCon CharRep   = charPrimTyCon
+primRepTyCon Int8Rep   = charPrimTyCon
  primRepTyCon IntRep    = intPrimTyCon
  primRepTyCon WordRep   = wordPrimTyCon
  primRepTyCon Int64Rep  = int64PrimTyCon
diff --git a/ghc/compiler/rename/Rename.lhs b/ghc/compiler/rename/Rename.lhs

index 8adfdf3..0f9fe08 100644 (file)
--- a/ghc/compiler/rename/Rename.lhs
+++ b/ghc/compiler/rename/Rename.lhs
@@ -49,7 +49,7 @@ import NameSet
  import TysWiredIn      ( unitTyCon, intTyCon, doubleTyCon, boolTyCon )
  import PrelRules       ( builtinRules )
  import PrelInfo                ( mAIN_Name, pREL_MAIN_Name, pRELUDE_Name,
-                         ioTyCon_RDR, unpackCString_RDR, unpackCString2_RDR, unpackCStringFoldr_RDR,
+                         ioTyCon_RDR, unpackCString_RDR, unpackCStringFoldr_RDR, unpackCStringUtf8_RDR,
                           fractionalClassKeys, derivingOccurrences 
                         )
  import Type            ( namesOfType, funTyCon )
@@ -243,7 +243,7 @@ implicitFVs mod_name decls
      implicit_occs = string_occs ++ foldr ((++) . get) implicit_main decls
  
         -- Virtually every program has error messages in it somewhere
-    string_occs = [unpackCString_RDR, unpackCString2_RDR, unpackCStringFoldr_RDR]
+    string_occs = [unpackCString_RDR, unpackCStringFoldr_RDR, unpackCStringUtf8_RDR]
  
      get (TyClD (TyData _ _ _ _ _ _ (Just deriv_classes) _ _))
         = concat (map get_deriv deriv_classes)
diff --git a/ghc/compiler/typecheck/TcTyDecls.lhs b/ghc/compiler/typecheck/TcTyDecls.lhs

index acb1558..955d812 100644 (file)
--- a/ghc/compiler/typecheck/TcTyDecls.lhs
+++ b/ghc/compiler/typecheck/TcTyDecls.lhs
@@ -50,7 +50,7 @@ import Type           ( getTyVar, tyVarsOfTypes, splitFunTy, applyTys,
  import TysWiredIn      ( unitTy )
  import Var             ( tyVarKind )
  import VarSet          ( intersectVarSet, isEmptyVarSet )
-import Unique          ( unpackCStringIdKey )
+import Unique          ( unpackCStringIdKey, unpackCStringUtf8IdKey )
  import Util            ( equivClasses )
  import FiniteMap        ( FiniteMap, lookupWithDefaultFM )
  \end{code}
@@ -282,7 +282,8 @@ mkRecordSelector tycon fields@((first_con, first_field_label) : other_fields)
    = checkTc (all (== field_ty) other_tys)
             (fieldTypeMisMatch field_name)      `thenTc_`
      tcLookupValueByKey unpackCStringIdKey      `thenTc` \ unpack_id ->
-    returnTc (mkRecordSelId tycon first_field_label unpack_id)
+    tcLookupValueByKey unpackCStringUtf8IdKey  `thenTc` \ unpackUtf8_id ->
+    returnTc (mkRecordSelId tycon first_field_label unpack_id unpackUtf8_id)
    where
      field_ty   = fieldLabelType first_field_label
      field_name = fieldLabelName first_field_label
diff --git a/ghc/compiler/utils/FastString.lhs b/ghc/compiler/utils/FastString.lhs

index f0e7d9c..95ed4a5 100644 (file)
--- a/ghc/compiler/utils/FastString.lhs
+++ b/ghc/compiler/utils/FastString.lhs
@@ -26,16 +26,18 @@ module FastString
          mkFastSubStringBA#, -- :: ByteArray# -> Int# -> Int# -> FastString
          mkFastSubString#,   -- :: Addr# -> Int# -> Int# -> FastString
          mkFastSubStringFO#, -- :: ForeignObj# -> Int# -> Int# -> FastString
-       
+
+        mkFastStringInt,    -- :: [Int] -> FastString
+
          uniqueOfFS,        -- :: FastString -> Int#
         lengthFS,           -- :: FastString -> Int
         nullFastString,     -- :: FastString -> Bool
  
-       getByteArray#,      -- :: FastString -> ByteArray#
-        getByteArray,       -- :: FastString -> _ByteArray Int
         unpackFS,           -- :: FastString -> String
+       unpackIntFS,        -- :: FastString -> [Int]
         appendFS,           -- :: FastString -> FastString -> FastString
          headFS,                    -- :: FastString -> Char
+        headIntFS,         -- :: FastString -> Int
          tailFS,                    -- :: FastString -> FastString
         concatFS,           -- :: [FastString] -> FastString
          consFS,             -- :: Char -> FastString -> FastString
@@ -103,6 +105,7 @@ import Foreign              ( ForeignObj(..) )
  
  import IOExts          ( IORef, newIORef, readIORef, writeIORef )
  import IO
+import Char             ( chr, ord )
  
  #define hASH_TBL_SIZE 993
  
@@ -130,6 +133,10 @@ data FastString
        Addr#      -- pointer to the (null-terminated) bytes in C land.
        Int#       -- length  (cached)
  
+  | UnicodeStr   -- if contains characters outside '\1'..'\xFF'
+      Int#       -- unique id
+      [Int]      -- character numbers
+
  instance Eq FastString where
    a == b = case cmpFS a b of { LT -> False; EQ -> True;  GT -> False }
    a /= b = case cmpFS a b of { LT -> True;  EQ -> False; GT -> True  }
@@ -145,23 +152,16 @@ instance Ord FastString where
              | otherwise        =  y
      compare a b = cmpFS a b
  
-getByteArray# :: FastString -> ByteArray#
-getByteArray# (FastString _ _ ba#) = ba#
-
-getByteArray :: FastString -> ByteArray Int
-#if __GLASGOW_HASKELL__ < 405
-getByteArray (FastString _ l# ba#) = ByteArray (0,I# l#) ba#
-#else
-getByteArray (FastString _ l# ba#) = ByteArray 0 (I# l#) ba#
-#endif
-
  lengthFS :: FastString -> Int
  lengthFS (FastString _ l# _) = I# l#
  lengthFS (CharStr a# l#) = I# l#
+lengthFS (UnicodeStr _ s) = length s
  
  nullFastString :: FastString -> Bool
  nullFastString (FastString _ l# _) = l# ==# 0#
  nullFastString (CharStr _ l#) = l# ==# 0#
+nullFastString (UnicodeStr _ []) = True
+nullFastString (UnicodeStr _ (_:_)) = False
  
  unpackFS :: FastString -> String
  unpackFS (FastString _ l# ba#) = unpackNBytesBA# ba# l#
@@ -173,18 +173,29 @@ unpackFS (CharStr addr len#) =
        | otherwise   = C# ch : unpack (nh +# 1#)
        where
         ch = indexCharOffAddr# addr nh
+unpackFS (UnicodeStr _ s) = map chr s
+
+unpackIntFS :: FastString -> [Int]
+unpackIntFS (UnicodeStr _ s) = s
+unpackIntFS fs = map ord (unpackFS fs)
  
  appendFS :: FastString -> FastString -> FastString
-appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
+appendFS fs1 fs2 = mkFastStringInt (unpackIntFS fs1 ++ unpackIntFS fs2)
  
  concatFS :: [FastString] -> FastString
-concatFS ls = mkFastString (concat (map (unpackFS) ls)) -- ToDo: do better
+concatFS ls = mkFastStringInt (concat (map unpackIntFS ls)) -- ToDo: do better
  
  headFS :: FastString -> Char
-headFS f@(FastString _ l# ba#) = 
- if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
-headFS f@(CharStr a# l#) = 
- if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS: " ++ unpackFS f)
+headFS (FastString _ l# ba#) = 
+ if l# ># 0# then C# (indexCharArray# ba# 0#) else error ("headFS: empty FS")
+headFS (CharStr a# l#) = 
+ if l# ># 0# then C# (indexCharOffAddr# a# 0#) else error ("headFS: empty FS")
+headFS (UnicodeStr _ (c:_)) = chr c
+headFS (UnicodeStr _ []) = error ("headFS: empty FS")
+
+headIntFS :: FastString -> Int
+headIntFS (UnicodeStr _ (c:_)) = c
+headIntFS fs = ord (headFS fs)
  
  indexFS :: FastString -> Int -> Char
  indexFS f i@(I# i#) =
@@ -195,14 +206,16 @@ indexFS f i@(I# i#) =
     CharStr a# l#
       | l# ># 0# && l# ># i#  -> C# (indexCharOffAddr# a# i#)
       | otherwise            -> error (msg (I# l#))
+   UnicodeStr _ s           -> chr (s!!i)
   where
    msg l =  "indexFS: out of range: " ++ show (l,i)
  
  tailFS :: FastString -> FastString
  tailFS (FastString _ l# ba#) = mkFastSubStringBA# ba# 1# (l# -# 1#)
+tailFS fs = mkFastStringInt (tail (unpackIntFS fs))
  
  consFS :: Char -> FastString -> FastString
-consFS c fs = mkFastString (c:unpackFS fs)
+consFS c fs = mkFastStringInt (ord c : unpackIntFS fs)
  
  uniqueOfFS :: FastString -> Int#
  uniqueOfFS (FastString u# _ _) = u#
@@ -219,6 +232,7 @@ uniqueOfFS (CharStr a# l#)     = case mkFastString# a# l# of { FastString u# _ _
       works, but causes the CharStr to be looked up in the hash
       table each time it is accessed..
     -}
+uniqueOfFS (UnicodeStr u# _) = u#
  \end{code}
  
  Internally, the compiler will maintain a fast string symbol
@@ -226,6 +240,10 @@ table, providing sharing and fast comparison. Creation of
  new @FastString@s then covertly does a lookup, re-using the
  @FastString@ if there was a hit.
  
+Caution: mkFastStringUnicode assumes that if the string is in the
+table, it sits under the UnicodeStr constructor. Other mkFastString
+variants analogously assume the FastString constructor.
+
  \begin{code}
  data FastStringTable = 
   FastStringTable
@@ -315,6 +333,8 @@ mkFastString# a# len# =
          Just v
        else
          bucket_match ls len# a#
+   bucket_match (UnicodeStr _ _ : ls) len# a# =
+      bucket_match ls len# a#
  
  mkFastSubString# :: Addr# -> Int# -> Int# -> FastString
  mkFastSubString# a# start# len# = mkFastCharString2 (A# (addrOffset# a# start#)) (I# len#)
@@ -362,7 +382,8 @@ mkFastSubStringFO# fo# start# len# =
          Just v
        else
          bucket_match ls start# len# fo#
-
+   bucket_match (UnicodeStr _ _ : ls) start# len# fo# =
+      bucket_match ls start# len# fo#
  
  mkFastSubStringBA# :: ByteArray# -> Int# -> Int# -> FastString
  mkFastSubStringBA# barr# start# len# =
@@ -421,6 +442,44 @@ mkFastSubStringBA# barr# start# len# =
          Just v
        else
          bucket_match ls start# len# ba#
+     UnicodeStr _ _ -> bucket_match ls start# len# ba#
+
+mkFastStringUnicode :: [Int] -> FastString
+mkFastStringUnicode s =
+ unsafePerformIO  (
+  readIORef string_table       >>= \ ft@(FastStringTable uid# tbl#) ->
+  let
+   h = hashUnicode s
+  in
+--  _trace ("hashed(b): "++show (I# h)) $
+  lookupTbl ft h               >>= \ lookup_result ->
+  case lookup_result of
+    [] -> 
+       -- no match, add it to table by copying out the
+       -- the string into a [Int]
+          let f_str = UnicodeStr uid# s in
+          updTbl string_table ft h [f_str]     >>
+          -- _trace ("new(b): " ++ show f_str)   $
+         return f_str
+    ls -> 
+       -- non-empty `bucket', scan the list looking
+       -- entry with same length and compare byte by byte. 
+       -- _trace ("non-empty bucket(b)"++show ls) $
+       case bucket_match ls of
+        Nothing -> 
+              let f_str = UnicodeStr uid# s in
+              updTbl string_table ft h (f_str:ls) >>
+             -- _trace ("new(b): " ++ show f_str)   $
+             return f_str
+        Just v  -> 
+              -- _trace ("re-use(b): "++show v) $
+             return v
+  )
+ where
+   bucket_match [] = Nothing
+   bucket_match (v@(UnicodeStr _ s'):ls) =
+       if s' == s then Just v else bucket_match ls
+   bucket_match (FastString _ _ _ : ls) = bucket_match ls
  
  mkFastCharString :: Addr -> FastString
  mkFastCharString a@(A# a#) = 
@@ -433,8 +492,8 @@ mkFastCharString# a# =
  mkFastCharString2 :: Addr -> Int -> FastString
  mkFastCharString2 a@(A# a#) (I# len#) = CharStr a# len#
  
-mkFastString :: String -> FastString
-mkFastString str = 
+mkFastStringNarrow :: String -> FastString
+mkFastStringNarrow str =
   case packString str of
  #if __GLASGOW_HASKELL__ < 405
    (ByteArray (_,I# len#) frozen#) -> 
@@ -445,6 +504,20 @@ mkFastString str =
      {- 0-indexed array, len# == index to one beyond end of string,
         i.e., (0,1) => empty string.    -}
  
+mkFastString :: String -> FastString
+mkFastString str = if all good str
+    then mkFastStringNarrow str
+    else mkFastStringUnicode (map ord str)
+    where
+    good c = c >= '\1' && c <= '\xFF'
+
+mkFastStringInt :: [Int] -> FastString
+mkFastStringInt str = if all good str
+    then mkFastStringNarrow (map chr str)
+    else mkFastStringUnicode str
+    where
+    good c = c >= 1 && c <= 0xFF
+
  mkFastSubString :: Addr -> Int -> Int -> FastString
  mkFastSubString (A# a#) (I# start#) (I# len#) =
   mkFastString# (addrOffset# a# start#) len#
@@ -505,10 +578,26 @@ hashSubStrBA ba# start# len# =
  --    c1 = indexCharArray# ba# 1#
  --    c2 = indexCharArray# ba# 2#
  
+hashUnicode :: [Int] -> Int#
+ -- use the Addr to produce a hash value between 0 & m (inclusive)
+hashUnicode [] = 0#
+hashUnicode [I# c0] = ((c0 *# 631#) +# 1#) `remInt#` hASH_TBL_SIZE#
+hashUnicode [I# c0, I# c1] = ((c0 *# 631#) +# (c1 *# 217#) +# 2#) `remInt#` hASH_TBL_SIZE#
+hashUnicode s = ((c0 *# 631#) +# (c1 *# 217#) +# (c2 *# 43#) +# len#) `remInt#` hASH_TBL_SIZE#
+  where
+    I# len# = length s
+    I# c0 = s !! 0
+    I# c1 = s !! (I# (len# `quotInt#` 2# -# 1#))
+    I# c2 = s !! (I# (len# -# 1#))
+
  \end{code}
  
  \begin{code}
  cmpFS :: FastString -> FastString -> Ordering
+cmpFS (UnicodeStr u1# s1) (UnicodeStr u2# s2) = if u1# ==# u2# then EQ
+    else compare s1 s2
+cmpFS (UnicodeStr _ s1) s2 = compare s1 (unpackIntFS s2)
+cmpFS s1 (UnicodeStr _ s2) = compare (unpackIntFS s1) s2
  cmpFS (FastString u1# _ b1#) (FastString u2# _ b2#) = -- assume non-null chars
    if u1# ==# u2# then
       EQ
diff --git a/ghc/compiler/utils/Outputable.lhs b/ghc/compiler/utils/Outputable.lhs

index 46cb734..5f38e9b 100644 (file)
--- a/ghc/compiler/utils/Outputable.lhs
+++ b/ghc/compiler/utils/Outputable.lhs
@@ -34,11 +34,11 @@ module Outputable (
         hang, punctuate,
         speakNth, speakNTimes,
  
-       printSDoc, printErrs, printDump, 
+       printSDoc, printErrs, printDump,
         printForC, printForAsm, printForIface, printForUser,
         pprCode, pprCols,
-       showSDoc, showSDocDebug, showSDocIface, showsPrecSDoc, 
-       pprFSAsString,
+       showSDoc, showSDocDebug, showSDocIface, showsPrecSDoc,
+       pprHsChar, pprHsString,
  
  
         -- error handling
@@ -57,6 +57,7 @@ import Pretty         ( Doc, Mode(..), TextDetails(..), fullRender )
  import Panic
  import ST              ( runST )
  import Foreign
+import Char             ( chr, ord, isDigit )
  \end{code}
  
  
@@ -317,8 +318,43 @@ instance Outputable FastString where
      ppr fs = text (unpackFS fs)                -- Prints an unadorned string,
                                         -- no double quotes or anything
  
-pprFSAsString :: FastString -> SDoc                    -- The Char instance of Show prints
-pprFSAsString fs = text (showList (unpackFS fs) "")    -- strings with double quotes and escapes
+#if __GLASGOW_HASKELL__ < 410
+-- Assume we have only 8-bit Chars.
+
+pprHsChar :: Int -> SDoc
+pprHsChar c = char '\'' <> text (showCharLit c "") <> char '\''
+
+pprHsString :: FAST_STRING -> SDoc
+pprHsString fs = doubleQuotes (text (foldr showCharLit "" (_UNPK_INT_ fs)))
+
+showCharLit :: Int -> String -> String
+showCharLit c rest
+    | c == ord '\"' = "\\\"" ++ rest
+    | c == ord '\'' = "\\\'" ++ rest
+    | c == ord '\\' = "\\\\" ++ rest
+    | c >= 0x20 && c <= 0x7E = chr c : rest
+    | c == ord '\a' = "\\a" ++ rest
+    | c == ord '\b' = "\\b" ++ rest
+    | c == ord '\f' = "\\f" ++ rest
+    | c == ord '\n' = "\\n" ++ rest
+    | c == ord '\r' = "\\r" ++ rest
+    | c == ord '\t' = "\\t" ++ rest
+    | c == ord '\v' = "\\v" ++ rest
+    | otherwise     = ('\\':) $ shows c $ case rest of
+        d:_ | isDigit d -> "\\&" ++ rest
+        _               -> rest
+
+#else
+-- We have 31-bit Chars and will simply use Show instances
+-- of Char and String.
+
+pprHsChar :: Int -> SDoc
+pprHsChar c = text (show (chr c))
+
+pprHsString :: FastString -> SDoc
+pprHsString fs = text (show (unpackFS fs))
+
+#endif
  
  instance Show FastString  where
      showsPrec p fs = showsPrecSDoc p (ppr fs)
diff --git a/ghc/compiler/utils/UnicodeUtil.lhs b/ghc/compiler/utils/UnicodeUtil.lhs

new file mode 100644 (file)

index 0000000..0123e67
--- /dev/null
+++ b/ghc/compiler/utils/UnicodeUtil.lhs
@@ -0,0 +1,46 @@
+Various Unicode-related utilities.
+
+\begin{code}
+module UnicodeUtil(
+    stringToUtf8
+  ) where
+
+#include "HsVersions.h"
+
+import Panic (panic)
+import Char  (chr, ord)
+\end{code}
+
+\begin{code}
+stringToUtf8 :: [Int] -> String
+stringToUtf8 []       = ""
+stringToUtf8 (c:s)
+    | c >= 1 && c <= 0x7F = chr c : stringToUtf8 s
+    | c < 0           = panic ("charToUtf8 ("++show c++")")
+    | c <= 0x7FF      = chr (0xC0 + c `div`       0x40           ) :
+                        chr (0x80 + c                  `mod` 0x40) :
+                        stringToUtf8 s
+    | c <= 0xFFFF     = chr (0xE0 + c `div`     0x1000           ) :
+                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
+                        chr (0x80 + c                  `mod` 0x40) :
+                        stringToUtf8 s
+    | c <= 0x1FFFFF   = chr (0xF0 + c `div`    0x40000           ) :
+                        chr (0x80 + c `div`     0x1000 `mod` 0x40) :
+                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
+                        chr (0x80 + c                  `mod` 0x40) :
+                        stringToUtf8 s
+    | c <= 0x3FFFFFF  = chr (0xF8 + c `div`  0x1000000           ) :
+                        chr (0x80 + c `div`    0x40000 `mod` 0x40) :
+                        chr (0x80 + c `div`     0x1000 `mod` 0x40) :
+                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
+                        chr (0x80 + c                  `mod` 0x40) :
+                        stringToUtf8 s
+    | c <= 0x7FFFFFFF = chr (0xFC + c `div` 0x40000000           ) :
+                        chr (0x80 + c `div`  0x1000000 `mod` 0x40) :
+                        chr (0x80 + c `div`    0x40000 `mod` 0x40) :
+                        chr (0x80 + c `div`     0x1000 `mod` 0x40) :
+                        chr (0x80 + c `div`       0x40 `mod` 0x40) :
+                        chr (0x80 + c                  `mod` 0x40) :
+                        stringToUtf8 s
+    | otherwise       = panic ("charToUtf8 "++show c)
+\end{code}
diff --git a/ghc/docs/users_guide/glasgow_exts.sgml b/ghc/docs/users_guide/glasgow_exts.sgml

index c962f87..aa1c884 100644 (file)
--- a/ghc/docs/users_guide/glasgow_exts.sgml
+++ b/ghc/docs/users_guide/glasgow_exts.sgml
@@ -364,8 +364,8 @@ Literals for these types may be written as follows:
  1#              an Int#
  1.2#            a Float#
  1.34##          a Double#
-'a'#            a Char#; for weird characters, use '\o&#60;octal&#62;'#
-"a"#            an Addr# (a `char *')
+'a'#            a Char#; for weird characters, use e.g. '\o&#60;octal&#62;'#
+"a"#            an Addr# (a `char *'); only characters '\0'..'\255' allowed
  </ProgramListing>
  
  <IndexTerm><Primary>literals, primitive</Primary></IndexTerm>
diff --git a/ghc/docs/users_guide/vs_haskell.sgml b/ghc/docs/users_guide/vs_haskell.sgml

index 1f98fe8..38d1472 100644 (file)
--- a/ghc/docs/users_guide/vs_haskell.sgml
+++ b/ghc/docs/users_guide/vs_haskell.sgml
@@ -122,7 +122,7 @@ numeric types if this stuff keeps you awake at night.
  This code fragment <Emphasis>should</Emphasis> elicit a fatal error, but it does not:
  
  <ProgramListing>
-main = print (array (1,1) [ 1:=2, 1:=3 ])
+main = print (array (1,1) [(1,2), (1,3)])
  </ProgramListing>
  
  </Para>
@@ -158,15 +158,6 @@ stuck on them.
  </Para>
  </ListItem>
  </VarListEntry>
-<VarListEntry>
-<Term>Unicode character set:</Term>
-<ListItem>
-<Para>
-Haskell 98 embraces the Unicode character set, but GHC doesn't
-handle it. Yet.
-</Para>
-</ListItem>
-</VarListEntry>
  </VariableList>
  </Para>
  
diff --git a/ghc/includes/ClosureMacros.h b/ghc/includes/ClosureMacros.h

index c8baeee..6e3922e 100644 (file)
--- a/ghc/includes/ClosureMacros.h
+++ b/ghc/includes/ClosureMacros.h
@@ -1,5 +1,5 @@
  /* ----------------------------------------------------------------------------
- * $Id: ClosureMacros.h,v 1.22 2000/05/26 10:14:33 sewardj Exp $
+ * $Id: ClosureMacros.h,v 1.23 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-1999
   *
@@ -142,8 +142,14 @@ extern void* DATA_SECTION_END_MARKER_DECL;
  #define LOOKS_LIKE_STATIC(r) (!(HEAP_ALLOCED(r)))
  
  /* Tiresome predicates needed to check for pointers into the closure tables */
-#define IS_CHARLIKE_CLOSURE(p)  ( (P_)(p) >= (P_)CHARLIKE_closure && (char*)(p) <= ((char*)CHARLIKE_closure + 255 * sizeof(StgIntCharlikeClosure)) )
-#define IS_INTLIKE_CLOSURE(p)  ( (P_)(p) >= (P_)INTLIKE_closure && (char*)(p) <= ((char*)INTLIKE_closure + 32 * sizeof(StgIntCharlikeClosure)) )
+#define IS_CHARLIKE_CLOSURE(p) \
+    ( (P_)(p) >= (P_)CHARLIKE_closure && \
+      (char*)(p) <= ((char*)CHARLIKE_closure + \
+                     (MAX_CHARLIKE-MIN_CHARLIKE) * sizeof(StgIntCharlikeClosure)) )
+#define IS_INTLIKE_CLOSURE(p) \
+    ( (P_)(p) >= (P_)INTLIKE_closure && \
+      (char*)(p) <= ((char*)INTLIKE_closure + \
+                     (MAX_INTLIKE-MIN_INTLIKE) * sizeof(StgIntCharlikeClosure)) )
  
  #define LOOKS_LIKE_STATIC_CLOSURE(r) (((*(((unsigned long *)(r))-1)) == 0) || IS_CHARLIKE_CLOSURE(r) || IS_INTLIKE_CLOSURE(r))
  #else
@@ -332,7 +338,7 @@ static __inline__ StgWord tso_sizeW ( StgTSO *tso )
     INTLIKE and CHARLIKE closures.
     -------------------------------------------------------------------------- */
  
-#define CHARLIKE_CLOSURE(n) ((P_)&CHARLIKE_closure[n])
+#define CHARLIKE_CLOSURE(n) ((P_)&CHARLIKE_closure[(n)-MIN_CHARLIKE])
  #define INTLIKE_CLOSURE(n)  ((P_)&INTLIKE_closure[(n)-MIN_INTLIKE])
  
  /* -----------------------------------------------------------------------------
diff --git a/ghc/includes/Constants.h b/ghc/includes/Constants.h

index bc6b162..9792a0e 100644 (file)
--- a/ghc/includes/Constants.h
+++ b/ghc/includes/Constants.h
@@ -1,5 +1,5 @@
  /* ----------------------------------------------------------------------------
- * $Id: Constants.h,v 1.13 2000/08/03 11:28:35 simonmar Exp $
+ * $Id: Constants.h,v 1.14 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-1999
   *
@@ -166,11 +166,17 @@
  
  #define MAX_VECTORED_RTN 8
  
-/*---- Range of built-in table of static small int-like closures. */
+/*---- Range of built-in table of static small int-like and char-like closures. */
  
-#define MAX_INTLIKE            (16)
+#define MAX_INTLIKE            16
  #define MIN_INTLIKE            (-16)
  
+#define MAX_CHARLIKE           255
+#define MIN_CHARLIKE           0
+
+/* You can change these constants (I hope) but be sure to modify
+   rts/StgMiscClosures.hs accordingly. */
+
  /*---- Minimum number of words left in heap after GC to carry on */
  
  #define HEAP_HWM_WORDS 1024
diff --git a/ghc/includes/HsFFI.h b/ghc/includes/HsFFI.h

index 17486b5..46a2776 100644 (file)
--- a/ghc/includes/HsFFI.h
+++ b/ghc/includes/HsFFI.h
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: HsFFI.h,v 1.3 2000/04/13 15:37:11 panne Exp $
+ * $Id: HsFFI.h,v 1.4 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 2000
   *
@@ -63,8 +63,8 @@ typedef void*                 HsForeignObj;   /* ... and this StgForeignPtr       */
  typedef void*                  HsStablePtr;
  
  /* this should correspond to the type of StgChar in StgTypes.h */
-#define HS_CHAR_MIN            (0)
-#define HS_CHAR_MAX            UINT8_MAX
+#define HS_CHAR_MIN            0
+#define HS_CHAR_MAX            INT32_MAX
  
  /* this mirrors the distinction of cases in StgTypes.h */
  #if   SIZEOF_VOID_P == 8
diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h

index 1e14fd6..3565707 100644 (file)
--- a/ghc/includes/PrimOps.h
+++ b/ghc/includes/PrimOps.h
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: PrimOps.h,v 1.59 2000/07/21 09:11:19 rrt Exp $
+ * $Id: PrimOps.h,v 1.60 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-1999
   *
@@ -221,7 +221,8 @@ typedef union {
  #define int2Addrzh(r,a)        r=(A_)(a)
  #define addr2Intzh(r,a)        r=(I_)(a)
  
-#define readCharOffAddrzh(r,a,i)       r= ((C_ *)(a))[i]
+#define readCharOffAddrzh(r,a,i)       r= ((unsigned char *)(a))[i]
+/* unsigned char is for compatibility: the index is still in bytes. */
  #define readIntOffAddrzh(r,a,i)        r= ((I_ *)(a))[i]
  #define readWordOffAddrzh(r,a,i)       r= ((W_ *)(a))[i]
  #define readAddrOffAddrzh(r,a,i)       r= ((PP_)(a))[i]
@@ -233,7 +234,8 @@ typedef union {
  #define readWord64OffAddrzh(r,a,i)     r= ((LW_ *)(a))[i]
  #endif
  
-#define writeCharOffAddrzh(a,i,v)       ((C_ *)(a))[i] = (v)
+#define writeCharOffAddrzh(a,i,v)       ((unsigned char *)(a))[i] = (unsigned char)(v)
+/* unsigned char is for compatibility: the index is still in bytes. */
  #define writeIntOffAddrzh(a,i,v)        ((I_ *)(a))[i] = (v)
  #define writeWordOffAddrzh(a,i,v)       ((W_ *)(a))[i] = (v)
  #define writeAddrOffAddrzh(a,i,v)       ((PP_)(a))[i] = (v)
@@ -246,7 +248,8 @@ typedef union {
  #define writeWord64OffAddrzh(a,i,v)  ((LW_ *)(a))[i] = (v)
  #endif
  
-#define indexCharOffAddrzh(r,a,i)      r= ((C_ *)(a))[i]
+#define indexCharOffAddrzh(r,a,i)      r= ((unsigned char *)(a))[i]
+/* unsigned char is for compatibility: the index is still in bytes. */
  #define indexIntOffAddrzh(r,a,i)       r= ((I_ *)(a))[i]
  #define indexWordOffAddrzh(r,a,i)      r= ((W_ *)(a))[i]
  #define indexAddrOffAddrzh(r,a,i)      r= ((PP_)(a))[i]
@@ -542,7 +545,8 @@ extern I_ resetGenSymZh(void);
  /* result ("r") arg ignored in write macros! */
  #define writeArrayzh(a,i,v)    ((PP_) PTRS_ARR_CTS(a))[(i)]=(v)
  
-#define writeCharArrayzh(a,i,v)          ((C_ *)(BYTE_ARR_CTS(a)))[i] = (v)
+#define writeCharArrayzh(a,i,v)          ((unsigned char *)(BYTE_ARR_CTS(a)))[i] = (unsigned char)(v)
+/* unsigned char is for compatibility: the index is still in bytes. */
  #define writeIntArrayzh(a,i,v)   ((I_ *)(BYTE_ARR_CTS(a)))[i] = (v)
  #define writeWordArrayzh(a,i,v)          ((W_ *)(BYTE_ARR_CTS(a)))[i] = (v)
  #define writeAddrArrayzh(a,i,v)          ((PP_)(BYTE_ARR_CTS(a)))[i] = (v)
diff --git a/ghc/includes/RtsAPI.h b/ghc/includes/RtsAPI.h

index 35a8df0..bddb06e 100644 (file)
--- a/ghc/includes/RtsAPI.h
+++ b/ghc/includes/RtsAPI.h
@@ -1,5 +1,5 @@
  /* ----------------------------------------------------------------------------
- * $Id: RtsAPI.h,v 1.15 2000/06/27 09:18:04 sewardj Exp $
+ * $Id: RtsAPI.h,v 1.16 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-1999
   *
@@ -48,7 +48,7 @@ extern void getProgArgv            ( int *argc, char **argv[] );
  /* ----------------------------------------------------------------------------
     Building Haskell objects from C datatypes.
     ------------------------------------------------------------------------- */
-HaskellObj   rts_mkChar       ( char c );
+HaskellObj   rts_mkChar       ( unsigned int c );
  HaskellObj   rts_mkInt        ( int i );
  HaskellObj   rts_mkInt8       ( int i );
  HaskellObj   rts_mkInt16      ( int i );
@@ -71,7 +71,7 @@ HaskellObj   rts_apply        ( HaskellObj, HaskellObj );
  /* ----------------------------------------------------------------------------
     Deconstructing Haskell objects
     ------------------------------------------------------------------------- */
-char         rts_getChar      ( HaskellObj );
+unsigned int rts_getChar      ( HaskellObj );
  int          rts_getInt       ( HaskellObj );
  int          rts_getInt32     ( HaskellObj );
  unsigned int rts_getWord      ( HaskellObj );
diff --git a/ghc/includes/StgTicky.h b/ghc/includes/StgTicky.h

index b508e8a..ee9d5d7 100644 (file)
--- a/ghc/includes/StgTicky.h
+++ b/ghc/includes/StgTicky.h
@@ -1,5 +1,5 @@
  /* ----------------------------------------------------------------------------
- * $Id: StgTicky.h,v 1.9 1999/11/11 17:50:47 simonpj Exp $
+ * $Id: StgTicky.h,v 1.10 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The AQUA project, Glasgow University, 1994-1997
   * (c) The GHC Team, 1998-1999
@@ -142,8 +142,8 @@ typedef struct _StgEntCounter {
                 arity:16,       /* arity (static info) */
                 stk_args:16;    /* # of args off stack */
                                 /* (rest of args are in registers) */
-    StgChar    *str;           /* name of the thing */
-    StgChar    *arg_kinds;     /* info about the args types */
+    char       *str;           /* name of the thing */
+    char       *arg_kinds;     /* info about the args types */
      I_         entry_count;      /* Trips to fast entry code */
      I_         slow_entry_count; /* Trips to slow entry code */
      I_          allocs;         /* number of allocations by this fun */
diff --git a/ghc/includes/StgTypes.h b/ghc/includes/StgTypes.h

index e6b87f9..d0a8104 100644 (file)
--- a/ghc/includes/StgTypes.h
+++ b/ghc/includes/StgTypes.h
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: StgTypes.h,v 1.12 2000/06/12 11:04:12 simonmar Exp $
+ * $Id: StgTypes.h,v 1.13 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-1999
   *
@@ -89,7 +89,7 @@ typedef void*              StgAddr;
   * Other commonly-used STG datatypes.
   */
  
-typedef StgWord8           StgChar;
+typedef StgWord32          StgChar;
  typedef int                StgBool;
  /*
   * If a double fits in an StgWord, don't bother using floats.
@@ -160,7 +160,8 @@ typedef StgFunPtr StgFun(void);
  typedef union {
      StgWord        w;
      StgAddr        a;
-    StgWord        c;
+    StgChar        c;
+    StgInt8        i8;
      StgFloat       f;
      StgInt         i;
      StgPtr         p;
diff --git a/ghc/lib/std/PrelBase.lhs b/ghc/lib/std/PrelBase.lhs

index 235c41b..f79e788 100644 (file)
--- a/ghc/lib/std/PrelBase.lhs
+++ b/ghc/lib/std/PrelBase.lhs
@@ -1,5 +1,5 @@
  % -----------------------------------------------------------------------------
-% $Id: PrelBase.lhs,v 1.34 2000/08/02 14:13:27 rrt Exp $
+% $Id: PrelBase.lhs,v 1.35 2000/08/07 23:37:23 qrczak Exp $
  %
  % (c) The University of Glasgow, 1992-2000
  %
@@ -124,14 +124,14 @@ otherwise = True
  build = error "urk"
  foldr = error "urk"
  
-unpackCString#  :: Addr# -> [Char]
-unpackFoldrCString#  :: Addr# -> (Char  -> a -> a) -> a -> a 
+unpackCString# :: Addr# -> [Char]
+unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a 
  unpackAppendCString# :: Addr# -> [Char] -> [Char]
-unpackNBytes#      :: Addr# -> Int#   -> [Char]
-unpackNBytes# a b = error "urk"
+unpackCStringUtf8# :: Addr# -> [Char]
  unpackCString# a = error "urk"
  unpackFoldrCString# a = error "urk"
  unpackAppendCString# a = error "urk"
+unpackCStringUtf8# a = error "urk"
  -}
  \end{code}
  
@@ -185,7 +185,7 @@ class  (Eq a) => Ord a  where
  
  \begin{code}
  class  Functor f  where
-    fmap         :: (a -> b) -> f a -> f b
+    fmap        :: (a -> b) -> f a -> f b
  
  class  Monad m  where
      (>>=)       :: m a -> (a -> m b) -> m b
@@ -440,7 +440,11 @@ instance Ord Char where
    (C# c1) <  (C# c2) = c1 `ltChar#` c2
  
  chr :: Int -> Char
-chr (I# i) | i >=# 0# && i <=# 255# = C# (chr# i)
+chr (I# i) | i >=# 0#
+#if INT_SIZE_IN_BYTES > 4
+             && i <=# 0x7FFFFFFF#
+#endif
+             = C# (chr# i)
            | otherwise = error ("Prelude.chr: bad argument")
  
  unsafeChr :: Int -> Char
@@ -623,10 +627,10 @@ This code is needed for virtually all programs, since it's used for
  unpacking the strings of error messages.
  
  \begin{code}
-unpackCString#  :: Addr# -> [Char]
+unpackCString# :: Addr# -> [Char]
  unpackCString# a = unpackCStringList# a
  
-unpackCStringList#  :: Addr# -> [Char]
+unpackCStringList# :: Addr# -> [Char]
  unpackCStringList# addr 
    = unpack 0#
    where
@@ -646,7 +650,7 @@ unpackAppendCString# addr rest
        where
         ch = indexCharOffAddr# addr nh
  
-unpackFoldrCString#  :: Addr# -> (Char  -> a -> a) -> a -> a 
+unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a 
  unpackFoldrCString# addr f z 
    = unpack 0#
    where
@@ -656,11 +660,42 @@ unpackFoldrCString# addr f z
        where
         ch = indexCharOffAddr# addr nh
  
-unpackNBytes#      :: Addr# -> Int#   -> [Char]
-  -- This one is called by the compiler to unpack literal 
-  -- strings with NULs in them; rare. It's strict!
-  -- We don't try to do list deforestation for this one
+unpackCStringUtf8# :: Addr# -> [Char]
+unpackCStringUtf8# addr 
+  = unpack 0#
+  where
+    unpack nh
+      | ch `eqChar#` '\0'# = []
+      | ch `leChar#` '\x7F'# = C# ch : unpack (nh +# 1#)
+      | ch `leChar#` '\xDF'# = C# (chr# ((ord# ch                                  `iShiftL#`  6#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 1#))) -# 0x3080#))
+                               : unpack (nh +# 2#)
+      | ch `leChar#` '\xEF'# = C# (chr# ((ord# ch                                  `iShiftL#` 12#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#`  6#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 2#))) -# 0xE2080#))
+                               : unpack (nh +# 3#)
+      | ch `leChar#` '\xF7'# = C# (chr# ((ord# ch                                  `iShiftL#` 18#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 12#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#`  6#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 3#))) -# 0x3C82080#))
+                               : unpack (nh +# 4#)
+      | ch `leChar#` '\xFB'# = C# (chr# ((ord# ch                                  `iShiftL#` 24#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 18#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 12#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 3#)) `iShiftL#`  6#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 4#))) -# 0xFA082080#))
+                               : unpack (nh +# 5#)
+      | otherwise           = C# (chr# (((ord# ch -# 0xFC#)                        `iShiftL#` 30#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 1#)) `iShiftL#` 24#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 2#)) `iShiftL#` 18#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 3#)) `iShiftL#` 12#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 4#)) `iShiftL#`  6#) +#
+                                         (ord# (indexCharOffAddr# addr (nh +# 5#))) -# 0x82082080#))
+                               : unpack (nh +# 6#)
+      where
+       ch = indexCharOffAddr# addr nh
  
+unpackNBytes# :: Addr# -> Int# -> [Char]
  unpackNBytes# _addr 0#   = []
  unpackNBytes#  addr len# = unpack [] (len# -# 1#)
      where
diff --git a/ghc/lib/std/PrelEnum.lhs b/ghc/lib/std/PrelEnum.lhs

index c941e86..62f3167 100644 (file)
--- a/ghc/lib/std/PrelEnum.lhs
+++ b/ghc/lib/std/PrelEnum.lhs
@@ -1,5 +1,5 @@
  % -----------------------------------------------------------------------------
-% $Id: PrelEnum.lhs,v 1.11 2000/06/30 13:39:35 simonmar Exp $
+% $Id: PrelEnum.lhs,v 1.12 2000/08/07 23:37:23 qrczak Exp $
  %
  % (c) The University of Glasgow, 1992-2000
  %
@@ -180,11 +180,11 @@ instance Enum Ordering where
  \begin{code}
  instance  Bounded Char  where
      minBound =  '\0'
-    maxBound =  '\255'
+    maxBound =  '\x7FFFFFFF'
  
  instance  Enum Char  where
      succ (C# c#)
-       | not (ord# c# ==# 255#) = C# (chr# (ord# c# +# 1#))
+       | not (ord# c# ==# 0x7FFFFFFF#) = C# (chr# (ord# c# +# 1#))
         | otherwise             = error ("Prelude.Enum.Char.succ: bad argument")
      pred (C# c#)
         | not (ord# c# ==# 0#)   = C# (chr# (ord# c# -# 1#))
@@ -194,7 +194,7 @@ instance  Enum Char  where
      fromEnum = ord
  
      {-# INLINE enumFrom #-}
-    enumFrom (C# x) = eftChar (ord# x) 255#
+    enumFrom (C# x) = eftChar (ord# x) 0x7FFFFFFF#
         -- Blarg: technically I guess enumFrom isn't strict!
  
      {-# INLINE enumFromTo #-}
@@ -235,13 +235,13 @@ eftCharList x y | x ># y    = []
  
  -- For enumFromThenTo we give up on inlining
  efdCharFB c n x1 x2
-  | delta >=# 0# = go_up_char_fb c n x1 delta 255#
+  | delta >=# 0# = go_up_char_fb c n x1 delta 0x7FFFFFFF#
    | otherwise    = go_dn_char_fb c n x1 delta 0#
    where
      delta = x2 -# x1
  
  efdCharList x1 x2
-  | delta >=# 0# = go_up_char_list x1 delta 255#
+  | delta >=# 0# = go_up_char_list x1 delta 0x7FFFFFFF#
    | otherwise    = go_dn_char_list x1 delta 0#
    where
      delta = x2 -# x1
diff --git a/ghc/lib/std/PrelRead.lhs b/ghc/lib/std/PrelRead.lhs

index ac96ecb..a3bea26 100644 (file)
--- a/ghc/lib/std/PrelRead.lhs
+++ b/ghc/lib/std/PrelRead.lhs
@@ -1,5 +1,5 @@
  % ------------------------------------------------------------------------------
-% $Id: PrelRead.lhs,v 1.15 2000/06/30 13:39:36 simonmar Exp $
+% $Id: PrelRead.lhs,v 1.16 2000/08/07 23:37:23 qrczak Exp $
  %
  % (c) The University of Glasgow, 1994-2000
  %
@@ -14,7 +14,7 @@ Instances of the Read class.
  module PrelRead where
  
  import PrelErr         ( error )
-import PrelEnum                ( Enum(..) )
+import PrelEnum                ( Enum(..), maxBound )
  import PrelNum
  import PrelReal
  import PrelFloat
@@ -272,7 +272,7 @@ lexLitChar ('\\':s)     =  do
          fromAsciiLab (x:y:ls)   | isUpper y &&
                                    [x,y]   `elem` asciiEscTab = return ([x,y], ls)
          fromAsciiLab _                                       = mzero
-                                  
+
          asciiEscTab = "DEL" : asciiTab
  
          {-
@@ -284,8 +284,7 @@ lexLitChar ('\\':s)     =  do
          -}
          checkSize base f str = do
            (num, res) <- f str
-             -- Note: this is assumes that a Char is 8 bits long.
-          if (toAnInt base num) > 255 then 
+          if toAnInteger base num > toInteger (ord maxBound) then 
               mzero
             else
               case base of
@@ -293,7 +292,7 @@ lexLitChar ('\\':s)     =  do
                  16 -> return ('x':num, res)
                  _  -> return (num, res)
  
-       toAnInt base xs = foldl (\ acc n -> acc*base + n) 0 (map digitToInt xs)
+       toAnInteger base = foldl (\ acc n -> acc*base + toInteger (digitToInt n)) 0
  
  
  lexLitChar (c:s)        =  return ([c],s)
diff --git a/ghc/lib/std/cbits/filePutc.c b/ghc/lib/std/cbits/filePutc.c

index f9bacd6..9ca8083 100644 (file)
--- a/ghc/lib/std/cbits/filePutc.c
+++ b/ghc/lib/std/cbits/filePutc.c
@@ -1,7 +1,7 @@
  /* 
   * (c) The GRASP/AQUA Project, Glasgow University, 1994-1998
   *
- * $Id: filePutc.c,v 1.11 1999/12/08 15:47:07 simonmar Exp $
+ * $Id: filePutc.c,v 1.12 2000/08/07 23:37:23 qrczak Exp $
   *
   * hPutChar Runtime Support
   */
@@ -24,6 +24,7 @@ filePutc(StgForeignPtr ptr, StgChar c)
  {
      IOFileObject* fo = (IOFileObject*)ptr;
      int rc = 0;
+    unsigned char byte = (unsigned char) c;
  
      /* What filePutc needs to do:
  
@@ -46,6 +47,9 @@ filePutc(StgForeignPtr ptr, StgChar c)
       flush(i.e., empty) the buffer first. (We could be smarter about this,
       but aren't!)
  
+     Only the lower 8 bits of a character are written. The data are supposed
+     to be already converted to the stream's 8-bit encoding.
+
      */
  
      if ( FILEOBJ_READABLE(fo) && FILEOBJ_JUST_READ(fo) ) {
@@ -60,7 +64,7 @@ filePutc(StgForeignPtr ptr, StgChar c)
          ; 
      } else {
         /* We're buffered, add it to the pack */
-       ((char*)fo->buf)[fo->bufWPtr] = (char)c;
+       ((unsigned char*)fo->buf)[fo->bufWPtr] = byte;
         fo->bufWPtr++;
        /* If the buffer filled up as a result, *or*
           the added character terminated a line
@@ -79,10 +83,10 @@ filePutc(StgForeignPtr ptr, StgChar c)
      while ((rc = (
  #ifdef USE_WINSOCK
                  fo->flags & FILEOBJ_WINSOCK ?
-                send(fo->fd, &c, 1, 0) :
-                write(fo->fd, &c, 1))) <= 0) {
+                send(fo->fd, &byte, 1, 0) :
+                write(fo->fd, &byte, 1))) <= 0) {
  #else
-                write(fo->fd, &c, 1))) <= 0) {
+                write(fo->fd, &byte, 1))) <= 0) {
  #endif
  
          if ( rc == -1 && errno == EAGAIN) {
diff --git a/ghc/rts/ForeignCall.c b/ghc/rts/ForeignCall.c

index a6a4646..5b1a0c6 100644 (file)
--- a/ghc/rts/ForeignCall.c
+++ b/ghc/rts/ForeignCall.c
@@ -1,6 +1,6 @@
  
  /* -----------------------------------------------------------------------------
- * $Id: ForeignCall.c,v 1.17 2000/05/26 10:14:34 sewardj Exp $
+ * $Id: ForeignCall.c,v 1.18 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team 1994-1999.
   *
@@ -431,7 +431,7 @@ unpackArgsAndCallHaskell_x86_nocallconv_wrk ( StgStablePtr stableptr,
     while (*argp) {
        switch (*argp) {
           case CHAR_REP:
-            node = rts_apply ( node, rts_mkChar ( *(char*)args ) );
+            node = rts_apply ( node, rts_mkChar ( *(unsigned int*)args ) );
              args += 4;
              break;
           case INT_REP:
diff --git a/ghc/rts/Prelude.c b/ghc/rts/Prelude.c

index 154b046..5f8ed6d 100644 (file)
--- a/ghc/rts/Prelude.c
+++ b/ghc/rts/Prelude.c
@@ -1,6 +1,6 @@
  
  /* -----------------------------------------------------------------------------
- * $Id: Prelude.c,v 1.8 2000/06/15 13:23:52 daan Exp $
+ * $Id: Prelude.c,v 1.9 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-2000
   *
@@ -193,10 +193,10 @@ void fixupRTStoPreludeRefs ( void*(*ask_hugs_dynamic_linker)(char*) )
    {
      int i;
    
-    for(i=0;i<=255;i++)
+    for(i=0; i<=MAX_CHARLIKE-MIN_CHARLIKE; i++)
        (CHARLIKE_closure[i]).header.info = Czh_static_info;
      
-    for(i=0;i<=32;i++)
+    for(i=0; i<=MAX_INTLIKE-MIN_INTLIKE; i++)
        (INTLIKE_closure[i]).header.info = Izh_static_info;
    }
  #endif
diff --git a/ghc/rts/PrimOps.hc b/ghc/rts/PrimOps.hc

index e288b32..015e34a 100644 (file)
--- a/ghc/rts/PrimOps.hc
+++ b/ghc/rts/PrimOps.hc
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: PrimOps.hc,v 1.52 2000/05/10 11:02:00 simonmar Exp $
+ * $Id: PrimOps.hc,v 1.53 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-2000
   *
@@ -263,13 +263,14 @@ W_ GHC_ZCCReturnable_static_info[0];
     FE_                                                 \
   }
  
-newByteArray(Char,   sizeof(C_))
-newByteArray(Int,    sizeof(I_));
-newByteArray(Word,   sizeof(W_));
-newByteArray(Addr,   sizeof(P_));
-newByteArray(Float,  sizeof(StgFloat));
-newByteArray(Double, sizeof(StgDouble));
-newByteArray(StablePtr, sizeof(StgStablePtr));
+newByteArray(Char,   1)
+/* Char arrays really contain only 8-bit bytes for compatibility. */
+newByteArray(Int,    sizeof(I_))
+newByteArray(Word,   sizeof(W_))
+newByteArray(Addr,   sizeof(P_))
+newByteArray(Float,  sizeof(StgFloat))
+newByteArray(Double, sizeof(StgDouble))
+newByteArray(StablePtr, sizeof(StgStablePtr))
  
  FN_(newArrayzh_fast)
  {
diff --git a/ghc/rts/RtsAPI.c b/ghc/rts/RtsAPI.c

index 992be89..f22ec7e 100644 (file)
--- a/ghc/rts/RtsAPI.c
+++ b/ghc/rts/RtsAPI.c
@@ -1,5 +1,5 @@
  /* ----------------------------------------------------------------------------
- * $Id: RtsAPI.c,v 1.17 2000/04/26 10:17:41 simonmar Exp $
+ * $Id: RtsAPI.c,v 1.18 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-2000
   *
@@ -19,11 +19,11 @@
     Building Haskell objects from C datatypes.
     ------------------------------------------------------------------------- */
  HaskellObj
-rts_mkChar (char c)
+rts_mkChar (unsigned int c)
  {
    StgClosure *p = (StgClosure *)allocate(CONSTR_sizeW(0,1));
    p->header.info = Czh_con_info;
-  p->payload[0]  = (StgClosure *)((StgInt)c);
+  p->payload[0]  = (StgClosure *)(StgChar)c;
    return p;
  }
  
@@ -207,12 +207,12 @@ rts_apply (HaskellObj f, HaskellObj arg)
     Deconstructing Haskell objects
     ------------------------------------------------------------------------- */
  
-char
+unsigned int
  rts_getChar (HaskellObj p)
  {
    if ( p->header.info == Czh_con_info || 
         p->header.info == Czh_static_info) {
-    return (char)(StgWord)(p->payload[0]);
+    return (StgChar)(StgWord)(p->payload[0]);
    } else {
      barf("getChar: not a Char");
    }
diff --git a/ghc/rts/StgCRun.c b/ghc/rts/StgCRun.c

index 09a8016..ad567da 100644 (file)
--- a/ghc/rts/StgCRun.c
+++ b/ghc/rts/StgCRun.c
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: StgCRun.c,v 1.18 2000/04/17 14:46:31 sewardj Exp $
+ * $Id: StgCRun.c,v 1.19 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1998-2000
   *
@@ -238,7 +238,7 @@ EXTFUN(StgReturn)
  StgThreadReturnCode
  StgRun(StgFunPtr f, StgRegTable *basereg) {
  
-    StgChar space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
+    unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
      StgThreadReturnCode r;
  
      __asm__ volatile (
@@ -321,7 +321,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
  StgThreadReturnCode
  StgRun(StgFunPtr f, StgRegTable *basereg) {
  
-    StgChar space[RESERVED_C_STACK_BYTES+sizeof(void *)];
+    unsigned char space[RESERVED_C_STACK_BYTES+sizeof(void *)];
      register void *i7 __asm__("%i7");
      ((void **)(space))[100] = i7;
      f();
diff --git a/ghc/rts/StrHash.c b/ghc/rts/StrHash.c

index 231eaa7..7b6e66b 100644 (file)
--- a/ghc/rts/StrHash.c
+++ b/ghc/rts/StrHash.c
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: StrHash.c,v 1.1 2000/04/05 15:32:08 simonmar Exp $
+ * $Id: StrHash.c,v 1.2 2000/08/07 23:37:23 qrczak Exp $
   *
   * (c) The GHC Team, 1994-2000
   *
@@ -15,39 +15,39 @@
  #include "Rts.h"
  #include "StrHash.h"
  
-static const StgChar auxTable[] = {
-(StgChar)0x01, (StgChar)0x57, (StgChar)0x31, (StgChar)0x0c, (StgChar)0xb0, (StgChar)0xb2, (StgChar)0x66, (StgChar)0xa6, 
-(StgChar)0x79, (StgChar)0xc1, (StgChar)0x06, (StgChar)0x54, (StgChar)0xf9, (StgChar)0xe6, (StgChar)0x2c, (StgChar)0xa3, 
-(StgChar)0x0e, (StgChar)0xc5, (StgChar)0xd5, (StgChar)0xb5, (StgChar)0xa1, (StgChar)0x55, (StgChar)0xda, (StgChar)0x50, 
-(StgChar)0x40, (StgChar)0xef, (StgChar)0x18, (StgChar)0xe2, (StgChar)0xec, (StgChar)0x8e, (StgChar)0x26, (StgChar)0xc8, 
-(StgChar)0x6e, (StgChar)0xb1, (StgChar)0x68, (StgChar)0x67, (StgChar)0x8d, (StgChar)0xfd, (StgChar)0xff, (StgChar)0x32, 
-(StgChar)0x4d, (StgChar)0x65, (StgChar)0x51, (StgChar)0x12, (StgChar)0x2d, (StgChar)0x60, (StgChar)0x1f, (StgChar)0xde, 
-(StgChar)0x19, (StgChar)0x6b, (StgChar)0xbe, (StgChar)0x46, (StgChar)0x56, (StgChar)0xed, (StgChar)0xf0, (StgChar)0x22, 
-(StgChar)0x48, (StgChar)0xf2, (StgChar)0x14, (StgChar)0xd6, (StgChar)0xf4, (StgChar)0xe3, (StgChar)0x95, (StgChar)0xeb, 
-(StgChar)0x61, (StgChar)0xea, (StgChar)0x39, (StgChar)0x16, (StgChar)0x3c, (StgChar)0xfa, (StgChar)0x52, (StgChar)0xaf, 
-(StgChar)0xd0, (StgChar)0x05, (StgChar)0x7f, (StgChar)0xc7, (StgChar)0x6f, (StgChar)0x3e, (StgChar)0x87, (StgChar)0xf8, 
-(StgChar)0xae, (StgChar)0xa9, (StgChar)0xd3, (StgChar)0x3a, (StgChar)0x42, (StgChar)0x9a, (StgChar)0x6a, (StgChar)0xc3, 
-(StgChar)0xf5, (StgChar)0xab, (StgChar)0x11, (StgChar)0xbb, (StgChar)0xb6, (StgChar)0xb3, (StgChar)0x00, (StgChar)0xf3, 
-(StgChar)0x84, (StgChar)0x38, (StgChar)0x94, (StgChar)0x4b, (StgChar)0x80, (StgChar)0x85, (StgChar)0x9e, (StgChar)0x64, 
-(StgChar)0x82, (StgChar)0x7e, (StgChar)0x5b, (StgChar)0x0d, (StgChar)0x99, (StgChar)0xf6, (StgChar)0xd8, (StgChar)0xdb, 
-(StgChar)0x77, (StgChar)0x44, (StgChar)0xdf, (StgChar)0x4e, (StgChar)0x53, (StgChar)0x58, (StgChar)0xc9, (StgChar)0x63, 
-(StgChar)0x7a, (StgChar)0x0b, (StgChar)0x5c, (StgChar)0x20, (StgChar)0x88, (StgChar)0x72, (StgChar)0x34, (StgChar)0x0a, 
-(StgChar)0x8a, (StgChar)0x1e, (StgChar)0x30, (StgChar)0xb7, (StgChar)0x9c, (StgChar)0x23, (StgChar)0x3d, (StgChar)0x1a, 
-(StgChar)0x8f, (StgChar)0x4a, (StgChar)0xfb, (StgChar)0x5e, (StgChar)0x81, (StgChar)0xa2, (StgChar)0x3f, (StgChar)0x98, 
-(StgChar)0xaa, (StgChar)0x07, (StgChar)0x73, (StgChar)0xa7, (StgChar)0xf1, (StgChar)0xce, (StgChar)0x03, (StgChar)0x96, 
-(StgChar)0x37, (StgChar)0x3b, (StgChar)0x97, (StgChar)0xdc, (StgChar)0x5a, (StgChar)0x35, (StgChar)0x17, (StgChar)0x83, 
-(StgChar)0x7d, (StgChar)0xad, (StgChar)0x0f, (StgChar)0xee, (StgChar)0x4f, (StgChar)0x5f, (StgChar)0x59, (StgChar)0x10, 
-(StgChar)0x69, (StgChar)0x89, (StgChar)0xe1, (StgChar)0xe0, (StgChar)0xd9, (StgChar)0xa0, (StgChar)0x25, (StgChar)0x7b, 
-(StgChar)0x76, (StgChar)0x49, (StgChar)0x02, (StgChar)0x9d, (StgChar)0x2e, (StgChar)0x74, (StgChar)0x09, (StgChar)0x91, 
-(StgChar)0x86, (StgChar)0xe4, (StgChar)0xcf, (StgChar)0xd4, (StgChar)0xca, (StgChar)0xd7, (StgChar)0x45, (StgChar)0xe5, 
-(StgChar)0x1b, (StgChar)0xbc, (StgChar)0x43, (StgChar)0x7c, (StgChar)0xa8, (StgChar)0xfc, (StgChar)0x2a, (StgChar)0x04, 
-(StgChar)0x1d, (StgChar)0x6c, (StgChar)0x15, (StgChar)0xf7, (StgChar)0x13, (StgChar)0xcd, (StgChar)0x27, (StgChar)0xcb, 
-(StgChar)0xe9, (StgChar)0x28, (StgChar)0xba, (StgChar)0x93, (StgChar)0xc6, (StgChar)0xc0, (StgChar)0x9b, (StgChar)0x21, 
-(StgChar)0xa4, (StgChar)0xbf, (StgChar)0x62, (StgChar)0xcc, (StgChar)0xa5, (StgChar)0xb4, (StgChar)0x75, (StgChar)0x4c, 
-(StgChar)0x8c, (StgChar)0x24, (StgChar)0xd2, (StgChar)0xac, (StgChar)0x29, (StgChar)0x36, (StgChar)0x9f, (StgChar)0x08, 
-(StgChar)0xb9, (StgChar)0xe8, (StgChar)0x71, (StgChar)0xc4, (StgChar)0xe7, (StgChar)0x2f, (StgChar)0x92, (StgChar)0x78, 
-(StgChar)0x33, (StgChar)0x41, (StgChar)0x1c, (StgChar)0x90, (StgChar)0xfe, (StgChar)0xdd, (StgChar)0x5d, (StgChar)0xbd, 
-(StgChar)0xc2, (StgChar)0x8b, (StgChar)0x70, (StgChar)0x2b, (StgChar)0x47, (StgChar)0x6d, (StgChar)0xb8, (StgChar)0xd1};
+static const unsigned char auxTable[] = {
+0x01, 0x57, 0x31, 0x0c, 0xb0, 0xb2, 0x66, 0xa6,
+0x79, 0xc1, 0x06, 0x54, 0xf9, 0xe6, 0x2c, 0xa3,
+0x0e, 0xc5, 0xd5, 0xb5, 0xa1, 0x55, 0xda, 0x50,
+0x40, 0xef, 0x18, 0xe2, 0xec, 0x8e, 0x26, 0xc8,
+0x6e, 0xb1, 0x68, 0x67, 0x8d, 0xfd, 0xff, 0x32,
+0x4d, 0x65, 0x51, 0x12, 0x2d, 0x60, 0x1f, 0xde,
+0x19, 0x6b, 0xbe, 0x46, 0x56, 0xed, 0xf0, 0x22,
+0x48, 0xf2, 0x14, 0xd6, 0xf4, 0xe3, 0x95, 0xeb,
+0x61, 0xea, 0x39, 0x16, 0x3c, 0xfa, 0x52, 0xaf,
+0xd0, 0x05, 0x7f, 0xc7, 0x6f, 0x3e, 0x87, 0xf8,
+0xae, 0xa9, 0xd3, 0x3a, 0x42, 0x9a, 0x6a, 0xc3,
+0xf5, 0xab, 0x11, 0xbb, 0xb6, 0xb3, 0x00, 0xf3,
+0x84, 0x38, 0x94, 0x4b, 0x80, 0x85, 0x9e, 0x64,
+0x82, 0x7e, 0x5b, 0x0d, 0x99, 0xf6, 0xd8, 0xdb,
+0x77, 0x44, 0xdf, 0x4e, 0x53, 0x58, 0xc9, 0x63,
+0x7a, 0x0b, 0x5c, 0x20, 0x88, 0x72, 0x34, 0x0a,
+0x8a, 0x1e, 0x30, 0xb7, 0x9c, 0x23, 0x3d, 0x1a,
+0x8f, 0x4a, 0xfb, 0x5e, 0x81, 0xa2, 0x3f, 0x98,
+0xaa, 0x07, 0x73, 0xa7, 0xf1, 0xce, 0x03, 0x96,
+0x37, 0x3b, 0x97, 0xdc, 0x5a, 0x35, 0x17, 0x83,
+0x7d, 0xad, 0x0f, 0xee, 0x4f, 0x5f, 0x59, 0x10,
+0x69, 0x89, 0xe1, 0xe0, 0xd9, 0xa0, 0x25, 0x7b,
+0x76, 0x49, 0x02, 0x9d, 0x2e, 0x74, 0x09, 0x91,
+0x86, 0xe4, 0xcf, 0xd4, 0xca, 0xd7, 0x45, 0xe5,
+0x1b, 0xbc, 0x43, 0x7c, 0xa8, 0xfc, 0x2a, 0x04,
+0x1d, 0x6c, 0x15, 0xf7, 0x13, 0xcd, 0x27, 0xcb,
+0xe9, 0x28, 0xba, 0x93, 0xc6, 0xc0, 0x9b, 0x21,
+0xa4, 0xbf, 0x62, 0xcc, 0xa5, 0xb4, 0x75, 0x4c,
+0x8c, 0x24, 0xd2, 0xac, 0x29, 0x36, 0x9f, 0x08,
+0xb9, 0xe8, 0x71, 0xc4, 0xe7, 0x2f, 0x92, 0x78,
+0x33, 0x41, 0x1c, 0x90, 0xfe, 0xdd, 0x5d, 0xbd,
+0xc2, 0x8b, 0x70, 0x2b, 0x47, 0x6d, 0xb8, 0xd1};
  
  hash_t
  hash_str(char *str)
diff --git a/ghc/rts/parallel/Pack.c b/ghc/rts/parallel/Pack.c

index 6f6ad59..72c66bf 100644 (file)
--- a/ghc/rts/parallel/Pack.c
+++ b/ghc/rts/parallel/Pack.c
@@ -1,6 +1,6 @@
  /* 
     Time-stamp: <Thu Mar 30 2000 22:53:32 Stardate: [-30]4584.56 hwloidl>
-   $Id: Pack.c,v 1.4 2000/03/31 03:09:37 hwloidl Exp $
+   $Id: Pack.c,v 1.5 2000/08/07 23:37:24 qrczak Exp $
  
     Graph packing and unpacking code for sending it to another processor
     and retrieving the original graph structure from the packet.
@@ -722,12 +722,21 @@ StgClosure *closure;
    switch (info->type) {
  
    case CONSTR_CHARLIKE:
-    IF_PAR_DEBUG(pack,
-                belch("*>^^ Packing a charlike closure %d", 
-                      ((StgIntCharlikeClosure*)closure)->data));
-    
-    PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data));
-    return;
+    {
+      StgChar val = ((StgIntCharlikeClosure*)closure)->data;
+      
+      if ((val <= MAX_CHARLIKE) && (val >= MIN_CHARLIKE)) {
+       IF_PAR_DEBUG(pack,
+                    belch("*>^^ Packing a small charlike %d as a PLC", val));
+       PackPLC((StgPtr)CHARLIKE_CLOSURE(val));
+      } else {
+       IF_PAR_DEBUG(pack,
+                    belch("*>^^ Packing a big charlike %d as a normal closure", 
+                          val));
+       PackGeneric(closure);
+      }
+      return;
+    }
        
    case CONSTR_INTLIKE:
      {
@@ -737,14 +746,13 @@ StgClosure *closure;
         IF_PAR_DEBUG(pack,
                      belch("*>^^ Packing a small intlike %d as a PLC", val));
         PackPLC((StgPtr)INTLIKE_CLOSURE(val));
-       return;
        } else {
         IF_PAR_DEBUG(pack,
                      belch("*>^^ Packing a big intlike %d as a normal closure", 
                            val));
         PackGeneric(closure);
-       return;
        }
+      return;
      }
  
    case CONSTR:
@@ -1200,13 +1208,23 @@ PackPAP(StgPAP *pap) {
        /* distinguish static closure (PLC) from other closures (FM) */
        switch (get_itbl((StgClosure*)q)->type) {
        case CONSTR_CHARLIKE:
-       IF_PAR_DEBUG(pack,
-                    belch("*>** PackPAP: packing a charlike closure %d", 
-                          ((StgIntCharlikeClosure*)q)->data));
-    
-       PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)q)->data));
-       p++;
-       break;
+       {
+         StgChar val = ((StgIntCharlikeClosure*)q)->data;
+      
+         if ((val <= MAX_CHARLIKE) && (val >= MIN_CHARLIKE)) {
+           IF_PAR_DEBUG(pack,
+                        belch("*>** PackPAP: Packing ptr to a small charlike %d as a PLC", val));
+           PackPLC((StgPtr)CHARLIKE_CLOSURE(val));
+         } else {
+           IF_PAR_DEBUG(pack,
+                        belch("*>** PackPAP: Packing a ptr to a big charlike %d as a FM", 
+                              val));
+           Pack((StgWord)(ARGTAG_MAX+1));
+           PackFetchMe((StgClosure *)q);
+         }
+         p++;
+         break;
+       }
        
        case CONSTR_INTLIKE:
         {
@@ -1216,17 +1234,15 @@ PackPAP(StgPAP *pap) {
             IF_PAR_DEBUG(pack,
                          belch("*>** PackPAP: Packing ptr to a small intlike %d as a PLC", val));
             PackPLC((StgPtr)INTLIKE_CLOSURE(val));
-           p++;
-           break;
           } else {
             IF_PAR_DEBUG(pack,
                          belch("*>** PackPAP: Packing a ptr to a big intlike %d as a FM", 
                                val));
             Pack((StgWord)(ARGTAG_MAX+1));
             PackFetchMe((StgClosure *)q);
-           p++;
-           break;
           }
+         p++;
+         break;
         }
         case THUNK_STATIC:       // ToDo: check whether that's ok
         case FUN_STATIC:       // ToDo: check whether that's ok
author	qrczak <unknown>
	Mon, 7 Aug 2000 23:37:24 +0000 (23:37 +0000)
committer	qrczak <unknown>
	Mon, 7 Aug 2000 23:37:24 +0000 (23:37 +0000)
ghc/compiler/HsVersions.h		patch \| blob \| history
ghc/compiler/absCSyn/AbsCSyn.lhs		patch \| blob \| history
ghc/compiler/absCSyn/CStrings.lhs		patch \| blob \| history
ghc/compiler/absCSyn/PprAbsC.lhs		patch \| blob \| history
ghc/compiler/basicTypes/Literal.lhs		patch \| blob \| history
ghc/compiler/basicTypes/MkId.lhs		patch \| blob \| history
ghc/compiler/basicTypes/OccName.lhs		patch \| blob \| history
ghc/compiler/basicTypes/Unique.lhs		patch \| blob \| history
ghc/compiler/codeGen/CgCon.lhs		patch \| blob \| history
ghc/compiler/codeGen/CgRetConv.lhs		patch \| blob \| history
ghc/compiler/deSugar/DsExpr.lhs		patch \| blob \| history
ghc/compiler/deSugar/DsUtils.lhs		patch \| blob \| history
ghc/compiler/hsSyn/HsBasic.lhs		patch \| blob \| history
ghc/compiler/hsSyn/HsCore.lhs		patch \| blob \| history
ghc/compiler/javaGen/Java.lhs		patch \| blob \| history
ghc/compiler/javaGen/JavaGen.lhs		patch \| blob \| history
ghc/compiler/javaGen/PrintJava.lhs		patch \| blob \| history
ghc/compiler/main/Constants.lhs		patch \| blob \| history
ghc/compiler/nativeGen/AbsCStixGen.lhs		patch \| blob \| history
ghc/compiler/nativeGen/MachCode.lhs		patch \| blob \| history
ghc/compiler/nativeGen/MachMisc.lhs		patch \| blob \| history
ghc/compiler/nativeGen/PprMach.lhs		patch \| blob \| history
ghc/compiler/nativeGen/StixPrim.lhs		patch \| blob \| history
ghc/compiler/parser/Lex.lhs		patch \| blob \| history
ghc/compiler/prelude/PrelInfo.lhs		patch \| blob \| history
ghc/compiler/prelude/PrelNames.lhs		patch \| blob \| history
ghc/compiler/prelude/PrimRep.lhs		patch \| blob \| history
ghc/compiler/prelude/TysPrim.lhs		patch \| blob \| history
ghc/compiler/rename/Rename.lhs		patch \| blob \| history
ghc/compiler/typecheck/TcTyDecls.lhs		patch \| blob \| history
ghc/compiler/utils/FastString.lhs		patch \| blob \| history
ghc/compiler/utils/Outputable.lhs		patch \| blob \| history
ghc/compiler/utils/UnicodeUtil.lhs	[new file with mode: 0644]	patch \| blob
ghc/docs/users_guide/glasgow_exts.sgml		patch \| blob \| history
ghc/docs/users_guide/vs_haskell.sgml		patch \| blob \| history
ghc/includes/ClosureMacros.h		patch \| blob \| history
ghc/includes/Constants.h		patch \| blob \| history
ghc/includes/HsFFI.h		patch \| blob \| history
ghc/includes/PrimOps.h		patch \| blob \| history
ghc/includes/RtsAPI.h		patch \| blob \| history
ghc/includes/StgTicky.h		patch \| blob \| history
ghc/includes/StgTypes.h		patch \| blob \| history
ghc/lib/std/PrelBase.lhs		patch \| blob \| history
ghc/lib/std/PrelEnum.lhs		patch \| blob \| history
ghc/lib/std/PrelRead.lhs		patch \| blob \| history
ghc/lib/std/cbits/filePutc.c		patch \| blob \| history
ghc/rts/ForeignCall.c		patch \| blob \| history
ghc/rts/Prelude.c		patch \| blob \| history
ghc/rts/PrimOps.hc		patch \| blob \| history
ghc/rts/RtsAPI.c		patch \| blob \| history
ghc/rts/StgCRun.c		patch \| blob \| history
ghc/rts/StrHash.c		patch \| blob \| history
ghc/rts/parallel/Pack.c		patch \| blob \| history