ghc/compiler/basicTypes/Literal.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1998
   3 %
   4 \section[Literal]{@Literal@: Machine literals (unboxed, of course)}
   5
   6 \begin{code}
   7 module Literal
   8         ( Literal(..)           -- Exported to ParseIface
   9         , mkMachInt, mkMachWord
  10         , mkMachInt64, mkMachWord64
  11         , isLitLitLit, maybeLitLit, litSize
  12         , litIsDupable, litIsTrivial
  13         , literalType, literalPrimRep
  14         , hashLiteral
  15
  16         , inIntRange, inWordRange, tARGET_MAX_INT, inCharRange
  17         , isZeroLit             -- (no trailing comma: ",," is not a legal export list)
  18
  19         , word2IntLit, int2WordLit
  20         , narrow8IntLit, narrow16IntLit, narrow32IntLit
  21         , narrow8WordLit, narrow16WordLit, narrow32WordLit
  22         , char2IntLit, int2CharLit
  23         , float2IntLit, int2FloatLit, double2IntLit, int2DoubleLit
  24         , nullAddrLit, float2DoubleLit, double2FloatLit
  25         ) where
  26
  27 #include "HsVersions.h"
  28
  29 import TysPrim          ( charPrimTy, addrPrimTy, floatPrimTy, doublePrimTy,
  30                           intPrimTy, wordPrimTy, int64PrimTy, word64PrimTy
  31                         )
  32 import PrimRep          ( PrimRep(..) )
  33 import TcType           ( Type, tcCmpType )
  34 import Type             ( typePrimRep )
  35 import PprType          ( pprParendType )
  36 import CStrings         ( pprFSInCStyle )
  37
  38 import Outputable
  39 import FastTypes
  40 import FastString
  41 import Binary
  42 import Util             ( thenCmp )
  43
  44 import Ratio            ( numerator )
  45 import FastString       ( uniqueOfFS, lengthFS )
  46 import DATA_INT         ( Int8,  Int16,  Int32 )
  47 import DATA_WORD        ( Word8, Word16, Word32 )
  48 import Char             ( ord, chr )
  49 \end{code}
  50
  51
  52
  53 %************************************************************************
  54 %*                                                                      *
  55 \subsection{Sizes}
  56 %*                                                                      *
  57 %************************************************************************
  58
  59 If we're compiling with GHC (and we're not cross-compiling), then we
  60 know that minBound and maxBound :: Int are the right values for the
  61 target architecture.  Otherwise, we assume -2^31 and 2^31-1
  62 respectively (which will be wrong on a 64-bit machine).
  63
  64 \begin{code}
  65 tARGET_MIN_INT, tARGET_MAX_INT, tARGET_MAX_WORD :: Integer
  66 #if __GLASGOW_HASKELL__
  67 tARGET_MIN_INT  = fromIntegral (minBound :: Int)   -- bounds of the compiling GHC's Int
  68 tARGET_MAX_INT  = fromIntegral (maxBound :: Int)
  69 #else
  70 tARGET_MIN_INT  = negate 2147483648                -- fall back to a 32-bit Int
  71 tARGET_MAX_INT  = 2147483647
  72 #endif
  73 tARGET_MAX_WORD = 2 * tARGET_MAX_INT + 1
  74
  75 tARGET_MAX_CHAR :: Int          -- largest character code accepted by inCharRange
  76 tARGET_MAX_CHAR = 1114111       -- i.e. 0x10ffff
  77 \end{code}
  78
  79
  80 %************************************************************************
  81 %*                                                                      *
  82 \subsection{Literals}
  83 %*                                                                      *
  84 %************************************************************************
  85
  86 So-called @Literals@ are {\em either}:
  87 \begin{itemize}
  88 \item
  89 An unboxed (``machine'') literal (type: @IntPrim@, @FloatPrim@, etc.),
  90 which is presumed to be surrounded by appropriate constructors
  91 (@mKINT@, etc.), so that the overall thing makes sense.
  92 \item
  93 An Integer, Rational, or String literal whose representation we are
  94 {\em uncommitted} about; i.e., the surrounding with constructors,
  95 function applications, etc., etc., has not yet been done.
  96 \end{itemize}
  97
  98 \begin{code}
  99 data Literal
 100   =     ------------------
 101         -- First the primitive guys
 102     MachChar    Int             -- Char#        At least 31 bits
 103   | MachStr     FastString      -- String literal; its type is Addr# (see literalType)
 104
 105   | MachAddr    Integer -- Whatever this machine thinks is a "pointer"
 106
 107   | MachInt     Integer         -- Int#         At least WORD_SIZE_IN_BITS bits
 108   | MachInt64   Integer         -- Int64#       At least 64 bits
 109   | MachWord    Integer         -- Word#        At least WORD_SIZE_IN_BITS bits
 110   | MachWord64  Integer         -- Word64#      At least 64 bits
 111
 112   | MachFloat   Rational        -- Float#       value kept as a Rational
 113   | MachDouble  Rational        -- Double#      value kept as a Rational
 114
 115         -- MachLabel is used (only) for the literal derived from a
 116         -- "foreign label" declaration.
 117         -- The string argument is the name of a symbol.  This literal
 118         -- refers to the *address* of the label.
 119   | MachLabel   FastString              -- always an Addr#
 120
 121         -- lit-lits only work for via-C compilation, hence they
 122         -- are deprecated.  The string is emitted verbatim into
 123         -- the C file, and can therefore be any C expression,
 124         -- macro call, #defined constant etc.
 125   | MachLitLit  FastString Type -- Type might be Addr# or Int# etc
 126 \end{code}
 127
 128 Binary instance: must do this manually, because we don't want the type
 129 arg of MachLitLit involved.
 130
 131 \begin{code}
 132 instance Binary Literal where
 133     -- Wire format: a one-byte constructor tag followed by the payload.
 134     -- The Type held by a MachLitLit is not written out, so on reading it
 135     -- is replaced by an error thunk.
 136     put_ bh lit = case lit of
 137         MachChar   c   -> putByte bh 0  >> put_ bh c
 138         MachStr    s   -> putByte bh 1  >> put_ bh s
 139         MachAddr   a   -> putByte bh 2  >> put_ bh a
 140         MachInt    i   -> putByte bh 3  >> put_ bh i
 141         MachInt64  i   -> putByte bh 4  >> put_ bh i
 142         MachWord   w   -> putByte bh 5  >> put_ bh w
 143         MachWord64 w   -> putByte bh 6  >> put_ bh w
 144         MachFloat  r   -> putByte bh 7  >> put_ bh r
 145         MachDouble r   -> putByte bh 8  >> put_ bh r
 146         MachLabel  l   -> putByte bh 9  >> put_ bh l
 147         MachLitLit s _ -> putByte bh 10 >> put_ bh s
 148
 149     get bh = do
 150         tag <- getByte bh
 151         case tag of
 152           0  -> get bh >>= return . MachChar
 153           1  -> get bh >>= return . MachStr
 154           2  -> get bh >>= return . MachAddr
 155           3  -> get bh >>= return . MachInt
 156           4  -> get bh >>= return . MachInt64
 157           5  -> get bh >>= return . MachWord
 158           6  -> get bh >>= return . MachWord64
 159           7  -> get bh >>= return . MachFloat
 160           8  -> get bh >>= return . MachDouble
 161           9  -> get bh >>= return . MachLabel
 162           10 -> do s <- get bh
 163                    return (MachLitLit s (error "MachLitLit: no type"))
 164           -- Any other tag was never written by put_ above; report it by
 165           -- value rather than dying with an anonymous pattern-match failure.
 166           _  -> error ("Binary.get(Literal): unknown tag " ++ show tag)
 180 \end{code}
 181
 182 \begin{code}
 183 instance Outputable Literal where      -- rendering is delegated to pprLit
 184     ppr = pprLit
 185
 186 instance Show Literal where            -- Show reuses the Outputable rendering
 187     showsPrec prec = showsPrecSDoc prec . ppr
 188
 189 instance Eq Literal where              -- equality is read off the ordering
 190     x == y = compare x y == EQ
 191     x /= y = compare x y /= EQ
 192
 193 instance Ord Literal where
 194     compare = cmpLit                   -- every other method is derived from this
 195     x <  y  = compare x y == LT
 196     x <= y  = compare x y /= GT
 197     x >  y  = compare x y == GT
 198     x >= y  = compare x y /= LT
 199 \end{code}
 200
 201
 202         Construction
 203         ~~~~~~~~~~~~
 204 \begin{code}
 205 -- Thin wrappers over the integral constructors; none of them range-checks.
 206 mkMachInt, mkMachWord, mkMachInt64, mkMachWord64 :: Integer -> Literal
 207
 208 -- No ASSERT2( inIntRange x, integer x ) here: out-of-range Int# literals
 209 -- are allowed.  For example, one can write (intToWord# 0xffff0000) to get
 210 -- a particular Word bit-pattern, and there's no other convenient way to
 211 -- write such literals, which is why we allow it.
 212 mkMachInt    = MachInt
 213 -- Likewise the ASSERT2( inWordRange x, integer x ) stays switched off.
 214 mkMachWord   = MachWord
 215 mkMachInt64  = MachInt64
 216 mkMachWord64 = MachWord64
 217
 218 inIntRange, inWordRange :: Integer -> Bool     -- inclusive range tests
 219 inIntRange  n = tARGET_MIN_INT <= n && n <= tARGET_MAX_INT
 220 inWordRange n = 0 <= n              && n <= tARGET_MAX_WORD
 221
 222 inCharRange :: Int -> Bool      -- is the code within 0 .. tARGET_MAX_CHAR (inclusive)?
 223 inCharRange code = 0 <= code && code <= tARGET_MAX_CHAR
 224
 225 isZeroLit :: Literal -> Bool    -- True for an integral or floating literal equal to zero
 226 isZeroLit (MachInt    n) = n == 0
 227 isZeroLit (MachInt64  n) = n == 0
 228 isZeroLit (MachWord   n) = n == 0
 229 isZeroLit (MachWord64 n) = n == 0
 230 isZeroLit (MachFloat  r) = r == 0
 231 isZeroLit (MachDouble r) = r == 0
 232 isZeroLit _              = False       -- every other constructor
 233 \end{code}
 234
 235         Coercions
 236         ~~~~~~~~~
 237 \begin{code}
 238 -- Coercions between literal forms.  Each function has an equation only
 239 -- for the one constructor it converts from.
 240 word2IntLit, int2WordLit, narrow8IntLit, narrow16IntLit, narrow32IntLit,
 241   narrow8WordLit, narrow16WordLit, narrow32WordLit, char2IntLit, int2CharLit,
 242   float2IntLit, int2FloatLit, double2IntLit, int2DoubleLit,
 243   float2DoubleLit, double2FloatLit
 244   :: Literal -> Literal
 245
 246 -- Words above tARGET_MAX_INT are shifted down by tARGET_MAX_WORD + 1.
 247 word2IntLit (MachWord w) =
 248   MachInt (if w <= tARGET_MAX_INT then w else w - tARGET_MAX_WORD - 1)
 249
 250 -- Negative Ints are shifted up by tARGET_MAX_WORD + 1:  (-1)  --->  tARGET_MAX_WORD
 251 int2WordLit (MachInt i) =
 252   MachWord (if i >= 0 then i else 1 + tARGET_MAX_WORD + i)
 253
 254 narrow8IntLit   (MachInt  n) = MachInt  (fromIntegral (fromInteger n :: Int8))    -- via the sized type and back
 255 narrow16IntLit  (MachInt  n) = MachInt  (fromIntegral (fromInteger n :: Int16))
 256 narrow32IntLit  (MachInt  n) = MachInt  (fromIntegral (fromInteger n :: Int32))
 257 narrow8WordLit  (MachWord n) = MachWord (fromIntegral (fromInteger n :: Word8))
 258 narrow16WordLit (MachWord n) = MachWord (fromIntegral (fromInteger n :: Word16))
 259 narrow32WordLit (MachWord n) = MachWord (fromIntegral (fromInteger n :: Word32))
 260
 261 -- Char# <-> Int#: the character is stored as its code
 262 char2IntLit (MachChar code) = MachInt  (fromIntegral code)
 263 int2CharLit (MachInt  n)    = MachChar (fromIntegral n)
 264
 265 -- Floating <-> Int#: truncate one way, exact embedding the other
 266 float2IntLit  (MachFloat  r) = MachInt    (truncate   r)
 267 int2FloatLit  (MachInt    n) = MachFloat  (toRational n)
 268 double2IntLit (MachDouble r) = MachInt    (truncate   r)
 269 int2DoubleLit (MachInt    n) = MachDouble (toRational n)
 270 float2DoubleLit (MachFloat  r) = MachDouble r     -- the Rational is carried over unchanged
 271 double2FloatLit (MachDouble r) = MachFloat  r
 272
 273 nullAddrLit :: Literal          -- the address literal with value zero
 274 nullAddrLit = MachAddr (0 :: Integer)
 275 \end{code}
 276
 277         Predicates
 278         ~~~~~~~~~~
 279 \begin{code}
 280 isLitLitLit :: Literal -> Bool         -- True exactly for the MachLitLit constructor
 281 isLitLitLit lit = case lit of { MachLitLit _ _ -> True; _ -> False }
 282
 283 maybeLitLit :: Literal -> Maybe (FastString, Type)     -- the string and type of a lit-lit, if it is one
 284 maybeLitLit lit = case lit of { MachLitLit s t -> Just (s,t); _ -> Nothing }
 285
 286 litIsTrivial :: Literal -> Bool
 287 -- Answers: is there absolutely no penalty to duplicating the literal?
 288 --      c.f. CoreUtils.exprIsTrivial
 289 -- Only string literals answer False.
 290 litIsTrivial lit = case lit of { MachStr _ -> False; _ -> True }
 292
 293 litIsDupable :: Literal -> Bool
 294 -- Answers: does code space stay reasonable if we duplicate this literal?
 295 --      c.f. CoreUtils.exprIsDupable
 296 -- Currently this gives the same answers as litIsTrivial: only strings say False.
 297 litIsDupable lit = case lit of { MachStr _ -> False; _ -> True }
 299
 300 litSize :: Literal -> Int
 301 -- Used by CoreUnfold.sizeExpr.  Every literal has size at least 1,
 302 -- otherwise   f "x"   might be too small.
 303 litSize lit = case lit of
 304     MachStr str -> lengthFS str `div` 4 + 1
 305     _           -> 1
 307 \end{code}
 308
 309         Types
 310         ~~~~~
 311 \begin{code}
 312 literalType :: Literal -> Type                 -- the primitive type of each literal
 313 literalType (MachLitLit _ ty) = ty             -- a lit-lit carries its own type
 314 literalType (MachStr    _)    = addrPrimTy     -- strings, addresses and labels are all Addr#
 315 literalType (MachAddr   _)    = addrPrimTy
 316 literalType (MachLabel  _)    = addrPrimTy
 317 literalType (MachChar   _)    = charPrimTy
 318 literalType (MachInt    _)    = intPrimTy
 319 literalType (MachInt64  _)    = int64PrimTy
 320 literalType (MachWord   _)    = wordPrimTy
 321 literalType (MachWord64 _)    = word64PrimTy
 322 literalType (MachFloat  _)    = floatPrimTy
 323 literalType (MachDouble _)    = doublePrimTy
 324 \end{code}
 325
 326 \begin{code}
 327 literalPrimRep :: Literal -> PrimRep   -- the primitive representation of each literal
 328 literalPrimRep lit = case lit of
 329     MachChar   _    -> CharRep
 330     MachStr    _    -> AddrRep          -- specifically: "char *"
 331     MachAddr   _    -> AddrRep
 332     MachInt    _    -> IntRep
 333     MachWord   _    -> WordRep
 334     MachInt64  _    -> Int64Rep
 335     MachWord64 _    -> Word64Rep
 336     MachFloat  _    -> FloatRep
 337     MachDouble _    -> DoubleRep
 338     MachLabel  _    -> AddrRep
 339     MachLitLit _ ty -> typePrimRep ty   -- taken from the type the lit-lit carries
 340 \end{code}
 341
 342
 343         Comparison
 344         ~~~~~~~~~~
 345 \begin{code}
 346 cmpLit lit1 lit2 = case (lit1, lit2) of   -- same constructor: compare payloads; otherwise compare tags
 347     (MachChar   a,   MachChar   b)   -> compare a b
 348     (MachStr    a,   MachStr    b)   -> compare a b
 349     (MachAddr   a,   MachAddr   b)   -> compare a b
 350     (MachInt    a,   MachInt    b)   -> compare a b
 351     (MachWord   a,   MachWord   b)   -> compare a b
 352     (MachInt64  a,   MachInt64  b)   -> compare a b
 353     (MachWord64 a,   MachWord64 b)   -> compare a b
 354     (MachFloat  a,   MachFloat  b)   -> compare a b
 355     (MachDouble a,   MachDouble b)   -> compare a b
 356     (MachLabel  a,   MachLabel  b)   -> compare a b
 357     (MachLitLit a s, MachLitLit b t) -> compare a b `thenCmp` tcCmpType s t
 358     _ -> if litTag lit1 <# litTag lit2 then LT else GT
 359
 360 litTag MachChar{}   = _ILIT(1)         -- one distinct tag per constructor, used by cmpLit
 361 litTag MachStr{}    = _ILIT(2)
 362 litTag MachAddr{}   = _ILIT(3)
 363 litTag MachInt{}    = _ILIT(4)
 364 litTag MachWord{}   = _ILIT(5)
 365 litTag MachInt64{}  = _ILIT(6)
 366 litTag MachWord64{} = _ILIT(7)
 367 litTag MachFloat{}  = _ILIT(8)
 368 litTag MachDouble{} = _ILIT(9)
 369 litTag MachLabel{}  = _ILIT(10)
 370 litTag MachLitLit{} = _ILIT(11)
 371 \end{code}
 372
 373         Printing
 374         ~~~~~~~~
 375 * MachX (i.e. unboxed) things are printed unadorned (e.g. 3, 'a', "foo")
 376   exceptions: MachFloat and MachAddr get an initial keyword prefix
 377
 378 \begin{code}
 379 pprLit lit = getPprStyle (\ sty -> render (codeStyle sty))
 380   where
 381     -- 'forC' is the codeStyle flag of the current printing style.  Each
 382     -- constructor has one rendering for code style and one otherwise.
 383     render forC = case lit of
 384         MachChar ch -> pick (hcat [ptext SLIT("(C_)"), text (show ch)])
 385                             (pprHsChar ch)
 386
 387         -- Warning: printing MachStr in code style assumes it contains
 388         -- only characters '\0'..'\xFF'!
 389         MachStr s   -> pick (pprFSInCStyle s) (pprHsString s)
 390
 391         -- Avoid a problem whereby gcc interprets the constant minInt
 392         -- as unsigned: emit it as ((minInt+1)-1) in code style.
 393         MachInt i
 394           | forC && i == tARGET_MIN_INT -> parens (integer (i+1) <> text "-1")
 395           | otherwise                   -> pprIntVal i
 396
 397         -- Same problem with gcc???
 398         MachInt64 i  -> pick (pprIntVal i) (ptext SLIT("__int64") <+> integer i)
 399
 400         MachWord w   -> pick (pprHexVal w) (ptext SLIT("__word") <+> integer w)
 401         MachWord64 w -> pick (pprHexVal w) (ptext SLIT("__word64") <+> integer w)
 402
 403         MachFloat f  -> pick (ptext SLIT("(StgFloat)") <> code_rational f)
 404                              (ptext SLIT("__float") <+> rational f)
 405         MachDouble d -> pick (code_rational d) (rational d)
 406
 407         MachAddr p   -> pick (ptext SLIT("(void*)") <> integer p)
 408                              (ptext SLIT("__addr") <+> integer p)
 409
 410         MachLabel l  -> pick (ptext SLIT("(&") <> ftext l <> char ')')
 411                              (ptext SLIT("__label") <+> pprHsString l)
 412
 413         MachLitLit s ty -> pick (ftext s)
 414                                 (parens (hsep [ptext SLIT("__litlit"),
 415                                                pprHsString s,
 416                                                pprParendType ty]))
 417       where
 418         -- first argument in code style, second argument otherwise
 419         pick inCode inOther = if forC then inCode else inOther
 420
 423
 424 -- negative floating literals in code style need parentheses to avoid
 425 -- interacting with surrounding syntax.
 426 code_rational d =       -- a negative value is wrapped in parentheses, others are printed bare
 427     (if d < 0 then parens else id) (rational d)
 428
 429 pprIntVal :: Integer -> SDoc
 430 -- Negative integers are parenthesised so the output is unambiguous;
 431 -- everything else is printed bare.
 432 pprIntVal n = (if n < 0 then parens else id) (integer n)
 433
 434 pprHexVal :: Integer -> SDoc
 435 -- Print in C hex format: 0x13fa.  A negative value comes out as (-0x13fa);
 436 -- the digit loop is only ever given positive numbers, so remainders are 0..15.
 437 pprHexVal w
 438   | w < 0     = parens (char '-' <> pprHexVal (negate w))
 439   | w == 0    = ptext SLIT("0x0")
 440   | otherwise = ptext SLIT("0x") <> go w
 441   where
 442     go 0 = empty            -- most significant digit is emitted first
 443     go n = go q <> char (hexDigit (fromInteger r)) where (q, r) = n `quotRem` 16
 444     hexDigit d = chr (if d < 10 then d + ord '0' else d - 10 + ord 'a')
 445 \end{code}
 446
 447
 448 %************************************************************************
 449 %*                                                                      *
 450 \subsection{Hashing}
 451 %*                                                                      *
 452 %************************************************************************
 453
 454 Hash values should be zero or a positive integer.  No negatives please.
 455 (They mess up the UniqFM for some reason.)
 456
 457 \begin{code}
 458 hashLiteral :: Literal -> Int
 459 hashLiteral (MachChar code)     = 1000 + code   -- Keep it out of range of common ints
 460 hashLiteral (MachAddr   n)      = hashInteger n -- Integer payloads
 461 hashLiteral (MachInt    n)      = hashInteger n
 462 hashLiteral (MachInt64  n)      = hashInteger n
 463 hashLiteral (MachWord   n)      = hashInteger n
 464 hashLiteral (MachWord64 n)      = hashInteger n
 465 hashLiteral (MachFloat  q)      = hashRational q -- Rational payloads
 466 hashLiteral (MachDouble q)      = hashRational q
 467 hashLiteral (MachStr    fs)     = hashFS fs     -- FastString payloads
 468 hashLiteral (MachLabel  fs)     = hashFS fs
 469 hashLiteral (MachLitLit fs _)   = hashFS fs     -- the type takes no part in the hash
 470
 471 hashRational :: Rational -> Int        -- hashes the numerator only
 472 hashRational = hashInteger . numerator
 473
 474 hashInteger :: Integer -> Int
 475 -- The +1 is to avoid zero, which is a Bad Number
 476 -- since we use * to combine hash values
 477 hashInteger n = abs (fromInteger (rem n 10000)) + 1
 478
 479 hashFS :: FastString -> Int            -- the FastString's unique, boxed as an Int
 480 hashFS = iBox . uniqueOfFS
 481 \end{code}