compiler/cmm/CmmType.hs

   1
   2 module CmmType
   3     ( CmmType   -- Abstract
   4     , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
   5     , cInt, cLong
   6     , cmmBits, cmmFloat
   7     , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
   8     , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
   9
  10     , Width(..)
  11     , widthInBits, widthInBytes, widthInLog, widthFromBytes
  12     , wordWidth, halfWordWidth, cIntWidth, cLongWidth
  13     , narrowU, narrowS
  14    )
  15 where
  16
  17 #include "HsVersions.h"
  18
  19 import Constants
  20 import FastString
  21 import Outputable
  22
  23 import Data.Word
  24 import Data.Int
  25
  26 -----------------------------------------------------------------------------
  27 --              CmmType
  28 -----------------------------------------------------------------------------
  29
  30   -- NOTE: CmmType is an abstract type, not exported from this
  31   --       module so you can easily change its representation
  32   --
  33   -- However Width is exported in a concrete way,
  34   -- and is used extensively in pattern-matching
  35
  36 data CmmType    -- The important one!
  37   = CmmType CmmCat Width
  38
  39 data CmmCat     -- "Category" (not exported)
  40    = GcPtrCat   -- GC pointer
  41    | BitsCat    -- Non-pointer
  42    | FloatCat   -- Float
  43    deriving( Eq )
  44         -- See Note [Signed vs unsigned] at the end
  45
  46 instance Outputable CmmType where
  47   ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)
  48
  49 instance Outputable CmmCat where
  50   ppr FloatCat  = ptext $ sLit("F")
  51   ppr _         = ptext $ sLit("I")
  52 -- Temp Jan 08
  53 --  ppr FloatCat        = ptext $ sLit("float")
  54 --  ppr BitsCat   = ptext $ sLit("bits")
  55 --  ppr GcPtrCat  = ptext $ sLit("gcptr")
  56
  57 -- Why is CmmType stratified?  For native code generation,
  58 -- most of the time you just want to know what sort of register
  59 -- to put the thing in, and for this you need to know how
  60 -- many bits thing has and whether it goes in a floating-point
  61 -- register.  By contrast, the distinction between GcPtr and
  62 -- GcNonPtr is of interest to only a few parts of the code generator.
  63
  64 -------- Equality on CmmType --------------
  65 -- CmmType is *not* an instance of Eq; sometimes we care about the
  66 -- Gc/NonGc distinction, and sometimes we don't
  67 -- So we use an explicit function to force you to think about it
  68 cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality
  69 cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2
  70
  71 cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
  72   -- This equality is temporary; used in CmmLint
  73   -- but the RTS files are not yet well-typed wrt pointers
  74 cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2)
  75    = c1 `weak_eq` c2 && w1==w2
  76    where
  77       FloatCat `weak_eq` FloatCat = True
  78       FloatCat `weak_eq` _other   = False
  79       _other   `weak_eq` FloatCat = False
  80       _word1   `weak_eq` _word2   = True        -- Ignores GcPtr
  81
  82 --- Simple operations on CmmType -----
  83 typeWidth :: CmmType -> Width
  84 typeWidth (CmmType _ w) = w
  85
  86 cmmBits, cmmFloat :: Width -> CmmType
  87 cmmBits  = CmmType BitsCat
  88 cmmFloat = CmmType FloatCat
  89
  90 -------- Common CmmTypes ------------
  91 -- Floats and words of specific widths
  92 b8, b16, b32, b64, f32, f64 :: CmmType
  93 b8     = cmmBits W8
  94 b16    = cmmBits W16
  95 b32    = cmmBits W32
  96 b64    = cmmBits W64
  97 f32    = cmmFloat W32
  98 f64    = cmmFloat W64
  99
 100 -- CmmTypes of native word widths
 101 bWord, bHalfWord, gcWord :: CmmType
 102 bWord     = cmmBits wordWidth
 103 bHalfWord = cmmBits halfWordWidth
 104 gcWord    = CmmType GcPtrCat wordWidth
 105
 106 cInt, cLong :: CmmType
 107 cInt  = cmmBits cIntWidth
 108 cLong = cmmBits cLongWidth
 109
 110
 111 ------------ Predicates ----------------
 112 isFloatType, isGcPtrType :: CmmType -> Bool
 113 isFloatType (CmmType FloatCat    _) = True
 114 isFloatType _other                  = False
 115
 116 isGcPtrType (CmmType GcPtrCat _) = True
 117 isGcPtrType _other               = False
 118
 119 isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
 120 -- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
 121 -- isFloat32 and 64 are obvious
 122
 123 isWord64 (CmmType BitsCat  W64) = True
 124 isWord64 (CmmType GcPtrCat W64) = True
 125 isWord64 _other                 = False
 126
 127 isWord32 (CmmType BitsCat  W32) = True
 128 isWord32 (CmmType GcPtrCat W32) = True
 129 isWord32 _other                 = False
 130
 131 isFloat32 (CmmType FloatCat W32) = True
 132 isFloat32 _other                 = False
 133
 134 isFloat64 (CmmType FloatCat W64) = True
 135 isFloat64 _other                 = False
 136
 137 -----------------------------------------------------------------------------
 138 --              Width
 139 -----------------------------------------------------------------------------
 140
 141 data Width   = W8 | W16 | W32 | W64
 142              | W80      -- Extended double-precision float,
 143                         -- used in x86 native codegen only.
 144                         -- (we use Ord, so it'd better be in this order)
 145              | W128
 146              deriving (Eq, Ord, Show)
 147
 148 instance Outputable Width where
 149    ppr rep = ptext (mrStr rep)
 150
 151 mrStr :: Width -> LitString
 152 mrStr W8   = sLit("W8")
 153 mrStr W16  = sLit("W16")
 154 mrStr W32  = sLit("W32")
 155 mrStr W64  = sLit("W64")
 156 mrStr W128 = sLit("W128")
 157 mrStr W80  = sLit("W80")
 158
 159
 160 -------- Common Widths  ------------
 161 wordWidth, halfWordWidth :: Width
 162 wordWidth | wORD_SIZE == 4 = W32
 163           | wORD_SIZE == 8 = W64
 164           | otherwise      = panic "MachOp.wordRep: Unknown word size"
 165
 166 halfWordWidth | wORD_SIZE == 4 = W16
 167               | wORD_SIZE == 8 = W32
 168               | otherwise      = panic "MachOp.halfWordRep: Unknown word size"
 169
 170 -- cIntRep is the Width for a C-language 'int'
 171 cIntWidth, cLongWidth :: Width
 172 #if SIZEOF_INT == 4
 173 cIntWidth = W32
 174 #elif  SIZEOF_INT == 8
 175 cIntWidth = W64
 176 #endif
 177
 178 #if SIZEOF_LONG == 4
 179 cLongWidth = W32
 180 #elif  SIZEOF_LONG == 8
 181 cLongWidth = W64
 182 #endif
 183
 184 widthInBits :: Width -> Int
 185 widthInBits W8   = 8
 186 widthInBits W16  = 16
 187 widthInBits W32  = 32
 188 widthInBits W64  = 64
 189 widthInBits W128 = 128
 190 widthInBits W80  = 80
 191
 192 widthInBytes :: Width -> Int
 193 widthInBytes W8   = 1
 194 widthInBytes W16  = 2
 195 widthInBytes W32  = 4
 196 widthInBytes W64  = 8
 197 widthInBytes W128 = 16
 198 widthInBytes W80  = 10
 199
 200 widthFromBytes :: Int -> Width
 201 widthFromBytes 1  = W8
 202 widthFromBytes 2  = W16
 203 widthFromBytes 4  = W32
 204 widthFromBytes 8  = W64
 205 widthFromBytes 16 = W128
 206 widthFromBytes 10 = W80
 207 widthFromBytes n  = pprPanic "no width for given number of bytes" (ppr n)
 208
 209 -- log_2 of the width in bytes, useful for generating shifts.
 210 widthInLog :: Width -> Int
 211 widthInLog W8   = 0
 212 widthInLog W16  = 1
 213 widthInLog W32  = 2
 214 widthInLog W64  = 3
 215 widthInLog W128 = 4
 216 widthInLog W80  = panic "widthInLog: F80"
 217
 218 -- widening / narrowing
 219
 220 narrowU :: Width -> Integer -> Integer
 221 narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
 222 narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
 223 narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
 224 narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
 225 narrowU _ _ = panic "narrowTo"
 226
 227 narrowS :: Width -> Integer -> Integer
 228 narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
 229 narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
 230 narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
 231 narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
 232 narrowS _ _ = panic "narrowTo"
 233
 234 -------------------------------------------------------------------------
 235 {-      Note [Signed vs unsigned]
 236         ~~~~~~~~~~~~~~~~~~~~~~~~~
 237 Should a CmmType include a signed vs. unsigned distinction?
 238
 239 This is very much like a "hint" in C-- terminology: it isn't necessary
 240 in order to generate correct code, but it might be useful in that the
 241 compiler can generate better code if it has access to higher-level
 242 hints about data.  This is important at call boundaries, because the
 243 definition of a function is not visible at all of its call sites, so
 244 the compiler cannot infer the hints.
 245
 246 Here in Cmm, we're taking a slightly different approach.  We include
 247 the int vs. float hint in the MachRep, because (a) the majority of
 248 platforms have a strong distinction between float and int registers,
 249 and (b) we don't want to do any heavyweight hint-inference in the
 250 native code backend in order to get good code.  We're treating the
 251 hint more like a type: our Cmm is always completely consistent with
 252 respect to hints.  All coercions between float and int are explicit.
 253
 254 What about the signed vs. unsigned hint?  This information might be
 255 useful if we want to keep sub-word-sized values in word-size
 256 registers, which we must do if we only have word-sized registers.
 257
 258 On such a system, there are two straightforward conventions for
 259 representing sub-word-sized values:
 260
 261 (a) Leave the upper bits undefined.  Comparison operations must
 262     sign- or zero-extend both operands before comparing them,
 263     depending on whether the comparison is signed or unsigned.
 264
 265 (b) Always keep the values sign- or zero-extended as appropriate.
 266     Arithmetic operations must narrow the result to the appropriate
 267     size.
 268
 269 A clever compiler might not use either (a) or (b) exclusively, instead
 270 it would attempt to minimize the coercions by analysis: the same kind
 271 of analysis that propagates hints around.  In Cmm we don't want to
 272 have to do this, so we plump for having richer types and keeping the
 273 type information consistent.
 274
 275 If signed/unsigned hints are missing from MachRep, then the only
 276 choice we have is (a), because we don't know whether the result of an
 277 operation should be sign- or zero-extended.
 278
 279 Many architectures have extending load operations, which work well
 280 with (b).  To make use of them with (a), you need to know whether the
 281 value is going to be sign- or zero-extended by an enclosing comparison
 282 (for example), which involves knowing above the context.  This is
 283 doable but more complex.
 284
 285 Further complicating the issue is foreign calls: a foreign calling
 286 convention can specify that signed 8-bit quantities are passed as
 287 sign-extended 32 bit quantities, for example (this is the case on the
 288 PowerPC).  So we *do* need sign information on foreign call arguments.
 289
 290 Pros for adding signed vs. unsigned to MachRep:
 291
 292   - It would let us use convention (b) above, and get easier
 293     code generation for extending loads.
 294
 295   - Less information required on foreign calls.
 296
 297   - MachOp type would be simpler
 298
 299 Cons:
 300
 301   - More complexity
 302
 303   - What is the MachRep for a VanillaReg?  Currently it is
 304     always wordRep, but now we have to decide whether it is
 305     signed or unsigned.  The same VanillaReg can thus have
 306     different MachReps in different parts of the program.
 307
 308   - Extra coercions cluttering up expressions.
 309
 310 Currently for GHC, the foreign call point is moot, because we do our
 311 own promotion of sub-word-sized values to word-sized values.  The Int8
 312 type is represnted by an Int# which is kept sign-extended at all times
 313 (this is slightly naughty, because we're making assumptions about the
 314 C calling convention rather early on in the compiler).  However, given
 315 this, the cons outweigh the pros.
 316
 317 -}
 318