4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
17 #include "HsVersions.h"
26 -----------------------------------------------------------------------------
28 -----------------------------------------------------------------------------
30 -- NOTE: CmmType is an abstract type, not exported from this
31 -- module so you can easily change its representation
33 -- However Width is exported in a concrete way,
34 -- and is used extensively in pattern-matching
-- The central type of this module: a category paired with a width.
-- It is kept abstract so that its representation can be changed easily.
data CmmType
  = CmmType
      CmmCat    -- which category of value this is
      Width     -- how wide the value is
-- "Category" of a CmmType (not exported).
data CmmCat
   = GcPtrCat   -- GC pointer
   | BitsCat    -- Non-pointer
   | FloatCat   -- Float; matched by the Outputable instance, weak_eq,
                -- cmmFloat and the isFloat* predicates
   deriving( Eq )       -- cmmEqType compares categories with (==)
        -- See Note [Signed vs unsigned] at the end
-- A type is rendered as its category immediately followed by its
-- width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType cat wid -> ppr cat <> ppr (widthInBits wid)
-- The float category prints as "F"; every other category prints as "I".
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext (sLit "F")
    _        -> ptext (sLit "I")

-- Disabled, more verbose rendering:
-- ppr FloatCat = ptext $ sLit("float")
-- ppr BitsCat = ptext $ sLit("bits")
-- ppr GcPtrCat = ptext $ sLit("gcptr")
57 -- Why is CmmType stratified? For native code generation,
58 -- most of the time you just want to know what sort of register
59 -- to put the thing in, and for this you need to know how
60 -- many bits thing has and whether it goes in a floating-point
61 -- register. By contrast, the distinction between GcPtr and
62 -- GcNonPtr is of interest to only a few parts of the code generator.
64 -------- Equality on CmmType --------------
65 -- CmmType is *not* an instance of Eq; sometimes we care about the
66 -- Gc/NonGc distinction, and sometimes we don't
67 -- So we use an explicit function to force you to think about it
-- Exact equality: the categories must match, and then the widths.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType catL widL) (CmmType catR widR)
  | catL == catR = widL == widR
  | otherwise    = False
-- Equality that does not distinguish GC pointers from plain bits.
-- This is a temporary measure used in CmmLint, because the RTS files
-- are not yet well-typed with respect to pointers.
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType catL widL) (CmmType catR widR)
  = catL `weak_eq` catR && widL == widR
  where
    -- Two categories agree when both are floats or neither is;
    -- the GcPtr/Bits distinction is ignored.
    weak_eq :: CmmCat -> CmmCat -> Bool
    a `weak_eq` b = isFloatCat a == isFloatCat b

    isFloatCat :: CmmCat -> Bool
    isFloatCat FloatCat = True
    isFloatCat _        = False
82 --- Simple operations on CmmType -----
-- Projects the width out of a type, ignoring its category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ wid -> wid
-- Builds a non-pointer bits type of the given width.
cmmBits :: Width -> CmmType
cmmBits wid = CmmType BitsCat wid

-- Builds a floating-point type of the given width.
cmmFloat :: Width -> CmmType
cmmFloat wid = CmmType FloatCat wid
90 -------- Common CmmTypes ------------
91 -- Floats and words of specific widths
b8, b16, b32, b64, f32, f64 :: CmmType
-- Non-pointer bit types; the number in the name is the width in bits.
b8  = cmmBits W8
b16 = cmmBits W16
b32 = cmmBits W32
b64 = cmmBits W64
-- Floating-point types; the number in the name is the width in bits.
f32 = cmmFloat W32
f64 = cmmFloat W64
100 -- CmmTypes of native word widths
-- Non-pointer bits of native word width.
bWord :: CmmType
bWord = cmmBits wordWidth

-- Non-pointer bits of half the native word width.
bHalfWord :: CmmType
bHalfWord = cmmBits halfWordWidth

-- A GC pointer of native word width.
gcWord :: CmmType
gcWord = CmmType GcPtrCat wordWidth
-- Non-pointer bits whose width is cIntWidth.
cInt :: CmmType
cInt = cmmBits cIntWidth

-- Non-pointer bits whose width is cLongWidth.
cLong :: CmmType
cLong = cmmBits cLongWidth
111 ------------ Predicates ----------------
-- True exactly for types in the float category, whatever their width.
isFloatType :: CmmType -> Bool
isFloatType ty = case ty of
  CmmType FloatCat _ -> True
  _                  -> False

-- True exactly for types in the GC-pointer category, whatever their width.
isGcPtrType :: CmmType -> Bool
isGcPtrType ty = case ty of
  CmmType GcPtrCat _ -> True
  _                  -> False
-- isWord32 / isWord64 hold for non-float types of that width,
-- whether they are GC pointers or plain bits.
isWord64 :: CmmType -> Bool
isWord64 ty = case ty of
  CmmType BitsCat  W64 -> True
  CmmType GcPtrCat W64 -> True
  _                    -> False

isWord32 :: CmmType -> Bool
isWord32 ty = case ty of
  CmmType BitsCat  W32 -> True
  CmmType GcPtrCat W32 -> True
  _                    -> False

-- isFloat32 / isFloat64 hold only for float types of that width.
isFloat32 :: CmmType -> Bool
isFloat32 ty = case ty of
  CmmType FloatCat W32 -> True
  _                    -> False

isFloat64 :: CmmType -> Bool
isFloat64 ty = case ty of
  CmmType FloatCat W64 -> True
  _                    -> False
137 -----------------------------------------------------------------------------
139 -----------------------------------------------------------------------------
-- The width of a value.  The derived Ord instance follows the
-- constructor order below, and other code relies on that order.
data Width   = W8 | W16 | W32 | W64
             | W80      -- Extended double-precision float,
                        -- used in x86 native codegen only.
                        -- (we use Ord, so it'd better be in this order)
             | W128     -- used by mrStr, widthInBits, widthInBytes
                        -- and widthFromBytes
             deriving (Eq, Ord, Show)
-- A width prints as the literal name that mrStr gives it.
instance Outputable Width where
  ppr = ptext . mrStr
-- The printable name of each width, as a literal string.
mrStr :: Width -> LitString
mrStr wid = case wid of
  W8   -> sLit "W8"
  W16  -> sLit "W16"
  W32  -> sLit "W32"
  W64  -> sLit "W64"
  W128 -> sLit "W128"
  W80  -> sLit "W80"
160 -------- Common Widths ------------
-- Width of a native machine word, chosen from wORD_SIZE
-- (4 selects W32, 8 selects W64).  Any other word size panics, and the
-- panic names this function and reports the offending size.
wordWidth :: Width
wordWidth = case wORD_SIZE of
  4 -> W32
  8 -> W64
  _ -> pprPanic "CmmType.wordWidth: unknown word size" (ppr wORD_SIZE)

-- Width of half a native machine word, chosen from wORD_SIZE
-- (4 selects W16, 8 selects W32).  Any other word size panics.
halfWordWidth :: Width
halfWordWidth = case wORD_SIZE of
  4 -> W16
  8 -> W32
  _ -> pprPanic "CmmType.halfWordWidth: unknown word size" (ppr wORD_SIZE)
-- cIntWidth is the Width for a C-language 'int', selected at compile
-- time from SIZEOF_INT; cLongWidth is selected likewise from SIZEOF_LONG.
-- An unsupported size stops compilation instead of leaving the
-- signature without a binding.
cIntWidth, cLongWidth :: Width
#if SIZEOF_INT == 4
cIntWidth = W32
#elif SIZEOF_INT == 8
cIntWidth = W64
#else
#error "CmmType.cIntWidth: unsupported SIZEOF_INT"
#endif
#if SIZEOF_LONG == 4
cLongWidth = W32
#elif SIZEOF_LONG == 8
cLongWidth = W64
#else
#error "CmmType.cLongWidth: unsupported SIZEOF_LONG"
#endif
-- Number of bits in a value of the given width.
-- Total: every Width constructor has an equation.
widthInBits :: Width -> Int
widthInBits W8   = 8
widthInBits W16  = 16
widthInBits W32  = 32
widthInBits W64  = 64
widthInBits W128 = 128
widthInBits W80  = 80
-- Number of bytes in a value of the given width.
-- Total: every Width constructor has an equation, and the results
-- mirror the byte counts accepted by widthFromBytes.
widthInBytes :: Width -> Int
widthInBytes W8   = 1
widthInBytes W16  = 2
widthInBytes W32  = 4
widthInBytes W64  = 8
widthInBytes W128 = 16
widthInBytes W80  = 10
-- Maps a size in bytes to the width of that size.
-- Panics, reporting the byte count, when no width has that size.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  16 -> W128
  10 -> W80
  _  -> pprPanic "no width for given number of bytes" (ppr n)
-- log_2 of the width in bytes, useful for generating shifts.
-- W80 is 10 bytes, which is not a power of two, so it has no such
-- logarithm and panics.
widthInLog :: Width -> Int
widthInLog W8   = 0
widthInLog W16  = 1
widthInLog W32  = 2
widthInLog W64  = 3
widthInLog W128 = 4
widthInLog W80  = panic "widthInLog: F80"
218 -- widening / narrowing
-- Narrows an Integer to the given width, reading the result as
-- unsigned: the value goes through the fixed-size WordN type of that
-- width and back.  Only W8, W16, W32 and W64 are handled; any other
-- width panics, naming this function and the width.
narrowU :: Width -> Integer -> Integer
narrowU W8  x = fromIntegral (fromIntegral x :: Word8)
narrowU W16 x = fromIntegral (fromIntegral x :: Word16)
narrowU W32 x = fromIntegral (fromIntegral x :: Word32)
narrowU W64 x = fromIntegral (fromIntegral x :: Word64)
narrowU w   _ = pprPanic "narrowU: unsupported width" (ppr w)
-- Narrows an Integer to the given width, reading the result as
-- signed: the value goes through the fixed-size IntN type of that
-- width and back.  Only W8, W16, W32 and W64 are handled; any other
-- width panics, naming this function and the width.
narrowS :: Width -> Integer -> Integer
narrowS W8  x = fromIntegral (fromIntegral x :: Int8)
narrowS W16 x = fromIntegral (fromIntegral x :: Int16)
narrowS W32 x = fromIntegral (fromIntegral x :: Int32)
narrowS W64 x = fromIntegral (fromIntegral x :: Int64)
narrowS w   _ = pprPanic "narrowS: unsupported width" (ppr w)
234 -------------------------------------------------------------------------
235 {- Note [Signed vs unsigned]
236 ~~~~~~~~~~~~~~~~~~~~~~~~~
237 Should a CmmType include a signed vs. unsigned distinction?
239 This is very much like a "hint" in C-- terminology: it isn't necessary
240 in order to generate correct code, but it might be useful in that the
241 compiler can generate better code if it has access to higher-level
242 hints about data. This is important at call boundaries, because the
243 definition of a function is not visible at all of its call sites, so
244 the compiler cannot infer the hints.
246 Here in Cmm, we're taking a slightly different approach. We include
247 the int vs. float hint in the MachRep, because (a) the majority of
248 platforms have a strong distinction between float and int registers,
249 and (b) we don't want to do any heavyweight hint-inference in the
250 native code backend in order to get good code. We're treating the
251 hint more like a type: our Cmm is always completely consistent with
252 respect to hints. All coercions between float and int are explicit.
254 What about the signed vs. unsigned hint? This information might be
255 useful if we want to keep sub-word-sized values in word-size
256 registers, which we must do if we only have word-sized registers.
258 On such a system, there are two straightforward conventions for
259 representing sub-word-sized values:
261 (a) Leave the upper bits undefined. Comparison operations must
262 sign- or zero-extend both operands before comparing them,
263 depending on whether the comparison is signed or unsigned.
265 (b) Always keep the values sign- or zero-extended as appropriate.
266 Arithmetic operations must narrow the result to the appropriate size.
269 A clever compiler might not use either (a) or (b) exclusively, instead
270 it would attempt to minimize the coercions by analysis: the same kind
271 of analysis that propagates hints around. In Cmm we don't want to
272 have to do this, so we plump for having richer types and keeping the
273 type information consistent.
275 If signed/unsigned hints are missing from MachRep, then the only
276 choice we have is (a), because we don't know whether the result of an
277 operation should be sign- or zero-extended.
279 Many architectures have extending load operations, which work well
280 with (b). To make use of them with (a), you need to know whether the
281 value is going to be sign- or zero-extended by an enclosing comparison
282 (for example), which involves knowing about the context. This is
283 doable but more complex.
285 Further complicating the issue is foreign calls: a foreign calling
286 convention can specify that signed 8-bit quantities are passed as
287 sign-extended 32 bit quantities, for example (this is the case on the
288 PowerPC). So we *do* need sign information on foreign call arguments.
290 Pros for adding signed vs. unsigned to MachRep:
292 - It would let us use convention (b) above, and get easier
293 code generation for extending loads.
295 - Less information required on foreign calls.
297 - MachOp type would be simpler
Cons for adding signed vs. unsigned to MachRep:
303 - What is the MachRep for a VanillaReg? Currently it is
304 always wordRep, but now we have to decide whether it is
305 signed or unsigned. The same VanillaReg can thus have
306 different MachReps in different parts of the program.
308 - Extra coercions cluttering up expressions.
310 Currently for GHC, the foreign call point is moot, because we do our
311 own promotion of sub-word-sized values to word-sized values. The Int8
312 type is represented by an Int# which is kept sign-extended at all times
313 (this is slightly naughty, because we're making assumptions about the
314 C calling convention rather early on in the compiler). However, given
315 this, the cons outweigh the pros.