4 , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord
7 , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
8 , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32
11 , widthInBits, widthInBytes, widthInLog, widthFromBytes
12 , wordWidth, halfWordWidth, cIntWidth, cLongWidth
15 , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr
16 , CmmReg(..), cmmRegType
17 , CmmLit(..), cmmLitType
18 , LocalReg(..), localRegType
19 , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node
20 , VGcPtr(..), vgcFlag -- Temporary!
21 , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
22 , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
23 , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
24 , plusRegSet, minusRegSet, timesRegSet
25 , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
29 , pprMachOp, isCommutableMachOp, isAssociativeMachOp
30 , isComparisonMachOp, machOpResultType
31 , machOpArgReps, maybeInvertComparison
34 , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
35 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
36 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
37 , mo_wordULe, mo_wordUGt, mo_wordULt
38 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
39 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
40 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
41 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
45 #include "HsVersions.h"
59 -----------------------------------------------------------------------------
61 -- An expression. Expressions have no side effects.
62 -----------------------------------------------------------------------------
65 = CmmLit CmmLit -- Literal
66 | CmmLoad CmmExpr CmmType -- Read memory location
67 | CmmReg CmmReg -- Contents of register
68 | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
69 | CmmStackSlot Area Int -- addressing expression of a stack slot
70 | CmmRegOff CmmReg Int
72 -- ** is shorthand only, meaning **
73 -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep)))
74 -- where rep = cmmRegType reg
-- | Structural equality on expressions.  The 'CmmType' annotation carried
-- by a 'CmmLoad' is deliberately ignored.
instance Eq CmmExpr where
  CmmLit l1          == CmmLit l2          = l1 == l2
  CmmLoad e1 _       == CmmLoad e2 _       = e1 == e2
  CmmReg r1          == CmmReg r2          = r1 == r2
  CmmRegOff r1 i1    == CmmRegOff r2 i2    = r1 == r2 && i1 == i2
  CmmMachOp op1 es1  == CmmMachOp op2 es2  = op1 == op2 && es1 == es2
  CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1 == a2 && i1 == i2
  -- Expressions built from different constructors are never equal.
  -- Without this catch-all a mixed comparison is a pattern-match failure.
  _                  == _                  = False
90 -- | A stack area is either the stack slot where a variable is spilled
91 -- or the stack space where function arguments and results are passed.
98 = Old -- See Note [Old Area]
104 There is a single call area 'Old', allocated at the extreme old
105 end of the stack frame (ie just younger than the return address)
107 * incoming (overflow) parameters,
108 * outgoing (overflow) parameter to tail calls,
109 * outgoing (overflow) result values
110 * the update frame (if any)
112 Its size is the max of all these requirements. On entry, the stack
113 pointer will point to the youngest incoming parameter, which is not
114 necessarily at the young end of the Old area.
118 type SubArea = (Area, Int, Int) -- area, offset, width
119 type SubAreaSet = FiniteMap Area [SubArea]
120 type AreaMap = FiniteMap Area Int
123 = CmmInt Integer Width
124 -- Interpretation: the 2's complement representation of the value
125 -- is truncated to the specified size. This is easier than trying
126 -- to keep the value within range, because we don't know whether
127 -- it will be used as a signed or unsigned value (the CmmType doesn't
128 -- distinguish between signed & unsigned).
129 | CmmFloat Rational Width
130 | CmmLabel CLabel -- Address of label
131 | CmmLabelOff CLabel Int -- Address of label + byte offset
133 -- Due to limitations in the C backend, the following
134 -- MUST ONLY be used inside the info table indicated by label2
135 -- (label2 must be the info label), and label1 must be an
136 -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
137 -- Don't use it at all unless tablesNextToCode.
138 -- It is also used inside the NCG when generating
139 -- position-independent code.
140 | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
141 | CmmBlock BlockId -- Code label
142 | CmmHighStackMark -- stands for the max stack space used during a procedure
-- | The type of the value an expression evaluates to.
cmmExprType :: CmmExpr -> CmmType
cmmExprType expr = case expr of
  CmmLit lit        -> cmmLitType lit
  CmmLoad _ rep     -> rep
  CmmReg reg        -> cmmRegType reg
  CmmRegOff reg _   -> cmmRegType reg
  CmmMachOp op args -> machOpResultType op (map cmmExprType args)
  CmmStackSlot _ _  -> bWord  -- an address
-- | The type of a literal.  Integer and float literals carry their own
-- width; label literals take the type given by 'cmmLabelType'; every
-- other literal is a plain machine word.
cmmLitType :: CmmLit -> CmmType
cmmLitType lit = case lit of
  CmmInt _ width     -> cmmBits width
  CmmFloat _ width   -> cmmFloat width
  CmmLabel lbl       -> cmmLabelType lbl
  CmmLabelOff lbl _  -> cmmLabelType lbl
  CmmLabelDiffOff {} -> bWord
  CmmBlock _         -> bWord
  CmmHighStackMark   -> bWord
-- | The type of the address denoted by a label: a GC pointer when
-- 'isGcPtrLabel' says so, otherwise an ordinary machine word.
cmmLabelType :: CLabel -> CmmType
cmmLabelType lbl
  | isGcPtrLabel lbl = gcWord
  -- Total fallback: a label that is not a GC pointer is still an address.
  | otherwise        = bWord
-- | The width of the value an expression evaluates to.
cmmExprWidth :: CmmExpr -> Width
cmmExprWidth = typeWidth . cmmExprType
170 --- Negation for conditional branches
-- | Negate a conditional expression by inverting its top-level comparison
-- operator.  Yields 'Nothing' when the expression is not a 'CmmMachOp' or
-- when 'maybeInvertComparison' cannot invert the operator.
maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr
maybeInvertCmmExpr expr = case expr of
  CmmMachOp op args ->
    fmap (\inverted -> CmmMachOp inverted args) (maybeInvertComparison op)
  _ -> Nothing
177 -----------------------------------------------------------------------------
179 -----------------------------------------------------------------------------
182 = LocalReg !Unique CmmType
-- | Local registers are identified by their 'Unique' alone; the type
-- field takes no part in equality.
instance Eq LocalReg where
  r1 == r2 = key r1 == key r2
    where key (LocalReg u _) = u
-- | Ordering follows the register's 'Unique'; the type field is ignored.
instance Ord LocalReg where
  compare r1 r2 = compare (key r1) (key r2)
    where key (LocalReg u _) = u
-- | A local register's 'Unique' is the one it was constructed with.
instance Uniquable LocalReg where
  getUnique reg = case reg of
    LocalReg uniq _ -> uniq
-- | The type held by a register, local or global.
cmmRegType :: CmmReg -> CmmType
cmmRegType reg = case reg of
  CmmLocal  local  -> localRegType local
  CmmGlobal global -> globalRegType global
-- | The type a local register was declared with.
localRegType :: LocalReg -> CmmType
localRegType reg = case reg of
  LocalReg _ rep -> rep
203 -----------------------------------------------------------------------------
204 -- Register-use information for expressions and other types
205 -----------------------------------------------------------------------------
207 -- | Sets of local registers
208 type RegSet = UniqSet LocalReg
209 emptyRegSet :: RegSet
210 elemRegSet :: LocalReg -> RegSet -> Bool
211 extendRegSet :: RegSet -> LocalReg -> RegSet
212 deleteFromRegSet :: RegSet -> LocalReg -> RegSet
213 mkRegSet :: [LocalReg] -> RegSet
214 minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet
216 emptyRegSet = emptyUniqSet
217 elemRegSet = elementOfUniqSet
218 extendRegSet = addOneToUniqSet
219 deleteFromRegSet = delOneFromUniqSet
221 minusRegSet = minusUniqSet
222 plusRegSet = unionUniqSets
223 timesRegSet = intersectUniqSets
-- | Things that can be folded over to visit the local registers they use.
class UserOfLocalRegs a where
  -- | Left-to-right fold: combining function, initial accumulator, subject.
  foldRegsUsed :: (acc -> LocalReg -> acc) -> acc -> a -> acc
-- | Things that can be folded over to visit the local registers they define.
class DefinerOfLocalRegs a where
  -- | Left-to-right fold: combining function, initial accumulator, subject.
  foldRegsDefd :: (acc -> LocalReg -> acc) -> acc -> a -> acc
231 filterRegsUsed :: UserOfLocalRegs e => (LocalReg -> Bool) -> e -> RegSet
233 foldRegsUsed (\regs r -> if p r then extendRegSet regs r else regs)
-- | Only a local register contributes; global registers are skipped.
instance UserOfLocalRegs CmmReg where
  foldRegsUsed f acc reg = case reg of
    CmmLocal local -> f acc local
    CmmGlobal _    -> acc
-- | Only a local register contributes; global registers are skipped.
instance DefinerOfLocalRegs CmmReg where
  foldRegsDefd f acc reg = case reg of
    CmmLocal local -> f acc local
    CmmGlobal _    -> acc
-- | A local register uses exactly itself.
instance UserOfLocalRegs LocalReg where
  foldRegsUsed step acc reg = step acc reg
-- | A local register defines exactly itself.
instance DefinerOfLocalRegs LocalReg where
  foldRegsDefd step acc reg = step acc reg
-- | Fold over every register in the set.  'foldUniqSet' passes the element
-- first, so the combining function's arguments are swapped.
instance UserOfLocalRegs RegSet where
  foldRegsUsed f acc regs = foldUniqSet (\reg soFar -> f soFar reg) acc regs
-- | Visit the local registers mentioned in an expression.  Literals and
-- stack-slot addresses mention none.
instance UserOfLocalRegs CmmExpr where
  foldRegsUsed f acc expr = case expr of
    CmmLit _         -> acc
    CmmStackSlot _ _ -> acc
    CmmLoad addr _   -> foldRegsUsed f acc addr
    CmmReg reg       -> foldRegsUsed f acc reg
    CmmRegOff reg _  -> foldRegsUsed f acc reg
    CmmMachOp _ args -> foldRegsUsed f acc args
-- | Fold over the elements of a list from left to right, threading the
-- accumulator through each element's own fold.
instance UserOfLocalRegs a => UserOfLocalRegs [a] where
  foldRegsUsed f = foldl (foldRegsUsed f)
-- | Fold over the elements of a list from left to right, threading the
-- accumulator through each element's own fold.
instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where
  foldRegsDefd f = foldl (foldRegsDefd f)
-- | 'Nothing' defines no registers; 'Just' defers to the wrapped value.
instance DefinerOfLocalRegs a => DefinerOfLocalRegs (Maybe a) where
  foldRegsDefd f acc = maybe acc (foldRegsDefd f acc)
275 -----------------------------------------------------------------------------
277 -----------------------------------------------------------------------------
-- | True when the expression is the address of a 'RegSlot' stack slot
-- belonging to the given local register.
isStackSlotOf :: CmmExpr -> LocalReg -> Bool
isStackSlotOf expr reg = case expr of
  CmmStackSlot (RegSlot slotReg) _ -> slotReg == reg
  _                                -> False
283 -----------------------------------------------------------------------------
284 -- Stack slot use information for expressions and other types [_$_]
285 -----------------------------------------------------------------------------
-- | Things that can be folded over to visit the stack sub-areas they read.
-- Each 'SubArea' is (area, offset into the area, width of the sub-area).
class UserOfSlots a where
  foldSlotsUsed :: (acc -> SubArea -> acc) -> acc -> a -> acc
-- | Things that can be folded over to visit the stack sub-areas they write.
class DefinerOfSlots a where
  foldSlotsDefd :: (acc -> SubArea -> acc) -> acc -> a -> acc
-- | Visit the stack sub-areas an expression reads.  Only a load whose
-- address is directly a stack slot counts as a use; the bare slot address
-- on its own does not.
instance UserOfSlots CmmExpr where
  foldSlotsUsed f acc expr = case expr of
    -- Must precede the general load case below.
    CmmLoad (CmmStackSlot area off) ty ->
      f acc (area, off, widthInBytes (typeWidth ty))
    CmmLoad addr _   -> foldSlotsUsed f acc addr
    CmmMachOp _ args -> foldSlotsUsed f acc args
    CmmLit _         -> acc
    CmmReg _         -> acc
    CmmRegOff _ _    -> acc
    CmmStackSlot _ _ -> acc
-- | Fold over the elements of a list from left to right, threading the
-- accumulator through each element's own fold.
instance UserOfSlots a => UserOfSlots [a] where
  foldSlotsUsed f = foldl (foldSlotsUsed f)
309 -----------------------------------------------------------------------------
310 -- Global STG registers
311 -----------------------------------------------------------------------------
313 data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
316 -----------------------------------------------------------------------------
317 -- Global STG registers
318 -----------------------------------------------------------------------------
-- | Classify a type as GC pointer or not, as a 'VGcPtr' flag.
vgcFlag :: CmmType -> VGcPtr
vgcFlag ty = if isGcPtrType ty then VGcPtr else VNonGcPtr
324 -- Argument and return registers
325 = VanillaReg -- pointers, unboxed ints and chars
326 {-# UNPACK #-} !Int -- its number
329 | FloatReg -- single-precision floating-point registers
330 {-# UNPACK #-} !Int -- its number
332 | DoubleReg -- double-precision floating-point registers
333 {-# UNPACK #-} !Int -- its number
335 | LongReg -- long int registers (64-bit, really)
336 {-# UNPACK #-} !Int -- its number
339 | Sp -- Stack ptr; points to last occupied stack location.
340 | SpLim -- Stack limit
341 | Hp -- Heap ptr; points to last occupied heap location.
342 | HpLim -- Heap limit register
343 | CurrentTSO -- pointer to current thread's TSO
344 | CurrentNursery -- pointer to allocation area
345 | HpAlloc -- allocation count for heap check failure
347 -- We keep the address of some commonly-called
348 -- functions in the register table, to keep code
350 | EagerBlackholeInfo -- stg_EAGER_BLACKHOLE_info
351 | GCEnter1 -- stg_gc_enter_1
352 | GCFun -- stg_gc_fun
354 -- Base offset for the register table, used for accessing registers
355 -- which do not have real registers assigned to them. This register
356 -- will only appear after we have expanded GlobalReg into memory accesses
357 -- (where necessary) in the native code generator.
360 -- Base Register for PIC (position-independent code) calculations
361 -- Only used inside the native code generator. Its exact meaning differs
362 -- from platform to platform (see module PositionIndependentCode).
-- | Equality on global registers.  Two 'VanillaReg's are equal when their
-- numbers match; the GC-pointer flag is ignored so that clashes between
-- differently-typed uses of the same register are detected.
--
-- Every nullary register must have a reflexive equation here, otherwise
-- @r == r@ is 'False' while the 'Ord' instance reports 'EQ' for it.
instance Eq GlobalReg where
  VanillaReg i _     == VanillaReg j _     = i == j
  FloatReg i         == FloatReg j         = i == j
  DoubleReg i        == DoubleReg j        = i == j
  LongReg i          == LongReg j          = i == j
  Sp                 == Sp                 = True
  SpLim              == SpLim              = True
  Hp                 == Hp                 = True
  HpLim              == HpLim              = True
  CurrentTSO         == CurrentTSO         = True
  CurrentNursery     == CurrentNursery     = True
  HpAlloc            == HpAlloc            = True
  EagerBlackholeInfo == EagerBlackholeInfo = True
  GCEnter1           == GCEnter1           = True
  GCFun              == GCFun              = True
  BaseReg            == BaseReg            = True
  PicBaseReg         == PicBaseReg         = True
  -- Registers of different kinds are never equal.
  _                  == _                  = False
385 instance Ord GlobalReg where
386 compare (VanillaReg i _) (VanillaReg j _) = compare i j
387 -- Ignore type when seeking clashes
388 compare (FloatReg i) (FloatReg j) = compare i j
389 compare (DoubleReg i) (DoubleReg j) = compare i j
390 compare (LongReg i) (LongReg j) = compare i j
392 compare SpLim SpLim = EQ
394 compare HpLim HpLim = EQ
395 compare CurrentTSO CurrentTSO = EQ
396 compare CurrentNursery CurrentNursery = EQ
397 compare HpAlloc HpAlloc = EQ
398 compare EagerBlackholeInfo EagerBlackholeInfo = EQ
399 compare GCEnter1 GCEnter1 = EQ
400 compare GCFun GCFun = EQ
401 compare BaseReg BaseReg = EQ
402 compare PicBaseReg PicBaseReg = EQ
403 compare (VanillaReg _ _) _ = LT
404 compare _ (VanillaReg _ _) = GT
405 compare (FloatReg _) _ = LT
406 compare _ (FloatReg _) = GT
407 compare (DoubleReg _) _ = LT
408 compare _ (DoubleReg _) = GT
409 compare (LongReg _) _ = LT
410 compare _ (LongReg _) = GT
419 compare CurrentTSO _ = LT
420 compare _ CurrentTSO = GT
421 compare CurrentNursery _ = LT
422 compare _ CurrentNursery = GT
423 compare HpAlloc _ = LT
424 compare _ HpAlloc = GT
425 compare GCEnter1 _ = LT
426 compare _ GCEnter1 = GT
429 compare BaseReg _ = LT
430 compare _ BaseReg = GT
431 compare EagerBlackholeInfo _ = LT
432 compare _ EagerBlackholeInfo = GT
434 -- convenient aliases
435 spReg, hpReg, spLimReg, nodeReg :: CmmReg
438 spLimReg = CmmGlobal SpLim
439 nodeReg = CmmGlobal node
442 node = VanillaReg 1 VGcPtr
-- | The type held by a global register.
globalRegType :: GlobalReg -> CmmType
globalRegType reg = case reg of
  VanillaReg _ VGcPtr    -> gcWord
  VanillaReg _ VNonGcPtr -> bWord
  FloatReg _             -> cmmFloat W32
  DoubleReg _            -> cmmFloat W64
  LongReg _              -> cmmBits W64
  -- The initialiser for all dynamically allocated closures
  Hp                     -> gcWord
  _                      -> bWord
455 -----------------------------------------------------------------------------
457 -----------------------------------------------------------------------------
459 -- NOTE: CmmType is an abstract type, not exported from this
460 -- module so you can easily change its representation
462 -- However Width is exported in a concrete way,
463 -- and is used extensively in pattern-matching
465 data CmmType -- The important one!
466 = CmmType CmmCat Width
468 data CmmCat -- "Category" (not exported)
469 = GcPtrCat -- GC pointer
470 | BitsCat -- Non-pointer
473 -- See Note [Signed vs unsigned] at the end
-- | Printed as the category letter followed by the width in bits.
instance Outputable CmmType where
  ppr ty = case ty of
    CmmType cat wid -> ppr cat <> ppr (widthInBits wid)
-- | Floats print as @F@; every other category prints as @I@.
instance Outputable CmmCat where
  ppr cat = case cat of
    FloatCat -> ptext $ sLit("F")
    _        -> ptext $ sLit("I")
482 -- ppr FloatCat = ptext $ sLit("float")
483 -- ppr BitsCat = ptext $ sLit("bits")
484 -- ppr GcPtrCat = ptext $ sLit("gcptr")
486 -- Why is CmmType stratified? For native code generation,
487 -- most of the time you just want to know what sort of register
488 -- to put the thing in, and for this you need to know how
489 -- many bits thing has and whether it goes in a floating-point
490 -- register. By contrast, the distinction between GcPtr and
491 -- GcNonPtr is of interest to only a few parts of the code generator.
493 -------- Equality on CmmType --------------
494 -- CmmType is *not* an instance of Eq; sometimes we care about the
495 -- Gc/NonGc distinction, and sometimes we don't
496 -- So we use an explicit function to force you to think about it
-- | Exact equality: both the category and the width must agree.
cmmEqType :: CmmType -> CmmType -> Bool
cmmEqType (CmmType cat1 wid1) (CmmType cat2 wid2) = (cat1, wid1) == (cat2, wid2)
-- | Equality that does not distinguish GC pointers from other non-float
-- words; floats still only match floats, and widths must agree.
-- This equality is temporary; used in CmmLint
-- but the RTS files are not yet well-typed wrt pointers
cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
cmmEqType_ignoring_ptrhood (CmmType cat1 wid1) (CmmType cat2 wid2)
  = sameKind cat1 cat2 && wid1 == wid2
  where
    sameKind a b = case (a, b) of
      (FloatCat, FloatCat) -> True
      (FloatCat, _)        -> False
      (_, FloatCat)        -> False
      _                    -> True  -- Ignores GcPtr
511 --- Simple operations on CmmType -----
-- | Project the width out of a type, discarding its category.
typeWidth :: CmmType -> Width
typeWidth ty = case ty of
  CmmType _ wid -> wid
-- | Build a non-pointer bits type, or a float type, of the given width.
cmmBits, cmmFloat :: Width -> CmmType
cmmBits  wid = CmmType BitsCat wid
cmmFloat wid = CmmType FloatCat wid
519 -------- Common CmmTypes ------------
520 -- Floats and words of specific widths
521 b8, b16, b32, b64, f32, f64 :: CmmType
529 -- CmmTypes of native word widths
-- | Native word, native half word, and native-word-sized GC pointer.
bWord, bHalfWord, gcWord :: CmmType
gcWord    = CmmType GcPtrCat wordWidth
bWord     = cmmBits wordWidth
bHalfWord = cmmBits halfWordWidth
-- | Bits types with the widths 'cIntWidth' and 'cLongWidth'.
cInt, cLong :: CmmType
cLong = cmmBits cLongWidth
cInt  = cmmBits cIntWidth
540 ------------ Predicates ----------------
-- | Category tests: is the type a float, or a GC pointer?
isFloatType, isGcPtrType :: CmmType -> Bool
isFloatType ty = case ty of
  CmmType FloatCat _ -> True
  _                  -> False

isGcPtrType ty = case ty of
  CmmType GcPtrCat _ -> True
  _                  -> False
-- | Combined category-and-width tests.  The @isWord@ predicates accept both
-- GC pointers and plain bits of the stated width, but not floats; the
-- @isFloat@ predicates accept only floats of the stated width.
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
isWord64 ty = case ty of
  CmmType BitsCat  W64 -> True
  CmmType GcPtrCat W64 -> True
  _                    -> False

isWord32 ty = case ty of
  CmmType BitsCat  W32 -> True
  CmmType GcPtrCat W32 -> True
  _                    -> False

isFloat32 ty = case ty of
  CmmType FloatCat W32 -> True
  _                    -> False

isFloat64 ty = case ty of
  CmmType FloatCat W64 -> True
  _                    -> False
566 -----------------------------------------------------------------------------
568 -----------------------------------------------------------------------------
570 data Width = W8 | W16 | W32 | W64
571 | W80 -- Extended double-precision float,
572 -- used in x86 native codegen only.
573 -- (we use Ord, so it'd better be in this order)
575 deriving (Eq, Ord, Show)
-- | A width prints as its constructor name, via 'mrStr'.
instance Outputable Width where
  ppr = ptext . mrStr
-- | The constructor name of a width, as a literal string.
mrStr :: Width -> LitString
mrStr wid = case wid of
  W8   -> sLit("W8")
  W16  -> sLit("W16")
  W32  -> sLit("W32")
  W64  -> sLit("W64")
  W128 -> sLit("W128")
  W80  -> sLit("W80")
589 -------- Common Widths ------------
-- | Widths of a native word and of half a native word, chosen from
-- 'wORD_SIZE' (4 or 8); any other value is a panic.
wordWidth, halfWordWidth :: Width
wordWidth = case wORD_SIZE of
  4 -> W32
  8 -> W64
  _ -> panic "MachOp.wordRep: Unknown word size"

halfWordWidth = case wORD_SIZE of
  4 -> W16
  8 -> W32
  _ -> panic "MachOp.halfWordRep: Unknown word size"
599 -- cIntRep is the Width for a C-language 'int'
600 cIntWidth, cLongWidth :: Width
603 #elif SIZEOF_INT == 8
609 #elif SIZEOF_LONG == 8
613 widthInBits :: Width -> Int
618 widthInBits W128 = 128
621 widthInBytes :: Width -> Int
626 widthInBytes W128 = 16
627 widthInBytes W80 = 10
-- | The width that occupies the given number of bytes.  Panics when no
-- width has that size.
widthFromBytes :: Int -> Width
widthFromBytes n = case n of
  1  -> W8
  2  -> W16
  4  -> W32
  8  -> W64
  16 -> W128
  10 -> W80
  _  -> pprPanic "no width for given number of bytes" (ppr n)
638 -- log_2 of the width in bytes, useful for generating shifts.
639 widthInLog :: Width -> Int
645 widthInLog W80 = panic "widthInLog: F80"
647 -- widening / narrowing
-- | Truncate an integer to the given width and read the result back as
-- an unsigned value.  Only W8, W16, W32 and W64 are supported; any other
-- width is a panic.
narrowU :: Width -> Integer -> Integer
narrowU wid x = case wid of
  W8  -> fromIntegral (fromIntegral x :: Word8)
  W16 -> fromIntegral (fromIntegral x :: Word16)
  W32 -> fromIntegral (fromIntegral x :: Word32)
  W64 -> fromIntegral (fromIntegral x :: Word64)
  _   -> panic "narrowTo"
-- | Truncate an integer to the given width and read the result back as
-- a signed value.  Only W8, W16, W32 and W64 are supported; any other
-- width is a panic.
narrowS :: Width -> Integer -> Integer
narrowS wid x = case wid of
  W8  -> fromIntegral (fromIntegral x :: Int8)
  W16 -> fromIntegral (fromIntegral x :: Int16)
  W32 -> fromIntegral (fromIntegral x :: Int32)
  W64 -> fromIntegral (fromIntegral x :: Int64)
  _   -> panic "narrowTo"
663 -----------------------------------------------------------------------------
665 -----------------------------------------------------------------------------
668 Implementation notes:
670 It might suffice to keep just a width, without distinguishing between
671 floating and integer types. However, keeping the distinction will
672 help the native code generator to assign registers more easily.
677 Machine-level primops; ones which we can reasonably delegate to the
678 native code generators to handle. Basically contains C's primops
681 Nomenclature: all ops indicate width and signedness, where
682 appropriate. Widths: 8\/16\/32\/64 means the given size, obviously.
683 Nat means the operation works on STG word sized objects.
684 Signedness: S means signed, U means unsigned. For operations where
685 signedness is irrelevant or makes no difference (for example
686 integer add), the signedness component is omitted.
688 An exception: NatP is a ptr-typed native word. From the point of
689 view of the native code generators this distinction is irrelevant,
690 but the C code generator sometimes needs this info to emit the
695 -- Integer operations (insensitive to signed/unsigned)
700 | MO_Mul Width -- low word of multiply
702 -- Signed multiply/divide
703 | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows
704 | MO_S_Quot Width -- signed / (same semantics as IntQuotOp)
705 | MO_S_Rem Width -- signed % (same semantics as IntRemOp)
706 | MO_S_Neg Width -- unary -
708 -- Unsigned multiply/divide
709 | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows
710 | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp)
711 | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp)
713 -- Signed comparisons
719 -- Unsigned comparisons
725 -- Floating point arithmetic
728 | MO_F_Neg Width -- unary -
732 -- Floating point comparison
740 -- Bitwise operations. Not all of these may be supported
741 -- at all sizes, and only integral Widths are valid.
747 | MO_U_Shr Width -- unsigned shift right
748 | MO_S_Shr Width -- signed shift right
750 -- Conversions. Some of these will be NOPs.
751 -- Floating-point conversions use the signed variant.
752 | MO_SF_Conv Width Width -- Signed int -> Float
753 | MO_FS_Conv Width Width -- Float -> Signed int
754 | MO_SS_Conv Width Width -- Signed int -> Signed int
755 | MO_UU_Conv Width Width -- unsigned int -> unsigned int
756 | MO_FF_Conv Width Width -- Float -> Float
-- | Render a 'MachOp' using its 'Show' instance.
pprMachOp :: MachOp -> SDoc
pprMachOp = text . show
764 -- -----------------------------------------------------------------------------
765 -- Some common MachReps
767 -- A 'wordRep' is a machine word on the target architecture
768 -- Specifically, it is the size of an Int#, Word#, Addr#
769 -- and the unit of allocation on the stack and the heap
770 -- Any pointer is also guaranteed to be a wordRep.
772 mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
773 , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem
774 , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe
775 , mo_wordULe, mo_wordUGt, mo_wordULt
776 , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr
777 , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32
778 , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord
779 , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32
-- Word-width arithmetic and equality
mo_wordAdd    = MO_Add    wordWidth
mo_wordSub    = MO_Sub    wordWidth
mo_wordEq     = MO_Eq     wordWidth
mo_wordNe     = MO_Ne     wordWidth
mo_wordMul    = MO_Mul    wordWidth
mo_wordSQuot  = MO_S_Quot wordWidth
mo_wordSRem   = MO_S_Rem  wordWidth
mo_wordSNeg   = MO_S_Neg  wordWidth
mo_wordUQuot  = MO_U_Quot wordWidth
mo_wordURem   = MO_U_Rem  wordWidth

-- Word-width signed comparisons
mo_wordSGe    = MO_S_Ge wordWidth
mo_wordSLe    = MO_S_Le wordWidth
mo_wordSGt    = MO_S_Gt wordWidth
mo_wordSLt    = MO_S_Lt wordWidth

-- Word-width unsigned comparisons
mo_wordUGe    = MO_U_Ge wordWidth
mo_wordULe    = MO_U_Le wordWidth
mo_wordUGt    = MO_U_Gt wordWidth
mo_wordULt    = MO_U_Lt wordWidth

-- Word-width bitwise operations and shifts
mo_wordAnd    = MO_And   wordWidth
mo_wordOr     = MO_Or    wordWidth
mo_wordXor    = MO_Xor   wordWidth
mo_wordNot    = MO_Not   wordWidth
mo_wordShl    = MO_Shl   wordWidth
mo_wordSShr   = MO_S_Shr wordWidth
mo_wordUShr   = MO_U_Shr wordWidth

-- Widening to 32 bits
mo_u_8To32    = MO_UU_Conv W8  W32
mo_s_8To32    = MO_SS_Conv W8  W32
mo_u_16To32   = MO_UU_Conv W16 W32
mo_s_16To32   = MO_SS_Conv W16 W32

-- Conversions to word width
mo_u_8ToWord  = MO_UU_Conv W8  wordWidth
mo_s_8ToWord  = MO_SS_Conv W8  wordWidth
mo_u_16ToWord = MO_UU_Conv W16 wordWidth
mo_s_16ToWord = MO_SS_Conv W16 wordWidth
mo_s_32ToWord = MO_SS_Conv W32 wordWidth
mo_u_32ToWord = MO_UU_Conv W32 wordWidth

-- Conversions from word width
mo_WordTo8    = MO_UU_Conv wordWidth W8
mo_WordTo16   = MO_UU_Conv wordWidth W16
mo_WordTo32   = MO_UU_Conv wordWidth W32

-- Narrowing from 32 bits
mo_32To8      = MO_UU_Conv W32 W8
mo_32To16     = MO_UU_Conv W32 W16
831 -- ----------------------------------------------------------------------------
832 -- isCommutableMachOp
835 Returns 'True' if the MachOp has commutable arguments. This is used
836 in the platform-independent Cmm optimisations.
838 If in doubt, return 'False'. This generates worse code on the
839 native routes, but is otherwise harmless.
841 isCommutableMachOp :: MachOp -> Bool
842 isCommutableMachOp mop =
848 MO_S_MulMayOflo _ -> True
849 MO_U_MulMayOflo _ -> True
855 -- ----------------------------------------------------------------------------
856 -- isAssociativeMachOp
859 Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@)
860 This is used in the platform-independent Cmm optimisations.
862 If in doubt, return 'False'. This generates worse code on the
863 native routes, but is otherwise harmless.
865 isAssociativeMachOp :: MachOp -> Bool
866 isAssociativeMachOp mop =
868 MO_Add {} -> True -- NB: does not include
869 MO_Mul {} -> True -- floatint point!
875 -- ----------------------------------------------------------------------------
876 -- isComparisonMachOp
879 Returns 'True' if the MachOp is a comparison.
881 If in doubt, return False. This generates worse code on the
882 native routes, but is otherwise harmless.
884 isComparisonMachOp :: MachOp -> Bool
885 isComparisonMachOp mop =
905 -- -----------------------------------------------------------------------------
906 -- Inverting conditions
908 -- Sometimes it's useful to be able to invert the sense of a
909 -- condition. Not all conditional tests are invertible: in
910 -- particular, floating point conditionals cannot be inverted, because
911 -- there exist floating-point values which return False for both senses
912 -- of a condition (eg. both NaN > NaN and NaN <= NaN are False).
-- | Return the comparison that is the logical negation of the given one,
-- or 'Nothing' if the operator is not an invertible comparison.
--
-- Only integer comparisons are inverted.  Floating-point comparisons are
-- never inverted: with a NaN operand both an ordered test and its
-- apparent opposite are False, so the rewrite would change the branch
-- taken.  (The mappings previously listed here, e.g. @MO_F_Ge@ to
-- @MO_F_Le@, were not negations in any case.)
maybeInvertComparison :: MachOp -> Maybe MachOp
maybeInvertComparison op
  = case op of  -- None of these Just cases include floating point
      MO_Eq r   -> Just (MO_Ne r)
      MO_Ne r   -> Just (MO_Eq r)
      MO_U_Lt r -> Just (MO_U_Ge r)
      MO_U_Gt r -> Just (MO_U_Le r)
      MO_U_Le r -> Just (MO_U_Gt r)
      MO_U_Ge r -> Just (MO_U_Lt r)
      MO_S_Lt r -> Just (MO_S_Ge r)
      MO_S_Gt r -> Just (MO_S_Le r)
      MO_S_Le r -> Just (MO_S_Gt r)
      MO_S_Ge r -> Just (MO_S_Lt r)
      -- Everything else, including all MO_F_* comparisons, is not invertible.
      _         -> Nothing
935 -- ----------------------------------------------------------------------------
939 Returns the MachRep of the result of a MachOp.
941 machOpResultType :: MachOp -> [CmmType] -> CmmType
942 machOpResultType mop tys =
944 MO_Add {} -> ty1 -- Preserve GC-ptr-hood
945 MO_Sub {} -> ty1 -- of first arg
946 MO_Mul r -> cmmBits r
947 MO_S_MulMayOflo r -> cmmBits r
948 MO_S_Quot r -> cmmBits r
949 MO_S_Rem r -> cmmBits r
950 MO_S_Neg r -> cmmBits r
951 MO_U_MulMayOflo r -> cmmBits r
952 MO_U_Quot r -> cmmBits r
953 MO_U_Rem r -> cmmBits r
955 MO_Eq {} -> comparisonResultRep
956 MO_Ne {} -> comparisonResultRep
957 MO_S_Ge {} -> comparisonResultRep
958 MO_S_Le {} -> comparisonResultRep
959 MO_S_Gt {} -> comparisonResultRep
960 MO_S_Lt {} -> comparisonResultRep
962 MO_U_Ge {} -> comparisonResultRep
963 MO_U_Le {} -> comparisonResultRep
964 MO_U_Gt {} -> comparisonResultRep
965 MO_U_Lt {} -> comparisonResultRep
967 MO_F_Add r -> cmmFloat r
968 MO_F_Sub r -> cmmFloat r
969 MO_F_Mul r -> cmmFloat r
970 MO_F_Quot r -> cmmFloat r
971 MO_F_Neg r -> cmmFloat r
972 MO_F_Eq {} -> comparisonResultRep
973 MO_F_Ne {} -> comparisonResultRep
974 MO_F_Ge {} -> comparisonResultRep
975 MO_F_Le {} -> comparisonResultRep
976 MO_F_Gt {} -> comparisonResultRep
977 MO_F_Lt {} -> comparisonResultRep
979 MO_And {} -> ty1 -- Used for pointer masking
982 MO_Not r -> cmmBits r
983 MO_Shl r -> cmmBits r
984 MO_U_Shr r -> cmmBits r
985 MO_S_Shr r -> cmmBits r
987 MO_SS_Conv _ to -> cmmBits to
988 MO_UU_Conv _ to -> cmmBits to
989 MO_FS_Conv _ to -> cmmBits to
990 MO_SF_Conv _ to -> cmmFloat to
991 MO_FF_Conv _ to -> cmmFloat to
-- | The result type of a comparison 'MachOp': a native machine word.
comparisonResultRep :: CmmType
comparisonResultRep = bWord  -- is it?
999 -- -----------------------------------------------------------------------------
1002 -- | This function is used for debugging only: we can check whether an
1003 -- application of a MachOp is "type-correct" by checking that the MachReps of
1004 -- its arguments are the same as the MachOp expects. This is used when
1005 -- linting a CmmExpr.
1007 machOpArgReps :: MachOp -> [Width]
1015 MO_S_MulMayOflo r -> [r,r]
1016 MO_S_Quot r -> [r,r]
1019 MO_U_MulMayOflo r -> [r,r]
1020 MO_U_Quot r -> [r,r]
1036 MO_F_Quot r -> [r,r]
1049 MO_Shl r -> [r,wordWidth]
1050 MO_U_Shr r -> [r,wordWidth]
1051 MO_S_Shr r -> [r,wordWidth]
1053 MO_SS_Conv from _ -> [from]
1054 MO_UU_Conv from _ -> [from]
1055 MO_SF_Conv from _ -> [from]
1056 MO_FS_Conv from _ -> [from]
1057 MO_FF_Conv from _ -> [from]
1060 -------------------------------------------------------------------------
1061 {- Note [Signed vs unsigned]
1062 ~~~~~~~~~~~~~~~~~~~~~~~~~
1063 Should a CmmType include a signed vs. unsigned distinction?
1065 This is very much like a "hint" in C-- terminology: it isn't necessary
1066 in order to generate correct code, but it might be useful in that the
1067 compiler can generate better code if it has access to higher-level
1068 hints about data. This is important at call boundaries, because the
1069 definition of a function is not visible at all of its call sites, so
1070 the compiler cannot infer the hints.
1072 Here in Cmm, we're taking a slightly different approach. We include
1073 the int vs. float hint in the MachRep, because (a) the majority of
1074 platforms have a strong distinction between float and int registers,
1075 and (b) we don't want to do any heavyweight hint-inference in the
1076 native code backend in order to get good code. We're treating the
1077 hint more like a type: our Cmm is always completely consistent with
1078 respect to hints. All coercions between float and int are explicit.
1080 What about the signed vs. unsigned hint? This information might be
1081 useful if we want to keep sub-word-sized values in word-size
1082 registers, which we must do if we only have word-sized registers.
1084 On such a system, there are two straightforward conventions for
1085 representing sub-word-sized values:
1087 (a) Leave the upper bits undefined. Comparison operations must
1088 sign- or zero-extend both operands before comparing them,
1089 depending on whether the comparison is signed or unsigned.
1091 (b) Always keep the values sign- or zero-extended as appropriate.
1092 Arithmetic operations must narrow the result to the appropriate
1095 A clever compiler might not use either (a) or (b) exclusively, instead
1096 it would attempt to minimize the coercions by analysis: the same kind
1097 of analysis that propagates hints around. In Cmm we don't want to
1098 have to do this, so we plump for having richer types and keeping the
1099 type information consistent.
1101 If signed/unsigned hints are missing from MachRep, then the only
1102 choice we have is (a), because we don't know whether the result of an
1103 operation should be sign- or zero-extended.
1105 Many architectures have extending load operations, which work well
1106 with (b). To make use of them with (a), you need to know whether the
1107 value is going to be sign- or zero-extended by an enclosing comparison
1108 (for example), which involves knowing about the context. This is
1109 doable but more complex.
1111 Further complicating the issue is foreign calls: a foreign calling
1112 convention can specify that signed 8-bit quantities are passed as
1113 sign-extended 32 bit quantities, for example (this is the case on the
1114 PowerPC). So we *do* need sign information on foreign call arguments.
1116 Pros for adding signed vs. unsigned to MachRep:
1118 - It would let us use convention (b) above, and get easier
1119 code generation for extending loads.
1121 - Less information required on foreign calls.
1123 - MachOp type would be simpler
1129 - What is the MachRep for a VanillaReg? Currently it is
1130 always wordRep, but now we have to decide whether it is
1131 signed or unsigned. The same VanillaReg can thus have
1132 different MachReps in different parts of the program.
1134 - Extra coercions cluttering up expressions.
1136 Currently for GHC, the foreign call point is moot, because we do our
1137 own promotion of sub-word-sized values to word-sized values. The Int8
1138 type is represented by an Int# which is kept sign-extended at all times
1139 (this is slightly naughty, because we're making assumptions about the
1140 C calling convention rather early on in the compiler). However, given
1141 this, the cons outweigh the pros.