X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fcmm%2FCmmExpr.hs;h=5893843a208fbd976576b2beea6974ea136a902b;hp=69a4952ed654e0f2511eb2cca0586062bff179f5;hb=176fa33f17dd78355cc572e006d2ab26898e2c69;hpb=e06951a75a1f519e8f015880c363a8dedc08ff9c diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 69a4952..5893843 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -1,22 +1,56 @@ module CmmExpr - ( CmmExpr(..), cmmExprRep, maybeInvertCmmExpr - , CmmReg(..), cmmRegRep - , CmmLit(..), cmmLitRep - , LocalReg(..), localRegRep, localRegGCFollow, GCKind(..) - , GlobalReg(..), globalRegRep, spReg, hpReg, spLimReg, nodeReg, node + ( CmmType -- Abstract + , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord + , cInt, cLong + , cmmBits, cmmFloat + , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood + , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32 + + , Width(..) + , widthInBits, widthInBytes, widthInLog + , wordWidth, halfWordWidth, cIntWidth, cLongWidth + + , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr + , CmmReg(..), cmmRegType + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node + , VGcPtr(..), vgcFlag -- Temporary! , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed + , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet - , Area(..), StackSlotMap, getSlot, mkCallArea, outgoingSlot, areaId, areaSize - ) where + , Area(..), AreaId(..), SubArea, StackSlotMap, getSlot + + -- MachOp + , MachOp(..) + , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, machOpResultType + , machOpArgReps, maybeInvertComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + ) +where + +#include "HsVersions.h" import BlockId import CLabel +import Constants +import FastString import FiniteMap -import MachOp import Maybes import Monad +import Outputable import Panic import Unique import UniqSet @@ -28,16 +62,24 @@ import UniqSet data CmmExpr = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr MachRep -- Read memory location + | CmmLoad CmmExpr CmmType -- Read memory location | CmmReg CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) + | CmmStackSlot Area Int -- addressing expression of a stack slot | CmmRegOff CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) - -- where rep = cmmRegRep reg - | CmmStackSlot Area Int - deriving Eq + -- where rep = cmmRegType reg + +instance Eq CmmExpr where -- Equality ignores the types + CmmLit l1 == CmmLit l2 = l1==l2 + CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 + CmmReg r1 == CmmReg r2 = r1==r2 + CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 + CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 + _e1 == _e2 = False data CmmReg = CmmLocal LocalReg @@ -48,17 +90,24 @@ data CmmReg -- or the stack space where function arguments and results are passed. data Area = RegSlot LocalReg - | CallArea BlockId Int Int + | CallArea AreaId deriving (Eq, Ord) +data AreaId + = Old -- entry parameters, jumps, and returns share one call area at old end of stack + | Young BlockId + deriving (Eq, Ord) + +type SubArea = (Area, Int, Int) -- area, offset, width + data CmmLit - = CmmInt Integer MachRep + = CmmInt Integer Width -- Interpretation: the 2's complement representation of the value -- is truncated to the specified size. This is easier than trying -- to keep the value within range, because we don't know whether - -- it will be used as a signed or unsigned value (the MachRep doesn't + -- it will be used as a signed or unsigned value (the CmmType doesn't -- distinguish between signed & unsigned). - | CmmFloat Rational MachRep + | CmmFloat Rational Width | CmmLabel CLabel -- Address of label | CmmLabelOff CLabel Int -- Address of label + byte offset @@ -72,14 +121,27 @@ data CmmLit | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset deriving Eq -instance Eq LocalReg where - (LocalReg u1 _ _) == (LocalReg u2 _ _) = u1 == u2 +cmmExprType :: CmmExpr -> CmmType +cmmExprType (CmmLit lit) = cmmLitType lit +cmmExprType (CmmLoad _ rep) = rep +cmmExprType (CmmReg reg) = cmmRegType reg +cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args) +cmmExprType (CmmRegOff reg _) = cmmRegType reg +cmmExprType (CmmStackSlot _ _) = bWord -- an address -instance Ord LocalReg where - compare (LocalReg u1 _ _) (LocalReg u2 _ _) = compare u1 u2 +cmmLitType :: CmmLit -> CmmType +cmmLitType (CmmInt _ width) = cmmBits width +cmmLitType (CmmFloat _ width) = cmmFloat width +cmmLitType (CmmLabel lbl) = cmmLabelType lbl +cmmLitType (CmmLabelOff lbl _) = cmmLabelType lbl +cmmLitType (CmmLabelDiffOff {}) = bWord -instance Uniquable LocalReg where - getUnique (LocalReg uniq _ _) = uniq +cmmLabelType :: CLabel -> CmmType +cmmLabelType lbl | isGcPtrLabel lbl = gcWord + | otherwise = bWord + +cmmExprWidth :: CmmExpr -> Width +cmmExprWidth e = typeWidth (cmmExprType e) -------- --- Negation for conditional branches @@ -93,17 +155,33 @@ maybeInvertCmmExpr _ = Nothing -- Local registers ----------------------------------------------------------------------------- --- | Whether a 'LocalReg' is a GC followable pointer -data GCKind = GCKindPtr | GCKindNonPtr deriving (Eq) - data LocalReg - = LocalReg !Unique MachRep GCKind + = LocalReg !Unique CmmType -- ^ Parameters: -- 1. Identifier -- 2. Type - -- 3. Should the GC follow as a pointer --- Sets of local registers +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +instance Ord LocalReg where + compare (LocalReg u1 _) (LocalReg u2 _) = compare u1 u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +cmmRegType :: CmmReg -> CmmType +cmmRegType (CmmLocal reg) = localRegType reg +cmmRegType (CmmGlobal reg) = globalRegType reg + +localRegType :: LocalReg -> CmmType +localRegType (LocalReg _ rep) = rep + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +-- | Sets of local registers type RegSet = UniqSet LocalReg emptyRegSet :: RegSet elemRegSet :: LocalReg -> RegSet -> Bool @@ -121,45 +199,6 @@ minusRegSet = minusUniqSet plusRegSet = unionUniqSets timesRegSet = intersectUniqSets ------------------------------------------------------------------------------ --- Stack slots ------------------------------------------------------------------------------ - -mkVarSlot :: LocalReg -> CmmExpr -mkVarSlot r = CmmStackSlot (RegSlot r) 0 - --- Usually, we either want to lookup a variable's spill slot in an environment --- or else allocate it and add it to the environment. --- For a variable, we just need a single area of the appropriate size. -type StackSlotMap = FiniteMap LocalReg CmmExpr -getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) -getSlot map r = case lookupFM map r of - Just s -> (map, s) - Nothing -> (addToFM map r s, s) where s = mkVarSlot r - --- Eventually, we'll want something proper that takes arguments and formals --- and gives you back the calling convention code, as well as the stack area. -mkCallArea :: BlockId -> [a] -> Maybe [b] -> Area -mkCallArea id as fs = CallArea id (length as) (liftM length fs `orElse` 0) - --- Return the last slot in the outgoing parameter area. -outgoingSlot :: Area -> CmmExpr -outgoingSlot a@(RegSlot _) = CmmStackSlot a 0 -outgoingSlot a@(CallArea _ outN _) = CmmStackSlot a outN - -areaId :: Area -> BlockId -areaId (RegSlot _) = panic "Register stack slots don't have IDs!" -areaId (CallArea id _ _) = id - -areaSize :: Area -> Int -areaSize (RegSlot _) = 1 -areaSize (CallArea _ outN inN) = max outN inN - - ------------------------------------------------------------------------------ --- Register-use information for expressions and other types ------------------------------------------------------------------------------ - class UserOfLocalRegs a where foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -205,46 +244,69 @@ instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where foldRegsDefd _ set [] = set foldRegsDefd f set (x:xs) = foldRegsDefd f (foldRegsDefd f set x) xs + ----------------------------------------------------------------------------- --- MachRep +-- Stack slots ----------------------------------------------------------------------------- +mkVarSlot :: LocalReg -> CmmExpr +mkVarSlot r = CmmStackSlot (RegSlot r) 0 +-- Usually, we either want to lookup a variable's spill slot in an environment +-- or else allocate it and add it to the environment. +-- For a variable, we just need a single area of the appropriate size. +type StackSlotMap = FiniteMap LocalReg CmmExpr +getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) +getSlot map r = case lookupFM map r of + Just s -> (map, s) + Nothing -> (addToFM map r s, s) where s = mkVarSlot r -cmmExprRep :: CmmExpr -> MachRep -cmmExprRep (CmmLit lit) = cmmLitRep lit -cmmExprRep (CmmLoad _ rep) = rep -cmmExprRep (CmmReg reg) = cmmRegRep reg -cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op -cmmExprRep (CmmRegOff reg _) = cmmRegRep reg -cmmExprRep (CmmStackSlot _ _) = wordRep +----------------------------------------------------------------------------- +-- Stack slot use information for expressions and other types [_$_] +----------------------------------------------------------------------------- -cmmRegRep :: CmmReg -> MachRep -cmmRegRep (CmmLocal reg) = localRegRep reg -cmmRegRep (CmmGlobal reg) = globalRegRep reg -localRegRep :: LocalReg -> MachRep -localRegRep (LocalReg _ rep _) = rep +-- Fold over the area, the offset into the area, and the width of the subarea. +class UserOfSlots a where + foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b +class DefinerOfSlots a where + foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b -localRegGCFollow :: LocalReg -> GCKind -localRegGCFollow (LocalReg _ _ p) = p +instance UserOfSlots CmmExpr where + foldSlotsUsed f z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) + expr z (CmmLoad addr _) = foldSlotsUsed f z addr + expr z (CmmReg _) = z + expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs + expr z (CmmRegOff _ _) = z + expr z (CmmStackSlot _ _) = z + +instance UserOfSlots a => UserOfSlots [a] where + foldSlotsUsed _ set [] = set + foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs + + +----------------------------------------------------------------------------- +-- Global STG registers +----------------------------------------------------------------------------- -cmmLitRep :: CmmLit -> MachRep -cmmLitRep (CmmInt _ rep) = rep -cmmLitRep (CmmFloat _ rep) = rep -cmmLitRep (CmmLabel _) = wordRep -cmmLitRep (CmmLabelOff _ _) = wordRep -cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep +data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) + -- TEMPORARY!!! ----------------------------------------------------------------------------- -- Global STG registers ----------------------------------------------------------------------------- +vgcFlag :: CmmType -> VGcPtr +vgcFlag ty | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr data GlobalReg -- Argument and return registers = VanillaReg -- pointers, unboxed ints and chars {-# UNPACK #-} !Int -- its number + VGcPtr | FloatReg -- single-precision floating-point registers {-# UNPACK #-} !Int -- its number @@ -282,7 +344,71 @@ data GlobalReg -- from platform to platform (see module PositionIndependentCode). | PicBaseReg - deriving( Eq, Ord, Show ) + deriving( Show ) + +instance Eq GlobalReg where + VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes + FloatReg i == FloatReg j = i==j + DoubleReg i == DoubleReg j = i==j + LongReg i == LongReg j = i==j + Sp == Sp = True + SpLim == SpLim = True + Hp == Hp = True + HpLim == HpLim = True + CurrentTSO == CurrentTSO = True + CurrentNursery == CurrentNursery = True + HpAlloc == HpAlloc = True + GCEnter1 == GCEnter1 = True + GCFun == GCFun = True + BaseReg == BaseReg = True + PicBaseReg == PicBaseReg = True + _r1 == _r2 = False + +instance Ord GlobalReg where + compare (VanillaReg i _) (VanillaReg j _) = compare i j + -- Ignore type when seeking clashes + compare (FloatReg i) (FloatReg j) = compare i j + compare (DoubleReg i) (DoubleReg j) = compare i j + compare (LongReg i) (LongReg j) = compare i j + compare Sp Sp = EQ + compare SpLim SpLim = EQ + compare Hp Hp = EQ + compare HpLim HpLim = EQ + compare CurrentTSO CurrentTSO = EQ + compare CurrentNursery CurrentNursery = EQ + compare HpAlloc HpAlloc = EQ + compare GCEnter1 GCEnter1 = EQ + compare GCFun GCFun = EQ + compare BaseReg BaseReg = EQ + compare PicBaseReg PicBaseReg = EQ + compare (VanillaReg _ _) _ = LT + compare _ (VanillaReg _ _) = GT + compare (FloatReg _) _ = LT + compare _ (FloatReg _) = GT + compare (DoubleReg _) _ = LT + compare _ (DoubleReg _) = GT + compare (LongReg _) _ = LT + compare _ (LongReg _) = GT + compare Sp _ = LT + compare _ Sp = GT + compare SpLim _ = LT + compare _ SpLim = GT + compare Hp _ = LT + compare _ Hp = GT + compare HpLim _ = LT + compare _ HpLim = GT + compare CurrentTSO _ = LT + compare _ CurrentTSO = GT + compare CurrentNursery _ = LT + compare _ CurrentNursery = GT + compare HpAlloc _ = LT + compare _ HpAlloc = GT + compare GCEnter1 _ = LT + compare _ GCEnter1 = GT + compare GCFun _ = LT + compare _ GCFun = GT + compare BaseReg _ = LT + compare _ BaseReg = GT -- convenient aliases spReg, hpReg, spLimReg, nodeReg :: CmmReg @@ -292,11 +418,682 @@ spLimReg = CmmGlobal SpLim nodeReg = CmmGlobal node node :: GlobalReg -node = VanillaReg 1 - -globalRegRep :: GlobalReg -> MachRep -globalRegRep (VanillaReg _) = wordRep -globalRegRep (FloatReg _) = F32 -globalRegRep (DoubleReg _) = F64 -globalRegRep (LongReg _) = I64 -globalRegRep _ = wordRep +node = VanillaReg 1 VGcPtr + +globalRegType :: GlobalReg -> CmmType +globalRegType (VanillaReg _ VGcPtr) = gcWord +globalRegType (VanillaReg _ VNonGcPtr) = bWord +globalRegType (FloatReg _) = cmmFloat W32 +globalRegType (DoubleReg _) = cmmFloat W64 +globalRegType (LongReg _) = cmmBits W64 +globalRegType Hp = gcWord -- The initialiser for all + -- dynamically allocated closures +globalRegType _ = bWord + + +----------------------------------------------------------------------------- +-- CmmType +----------------------------------------------------------------------------- + + -- NOTE: CmmType is an abstract type, not exported from this + -- module so you can easily change its representation + -- + -- However Width is exported in a concrete way, + -- and is used extensively in pattern-matching + +data CmmType -- The important one! + = CmmType CmmCat Width + +data CmmCat -- "Category" (not exported) + = GcPtrCat -- GC pointer + | BitsCat -- Non-pointer + | FloatCat -- Float + deriving( Eq ) + -- See Note [Signed vs unsigned] at the end + +instance Outputable CmmType where + ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid) + +instance Outputable CmmCat where + ppr FloatCat = ptext $ sLit("F") + ppr _ = ptext $ sLit("I") +-- Temp Jan 08 +-- ppr FloatCat = ptext $ sLit("float") +-- ppr BitsCat = ptext $ sLit("bits") +-- ppr GcPtrCat = ptext $ sLit("gcptr") + +-- Why is CmmType stratified? For native code generation, +-- most of the time you just want to know what sort of register +-- to put the thing in, and for this you need to know how +-- many bits thing has and whether it goes in a floating-point +-- register. By contrast, the distinction between GcPtr and +-- GcNonPtr is of interest to only a few parts of the code generator. + +-------- Equality on CmmType -------------- +-- CmmType is *not* an instance of Eq; sometimes we care about the +-- Gc/NonGc distinction, and sometimes we don't +-- So we use an explicit function to force you to think about it +cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality +cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2 + +cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool + -- This equality is temporary; used in CmmLint + -- but the RTS files are not yet well-typed wrt pointers +cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2) + = c1 `weak_eq` c2 && w1==w2 + where + FloatCat `weak_eq` FloatCat = True + FloatCat `weak_eq` _other = False + _other `weak_eq` FloatCat = False + _word1 `weak_eq` _word2 = True -- Ignores GcPtr + +--- Simple operations on CmmType ----- +typeWidth :: CmmType -> Width +typeWidth (CmmType _ w) = w + +cmmBits, cmmFloat :: Width -> CmmType +cmmBits = CmmType BitsCat +cmmFloat = CmmType FloatCat + +-------- Common CmmTypes ------------ +-- Floats and words of specific widths +b8, b16, b32, b64, f32, f64 :: CmmType +b8 = cmmBits W8 +b16 = cmmBits W16 +b32 = cmmBits W32 +b64 = cmmBits W64 +f32 = cmmFloat W32 +f64 = cmmFloat W64 + +-- CmmTypes of native word widths +bWord, bHalfWord, gcWord :: CmmType +bWord = cmmBits wordWidth +bHalfWord = cmmBits halfWordWidth +gcWord = CmmType GcPtrCat wordWidth + +cInt, cLong :: CmmType +cInt = cmmBits cIntWidth +cLong = cmmBits cLongWidth + + +------------ Predicates ---------------- +isFloatType, isGcPtrType :: CmmType -> Bool +isFloatType (CmmType FloatCat _) = True +isFloatType _other = False + +isGcPtrType (CmmType GcPtrCat _) = True +isGcPtrType _other = False + +isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool +-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) +-- isFloat32 and 64 are obvious + +isWord64 (CmmType BitsCat W64) = True +isWord64 (CmmType GcPtrCat W64) = True +isWord64 _other = False + +isWord32 (CmmType BitsCat W32) = True +isWord32 (CmmType GcPtrCat W32) = True +isWord32 _other = False + +isFloat32 (CmmType FloatCat W32) = True +isFloat32 _other = False + +isFloat64 (CmmType FloatCat W64) = True +isFloat64 _other = False + +----------------------------------------------------------------------------- +-- Width +----------------------------------------------------------------------------- + +data Width = W8 | W16 | W32 | W64 + | W80 -- Extended double-precision float, + -- used in x86 native codegen only. + -- (we use Ord, so it'd better be in this order) + | W128 + deriving (Eq, Ord, Show) + +instance Outputable Width where + ppr rep = ptext (mrStr rep) + +mrStr :: Width -> LitString +mrStr W8 = sLit("W8") +mrStr W16 = sLit("W16") +mrStr W32 = sLit("W32") +mrStr W64 = sLit("W64") +mrStr W128 = sLit("W128") +mrStr W80 = sLit("W80") + + +-------- Common Widths ------------ +wordWidth, halfWordWidth :: Width +wordWidth | wORD_SIZE == 4 = W32 + | wORD_SIZE == 8 = W64 + | otherwise = panic "MachOp.wordRep: Unknown word size" + +halfWordWidth | wORD_SIZE == 4 = W16 + | wORD_SIZE == 8 = W32 + | otherwise = panic "MachOp.halfWordRep: Unknown word size" + +-- cIntRep is the Width for a C-language 'int' +cIntWidth, cLongWidth :: Width +#if SIZEOF_INT == 4 +cIntWidth = W32 +#elif SIZEOF_INT == 8 +cIntWidth = W64 +#endif + +#if SIZEOF_LONG == 4 +cLongWidth = W32 +#elif SIZEOF_LONG == 8 +cLongWidth = W64 +#endif + +widthInBits :: Width -> Int +widthInBits W8 = 8 +widthInBits W16 = 16 +widthInBits W32 = 32 +widthInBits W64 = 64 +widthInBits W128 = 128 +widthInBits W80 = 80 + +widthInBytes :: Width -> Int +widthInBytes W8 = 1 +widthInBytes W16 = 2 +widthInBytes W32 = 4 +widthInBytes W64 = 8 +widthInBytes W128 = 16 +widthInBytes W80 = 10 + +-- log_2 of the width in bytes, useful for generating shifts. +widthInLog :: Width -> Int +widthInLog W8 = 0 +widthInLog W16 = 1 +widthInLog W32 = 2 +widthInLog W64 = 3 +widthInLog W128 = 4 +widthInLog W80 = panic "widthInLog: F80" + + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- +Implementation notes: + +It might suffice to keep just a width, without distinguishing between +floating and integer types. However, keeping the distinction will +help the native code generator to assign registers more easily. +-} + + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. Basically contains C's primops +and no others. + +Nomenclature: all ops indicate width and signedness, where +appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. +Nat means the operation works on STG word sized objects. +Signedness: S means signed, U means unsigned. For operations where +signedness is irrelevant or makes no difference (for example +integer add), the signedness component is omitted. + +An exception: NatP is a ptr-typed native word. From the point of +view of the native code generators this distinction is irrelevant, +but the C code generator sometimes needs this info to emit the +right casts. +-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. + | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_FF_Conv Width Width -- Float -> Float + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. + +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + :: MachOp + +mo_wordAdd = MO_Add wordWidth +mo_wordSub = MO_Sub wordWidth +mo_wordEq = MO_Eq wordWidth +mo_wordNe = MO_Ne wordWidth +mo_wordMul = MO_Mul wordWidth +mo_wordSQuot = MO_S_Quot wordWidth +mo_wordSRem = MO_S_Rem wordWidth +mo_wordSNeg = MO_S_Neg wordWidth +mo_wordUQuot = MO_U_Quot wordWidth +mo_wordURem = MO_U_Rem wordWidth + +mo_wordSGe = MO_S_Ge wordWidth +mo_wordSLe = MO_S_Le wordWidth +mo_wordSGt = MO_S_Gt wordWidth +mo_wordSLt = MO_S_Lt wordWidth + +mo_wordUGe = MO_U_Ge wordWidth +mo_wordULe = MO_U_Le wordWidth +mo_wordUGt = MO_U_Gt wordWidth +mo_wordULt = MO_U_Lt wordWidth + +mo_wordAnd = MO_And wordWidth +mo_wordOr = MO_Or wordWidth +mo_wordXor = MO_Xor wordWidth +mo_wordNot = MO_Not wordWidth +mo_wordShl = MO_Shl wordWidth +mo_wordSShr = MO_S_Shr wordWidth +mo_wordUShr = MO_U_Shr wordWidth + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord = MO_UU_Conv W8 wordWidth +mo_s_8ToWord = MO_SS_Conv W8 wordWidth +mo_u_16ToWord = MO_UU_Conv W16 wordWidth +mo_s_16ToWord = MO_SS_Conv W16 wordWidth +mo_s_32ToWord = MO_SS_Conv W32 wordWidth +mo_u_32ToWord = MO_UU_Conv W32 wordWidth + +mo_WordTo8 = MO_UU_Conv wordWidth W8 +mo_WordTo16 = MO_UU_Conv wordWidth W16 +mo_WordTo32 = MO_UU_Conv wordWidth W32 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isAssociativeMachOp :: MachOp -> Bool +isAssociativeMachOp mop = + case mop of + MO_Add {} -> True -- NB: does not include + MO_Mul {} -> True -- floatint point! + MO_And {} -> True + MO_Or {} -> True + MO_Xor {} -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isComparisonMachOp + +{- | +Returns 'True' if the MachOp is a comparison. + +If in doubt, return False. This generates worse code on the +native routes, but is otherwise harmless. +-} +isComparisonMachOp :: MachOp -> Bool +isComparisonMachOp mop = + case mop of + MO_Eq _ -> True + MO_Ne _ -> True + MO_S_Ge _ -> True + MO_S_Le _ -> True + MO_S_Gt _ -> True + MO_S_Lt _ -> True + MO_U_Ge _ -> True + MO_U_Le _ -> True + MO_U_Gt _ -> True + MO_U_Lt _ -> True + MO_F_Eq {} -> True + MO_F_Ne {} -> True + MO_F_Ge {} -> True + MO_F_Le {} -> True + MO_F_Gt {} -> True + MO_F_Lt {} -> True + _other -> False + +-- ----------------------------------------------------------------------------- +-- Inverting conditions + +-- Sometimes it's useful to be able to invert the sense of a +-- condition. Not all conditional tests are invertible: in +-- particular, floating point conditionals cannot be inverted, because +-- there exist floating-point values which return False for both senses +-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). + +maybeInvertComparison :: MachOp -> Maybe MachOp +maybeInvertComparison op + = case op of -- None of these Just cases include floating point + MO_Eq r -> Just (MO_Ne r) + MO_Ne r -> Just (MO_Eq r) + MO_U_Lt r -> Just (MO_U_Ge r) + MO_U_Gt r -> Just (MO_U_Le r) + MO_U_Le r -> Just (MO_U_Gt r) + MO_U_Ge r -> Just (MO_U_Lt r) + MO_S_Lt r -> Just (MO_S_Ge r) + MO_S_Gt r -> Just (MO_S_Le r) + MO_S_Le r -> Just (MO_S_Gt r) + MO_S_Ge r -> Just (MO_S_Lt r) + MO_F_Eq r -> Just (MO_F_Ne r) + MO_F_Ne r -> Just (MO_F_Eq r) + MO_F_Ge r -> Just (MO_F_Le r) + MO_F_Le r -> Just (MO_F_Ge r) + MO_F_Gt r -> Just (MO_F_Lt r) + MO_F_Lt r -> Just (MO_F_Gt r) + _other -> Nothing + +-- ---------------------------------------------------------------------------- +-- machOpResultType + +{- | +Returns the MachRep of the result of a MachOp. +-} +machOpResultType :: MachOp -> [CmmType] -> CmmType +machOpResultType mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep + MO_Ne {} -> comparisonResultRep + MO_S_Ge {} -> comparisonResultRep + MO_S_Le {} -> comparisonResultRep + MO_S_Gt {} -> comparisonResultRep + MO_S_Lt {} -> comparisonResultRep + + MO_U_Ge {} -> comparisonResultRep + MO_U_Le {} -> comparisonResultRep + MO_U_Gt {} -> comparisonResultRep + MO_U_Lt {} -> comparisonResultRep + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep + MO_F_Ne {} -> comparisonResultRep + MO_F_Ge {} -> comparisonResultRep + MO_F_Le {} -> comparisonResultRep + MO_F_Gt {} -> comparisonResultRep + MO_F_Lt {} -> comparisonResultRep + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + where + (ty1:_) = tys + +comparisonResultRep :: CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. + +machOpArgReps :: MachOp -> [Width] +machOpArgReps op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r,wordWidth] + MO_U_Shr r -> [r,wordWidth] + MO_S_Shr r -> [r,wordWidth] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + +------------------------------------------------------------------------- +{- Note [Signed vs unsigned] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +Should a CmmType include a signed vs. unsigned distinction? + +This is very much like a "hint" in C-- terminology: it isn't necessary +in order to generate correct code, but it might be useful in that the +compiler can generate better code if it has access to higher-level +hints about data. This is important at call boundaries, because the +definition of a function is not visible at all of its call sites, so +the compiler cannot infer the hints. + +Here in Cmm, we're taking a slightly different approach. We include +the int vs. float hint in the MachRep, because (a) the majority of +platforms have a strong distinction between float and int registers, +and (b) we don't want to do any heavyweight hint-inference in the +native code backend in order to get good code. We're treating the +hint more like a type: our Cmm is always completely consistent with +respect to hints. All coercions between float and int are explicit. + +What about the signed vs. unsigned hint? This information might be +useful if we want to keep sub-word-sized values in word-size +registers, which we must do if we only have word-sized registers. + +On such a system, there are two straightforward conventions for +representing sub-word-sized values: + +(a) Leave the upper bits undefined. Comparison operations must + sign- or zero-extend both operands before comparing them, + depending on whether the comparison is signed or unsigned. + +(b) Always keep the values sign- or zero-extended as appropriate. + Arithmetic operations must narrow the result to the appropriate + size. + +A clever compiler might not use either (a) or (b) exclusively, instead +it would attempt to minimize the coercions by analysis: the same kind +of analysis that propagates hints around. In Cmm we don't want to +have to do this, so we plump for having richer types and keeping the +type information consistent. + +If signed/unsigned hints are missing from MachRep, then the only +choice we have is (a), because we don't know whether the result of an +operation should be sign- or zero-extended. + +Many architectures have extending load operations, which work well +with (b). To make use of them with (a), you need to know whether the +value is going to be sign- or zero-extended by an enclosing comparison +(for example), which involves knowing above the context. This is +doable but more complex. + +Further complicating the issue is foreign calls: a foreign calling +convention can specify that signed 8-bit quantities are passed as +sign-extended 32 bit quantities, for example (this is the case on the +PowerPC). So we *do* need sign information on foreign call arguments. + +Pros for adding signed vs. unsigned to MachRep: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the MachRep for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different MachReps in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represnted by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} +