From 176fa33f17dd78355cc572e006d2ab26898e2c69 Mon Sep 17 00:00:00 2001 From: "dias@eecs.harvard.edu" Date: Thu, 14 Aug 2008 12:40:27 +0000 Subject: [PATCH] Merging in the new codegen branch This merge does not turn on the new codegen (which only compiles a select few programs at this point), but it does introduce some changes to the old code generator. The high bits: 1. The Rep Swamp patch is finally here. The highlight is that the representation of types at the machine level has changed. Consequently, this patch contains updates across several back ends. 2. The new Stg -> Cmm path is here, although it appears to have a fair number of bugs lurking. 3. Many improvements along the CmmCPSZ path, including: o stack layout o some code for infotables, half of which is right and half wrong o proc-point splitting --- compiler/basicTypes/IdInfo.lhs | 1 + compiler/cmm/BlockId.hs | 15 +- compiler/cmm/CLabel.hs | 124 +- compiler/cmm/Cmm.hs | 127 +- compiler/cmm/CmmBrokenBlock.hs | 30 +- compiler/cmm/CmmCPS.hs | 23 +- compiler/cmm/CmmCPSGen.hs | 65 +- compiler/cmm/CmmCPSZ.hs | 58 +- compiler/cmm/CmmCallConv.hs | 99 +- compiler/cmm/CmmCommonBlockElimZ.hs | 38 +- compiler/cmm/CmmContFlowOpt.hs | 108 +- compiler/cmm/CmmCvt.hs | 160 ++- compiler/cmm/CmmExpr.hs | 991 ++++++++++++++-- compiler/cmm/CmmInfo.hs | 23 +- compiler/cmm/CmmLex.x | 14 +- compiler/cmm/CmmLint.hs | 90 +- compiler/cmm/CmmLive.hs | 10 +- compiler/cmm/CmmLiveZ.hs | 14 +- compiler/cmm/CmmOpt.hs | 103 +- compiler/cmm/CmmParse.y | 231 ++-- compiler/cmm/CmmProcPointZ.hs | 717 +++++++++-- compiler/cmm/CmmSpillReload.hs | 245 ++-- compiler/cmm/CmmUtils.hs | 94 +- compiler/cmm/CmmZipUtil.hs | 2 +- compiler/cmm/DFMonad.hs | 4 - compiler/cmm/MachOp.hs | 661 ----------- compiler/cmm/MkZipCfg.hs | 34 +- compiler/cmm/MkZipCfgCmm.hs | 137 ++- compiler/cmm/OptimizationFuel.hs | 6 +- compiler/cmm/PprC.hs | 326 ++--- compiler/cmm/PprCmm.hs | 135 ++- compiler/cmm/PprCmmZ.hs | 66 +- compiler/cmm/README | 3 - 
compiler/cmm/StackColor.hs | 31 +- compiler/cmm/ZipCfg.hs | 100 +- compiler/cmm/ZipCfgCmmRep.hs | 591 +++++----- compiler/cmm/ZipCfgExtras.hs | 12 +- compiler/cmm/ZipDataflow.hs | 129 +- compiler/codeGen/CgBindery.lhs | 7 +- compiler/codeGen/CgCallConv.hs | 23 +- compiler/codeGen/CgCase.lhs | 9 +- compiler/codeGen/CgClosure.lhs | 15 +- compiler/codeGen/CgCon.lhs | 15 +- compiler/codeGen/CgExpr.lhs | 35 +- compiler/codeGen/CgForeignCall.hs | 55 +- compiler/codeGen/CgHeapery.lhs | 40 +- compiler/codeGen/CgHpc.hs | 25 +- compiler/codeGen/CgInfoTbls.hs | 34 +- compiler/codeGen/CgMonad.lhs | 6 +- compiler/codeGen/CgPrimOp.hs | 331 +++--- compiler/codeGen/CgProf.hs | 45 +- compiler/codeGen/CgStackery.lhs | 1 - compiler/codeGen/CgTailCall.lhs | 7 +- compiler/codeGen/CgTicky.hs | 37 +- compiler/codeGen/CgUtils.hs | 189 ++- compiler/codeGen/ClosureInfo.lhs | 52 +- compiler/codeGen/CodeGen.lhs | 12 +- compiler/codeGen/SMRep.lhs | 33 +- compiler/codeGen/StgCmm.hs | 400 +++++++ compiler/codeGen/StgCmmBind.hs | 615 ++++++++++ compiler/codeGen/StgCmmBind.hs-boot | 6 + compiler/codeGen/StgCmmClosure.hs | 1100 +++++++++++++++++ compiler/codeGen/StgCmmCon.hs | 216 ++++ compiler/codeGen/StgCmmEnv.hs | 209 ++++ compiler/codeGen/StgCmmExpr.hs | 451 +++++++ compiler/codeGen/StgCmmForeign.hs | 316 +++++ compiler/codeGen/StgCmmGran.hs | 131 ++ compiler/codeGen/StgCmmHeap.hs | 519 ++++++++ compiler/codeGen/StgCmmHpc.hs | 83 ++ compiler/codeGen/StgCmmLayout.hs | 618 ++++++++++ compiler/codeGen/StgCmmMonad.hs | 601 ++++++++++ compiler/codeGen/StgCmmPrim.hs | 662 +++++++++++ compiler/codeGen/StgCmmProf.hs | 553 +++++++++ compiler/codeGen/StgCmmTicky.hs | 397 +++++++ compiler/codeGen/StgCmmUtils.hs | 902 ++++++++++++++ compiler/deSugar/DsForeign.lhs | 46 +- compiler/ghc.cabal.in | 17 +- compiler/main/DynFlags.hs | 8 +- compiler/main/HscMain.lhs | 58 +- compiler/nativeGen/AsmCodeGen.lhs | 15 +- compiler/nativeGen/MachCodeGen.hs | 1573 ++++++++++++------------- compiler/nativeGen/MachInstrs.hs 
| 172 +-- compiler/nativeGen/MachRegs.lhs | 154 ++- compiler/nativeGen/NCGMonad.hs | 7 +- compiler/nativeGen/PositionIndependentCode.hs | 17 +- compiler/nativeGen/PprMach.hs | 281 ++--- compiler/nativeGen/RegAllocInfo.hs | 47 +- compiler/prelude/ForeignCall.lhs | 2 + compiler/typecheck/TcForeign.lhs | 3 +- compiler/typecheck/TcRnDriver.lhs | 3 +- includes/Cmm.h | 9 +- includes/mkDerivedConstants.c | 35 +- rts/Exception.cmm | 4 +- rts/HeapStackCheck.cmm | 12 +- rts/PrimOps.cmm | 8 +- rts/Updates.cmm | 4 +- utils/genapply/GenApply.hs | 4 +- utils/runstdtest/runstdtest.prl | 4 +- 98 files changed, 12795 insertions(+), 4055 deletions(-) delete mode 100644 compiler/cmm/MachOp.hs create mode 100644 compiler/codeGen/StgCmm.hs create mode 100644 compiler/codeGen/StgCmmBind.hs create mode 100644 compiler/codeGen/StgCmmBind.hs-boot create mode 100644 compiler/codeGen/StgCmmClosure.hs create mode 100644 compiler/codeGen/StgCmmCon.hs create mode 100644 compiler/codeGen/StgCmmEnv.hs create mode 100644 compiler/codeGen/StgCmmExpr.hs create mode 100644 compiler/codeGen/StgCmmForeign.hs create mode 100644 compiler/codeGen/StgCmmGran.hs create mode 100644 compiler/codeGen/StgCmmHeap.hs create mode 100644 compiler/codeGen/StgCmmHpc.hs create mode 100644 compiler/codeGen/StgCmmLayout.hs create mode 100644 compiler/codeGen/StgCmmMonad.hs create mode 100644 compiler/codeGen/StgCmmPrim.hs create mode 100644 compiler/codeGen/StgCmmProf.hs create mode 100644 compiler/codeGen/StgCmmTicky.hs create mode 100644 compiler/codeGen/StgCmmUtils.hs diff --git a/compiler/basicTypes/IdInfo.lhs b/compiler/basicTypes/IdInfo.lhs index 1ebfcf9..26fe453 100644 --- a/compiler/basicTypes/IdInfo.lhs +++ b/compiler/basicTypes/IdInfo.lhs @@ -623,6 +623,7 @@ data CafInfo | NoCafRefs -- ^ A function or static constructor -- that refers to no CAFs. 
+ deriving (Eq, Ord) -- | Assumes that the 'Id' has CAF references: definitely safe vanillaCafInfo :: CafInfo diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs index fb9b7ca..2e4d452 100644 --- a/compiler/cmm/BlockId.hs +++ b/compiler/cmm/BlockId.hs @@ -1,9 +1,13 @@ module BlockId ( BlockId(..), mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet - , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, mkBlockEnv + , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, mkBlockEnv, mapBlockEnv , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, sizeBlockSet, mkBlockSet + , foldBlockEnv, blockLbl, infoTblLbl ) where +import CLabel +import IdInfo +import Name import Outputable import UniqFM import Unique @@ -36,6 +40,11 @@ instance Show BlockId where instance Outputable BlockId where ppr = ppr . getUnique +blockLbl :: BlockId -> CLabel +blockLbl id = mkEntryLabel (mkFCallName (getUnique id) "block") NoCafRefs + +infoTblLbl :: BlockId -> CLabel +infoTblLbl id = mkInfoTableLabel (mkFCallName (getUnique id) "block") NoCafRefs type BlockEnv a = UniqFM {- BlockId -} a emptyBlockEnv :: BlockEnv a @@ -46,6 +55,10 @@ lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a lookupBlockEnv = lookupUFM extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a extendBlockEnv = addToUFM +mapBlockEnv :: (a -> b) -> BlockEnv a -> BlockEnv b +mapBlockEnv = mapUFM +foldBlockEnv :: (BlockId -> a -> b -> b) -> b -> BlockEnv a -> b +foldBlockEnv f = foldUFM_Directly (\u x y -> f (mkBlockId u) x y) type BlockSet = UniqSet BlockId emptyBlockSet :: BlockSet diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 1c33824..ffa93fb 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -51,6 +51,7 @@ module CLabel ( mkModuleInitLabel, mkPlainModuleInitLabel, + mkModuleInitTableLabel, mkSplitMarkerLabel, mkDirty_MUT_VAR_Label, @@ -67,6 +68,7 @@ module CLabel ( mkRtsSlowTickyCtrLabel, moduleRegdLabel, + moduleRegTableLabel, 
mkSelectorInfoLabel, mkSelectorEntryLabel, @@ -77,6 +79,7 @@ module CLabel ( mkRtsRetLabel, mkRtsCodeLabel, mkRtsDataLabel, + mkRtsGcPtrLabel, mkRtsInfoLabelFS, mkRtsEntryLabelFS, @@ -103,16 +106,18 @@ module CLabel ( mkHpcTicksLabel, mkHpcModuleNameLabel, + hasCAF, infoLblToEntryLbl, entryLblToInfoLbl, needsCDecl, isAsmTemp, maybeAsmTemp, externallyVisibleCLabel, isMathFun, - CLabelType(..), labelType, labelDynamic, + isCFunctionLabel, isGcPtrLabel, labelDynamic, pprCLabel ) where #include "HsVersions.h" +import IdInfo import StaticFlags import Packages import DataCon @@ -155,6 +160,7 @@ CLabel is an abstract type that supports the following operations: data CLabel = IdLabel -- A family of labels related to the Name -- definition of a particular Id or Con + CafInfo IdLabelInfo | CaseLabel -- A family of labels related to a particular @@ -177,7 +183,10 @@ data CLabel -- because we don't always recompile modules which depend on a module -- whose version has changed. - | PlainModuleInitLabel -- without the vesrion & way info + | PlainModuleInitLabel -- without the version & way info + Module + + | ModuleInitTableLabel -- table of imported modules to init Module | ModuleRegdLabel @@ -262,7 +271,8 @@ data RtsLabelInfo | RtsEntry LitString -- misc rts entry points | RtsRetInfo LitString -- misc rts ret info tables | RtsRet LitString -- misc rts return points - | RtsData LitString -- misc rts data bits, eg CHARLIKE_closure + | RtsData LitString -- misc rts data bits + | RtsGcPtr LitString -- GcPtrs eg CHARLIKE_closure | RtsCode LitString -- misc rts code | RtsInfoFS FastString -- misc rts info tables @@ -292,29 +302,29 @@ data DynamicLinkerLabelInfo -- Constructing CLabels -- These are always local: -mkSRTLabel name = IdLabel name SRT -mkSlowEntryLabel name = IdLabel name Slow -mkRednCountsLabel name = IdLabel name RednCounts +mkSRTLabel name c = IdLabel name c SRT +mkSlowEntryLabel name c = IdLabel name c Slow +mkRednCountsLabel name c = IdLabel name c RednCounts -- 
These have local & (possibly) external variants: -mkLocalClosureLabel name = IdLabel name Closure -mkLocalInfoTableLabel name = IdLabel name InfoTable -mkLocalEntryLabel name = IdLabel name Entry -mkLocalClosureTableLabel name = IdLabel name ClosureTable - -mkClosureLabel name = IdLabel name Closure -mkInfoTableLabel name = IdLabel name InfoTable -mkEntryLabel name = IdLabel name Entry -mkClosureTableLabel name = IdLabel name ClosureTable -mkLocalConInfoTableLabel con = IdLabel con ConInfoTable -mkLocalConEntryLabel con = IdLabel con ConEntry -mkLocalStaticInfoTableLabel con = IdLabel con StaticInfoTable -mkLocalStaticConEntryLabel con = IdLabel con StaticConEntry -mkConInfoTableLabel name = IdLabel name ConInfoTable -mkStaticInfoTableLabel name = IdLabel name StaticInfoTable - -mkConEntryLabel name = IdLabel name ConEntry -mkStaticConEntryLabel name = IdLabel name StaticConEntry +mkLocalClosureLabel name c = IdLabel name c Closure +mkLocalInfoTableLabel name c = IdLabel name c InfoTable +mkLocalEntryLabel name c = IdLabel name c Entry +mkLocalClosureTableLabel name c = IdLabel name c ClosureTable + +mkClosureLabel name c = IdLabel name c Closure +mkInfoTableLabel name c = IdLabel name c InfoTable +mkEntryLabel name c = IdLabel name c Entry +mkClosureTableLabel name c = IdLabel name c ClosureTable +mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable +mkLocalConEntryLabel c con = IdLabel con c ConEntry +mkLocalStaticInfoTableLabel c con = IdLabel con c StaticInfoTable +mkLocalStaticConEntryLabel c con = IdLabel con c StaticConEntry +mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkStaticInfoTableLabel name c = IdLabel name c StaticInfoTable + +mkConEntryLabel name c = IdLabel name c ConEntry +mkStaticConEntryLabel name c = IdLabel name c StaticConEntry mkLargeSRTLabel uniq = LargeSRTLabel uniq mkBitmapLabel uniq = LargeBitmapLabel uniq @@ -334,6 +344,9 @@ mkModuleInitLabel mod way = ModuleInitLabel mod way mkPlainModuleInitLabel :: Module -> 
CLabel mkPlainModuleInitLabel mod = PlainModuleInitLabel mod +mkModuleInitTableLabel :: Module -> CLabel +mkModuleInitTableLabel mod = ModuleInitTableLabel mod + -- Some fixed runtime system labels mkSplitMarkerLabel = RtsLabel (RtsCode (sLit "__stg_split_marker")) @@ -350,6 +363,7 @@ mkCAFBlackHoleInfoTableLabel = RtsLabel (RtsInfo (sLit "stg_CAF_BLACKHOLE")) mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) moduleRegdLabel = ModuleRegdLabel +moduleRegTableLabel = ModuleInitTableLabel mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) @@ -383,6 +397,7 @@ mkRtsRetInfoLabel str = RtsLabel (RtsRetInfo str) mkRtsRetLabel str = RtsLabel (RtsRet str) mkRtsCodeLabel str = RtsLabel (RtsCode str) mkRtsDataLabel str = RtsLabel (RtsData str) +mkRtsGcPtrLabel str = RtsLabel (RtsGcPtr str) mkRtsInfoLabelFS str = RtsLabel (RtsInfoFS str) mkRtsEntryLabelFS str = RtsLabel (RtsEntryFS str) @@ -422,9 +437,9 @@ mkDeadStripPreventer lbl = DeadStripPreventer lbl -- Converting between info labels and entry/ret labels. 
infoLblToEntryLbl :: CLabel -> CLabel -infoLblToEntryLbl (IdLabel n InfoTable) = IdLabel n Entry -infoLblToEntryLbl (IdLabel n ConInfoTable) = IdLabel n ConEntry -infoLblToEntryLbl (IdLabel n StaticInfoTable) = IdLabel n StaticConEntry +infoLblToEntryLbl (IdLabel n c InfoTable) = IdLabel n c Entry +infoLblToEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry +infoLblToEntryLbl (IdLabel n c StaticInfoTable) = IdLabel n c StaticConEntry infoLblToEntryLbl (CaseLabel n CaseReturnInfo) = CaseLabel n CaseReturnPt infoLblToEntryLbl (RtsLabel (RtsInfo s)) = RtsLabel (RtsEntry s) infoLblToEntryLbl (RtsLabel (RtsRetInfo s)) = RtsLabel (RtsRet s) @@ -433,9 +448,9 @@ infoLblToEntryLbl (RtsLabel (RtsRetInfoFS s)) = RtsLabel (RtsRetFS s) infoLblToEntryLbl _ = panic "CLabel.infoLblToEntryLbl" entryLblToInfoLbl :: CLabel -> CLabel -entryLblToInfoLbl (IdLabel n Entry) = IdLabel n InfoTable -entryLblToInfoLbl (IdLabel n ConEntry) = IdLabel n ConInfoTable -entryLblToInfoLbl (IdLabel n StaticConEntry) = IdLabel n StaticInfoTable +entryLblToInfoLbl (IdLabel n c Entry) = IdLabel n c InfoTable +entryLblToInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable +entryLblToInfoLbl (IdLabel n c StaticConEntry) = IdLabel n c StaticInfoTable entryLblToInfoLbl (CaseLabel n CaseReturnPt) = CaseLabel n CaseReturnInfo entryLblToInfoLbl (RtsLabel (RtsEntry s)) = RtsLabel (RtsInfo s) entryLblToInfoLbl (RtsLabel (RtsRet s)) = RtsLabel (RtsRetInfo s) @@ -444,6 +459,12 @@ entryLblToInfoLbl (RtsLabel (RtsRetFS s)) = RtsLabel (RtsRetInfoFS s) entryLblToInfoLbl l = pprPanic "CLabel.entryLblToInfoLbl" (pprCLabel l) -- ----------------------------------------------------------------------------- +-- Does a CLabel refer to a CAF? +hasCAF :: CLabel -> Bool +hasCAF (IdLabel _ MayHaveCafRefs Closure) = True +hasCAF _ = False + +-- ----------------------------------------------------------------------------- -- Does a CLabel need declaring before use or not? 
-- -- See wiki:Commentary/Compiler/Backends/PprC#Prototypes @@ -452,13 +473,14 @@ needsCDecl :: CLabel -> Bool -- False <=> it's pre-declared; don't bother -- don't bother declaring SRT & Bitmap labels, we always make sure -- they are defined before use. -needsCDecl (IdLabel _ SRT) = False +needsCDecl (IdLabel _ _ SRT) = False needsCDecl (LargeSRTLabel _) = False needsCDecl (LargeBitmapLabel _) = False -needsCDecl (IdLabel _ _) = True +needsCDecl (IdLabel _ _ _) = True needsCDecl (CaseLabel _ _) = True needsCDecl (ModuleInitLabel _ _) = True needsCDecl (PlainModuleInitLabel _) = True +needsCDecl (ModuleInitTableLabel _) = True needsCDecl ModuleRegdLabel = False needsCDecl (StringLitLabel _) = False @@ -520,12 +542,11 @@ externallyVisibleCLabel (StringLitLabel _) = False externallyVisibleCLabel (AsmTempLabel _) = False externallyVisibleCLabel (ModuleInitLabel _ _) = True externallyVisibleCLabel (PlainModuleInitLabel _)= True +externallyVisibleCLabel (ModuleInitTableLabel _)= False externallyVisibleCLabel ModuleRegdLabel = False externallyVisibleCLabel (RtsLabel _) = True externallyVisibleCLabel (ForeignLabel _ _ _) = True -externallyVisibleCLabel (IdLabel name SRT) = False - -- SRTs don't need to be external -externallyVisibleCLabel (IdLabel name _) = isExternalName name +externallyVisibleCLabel (IdLabel name _ _) = isExternalName name externallyVisibleCLabel (CC_Label _) = True externallyVisibleCLabel (CCS_Label _) = True externallyVisibleCLabel (DynamicLinkerLabel _ _) = False @@ -540,13 +561,25 @@ externallyVisibleCLabel (LargeSRTLabel _) = False -- For generating correct types in label declarations: data CLabelType - = CodeLabel - | DataLabel + = CodeLabel -- Address of some executable instructions + | DataLabel -- Address of data, not a GC ptr + | GcPtrLabel -- Address of a (presumably static) GC object + +isCFunctionLabel :: CLabel -> Bool +isCFunctionLabel lbl = case labelType lbl of + CodeLabel -> True + _other -> False + +isGcPtrLabel :: CLabel -> Bool 
+isGcPtrLabel lbl = case labelType lbl of + GcPtrLabel -> True + _other -> False labelType :: CLabel -> CLabelType labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsData _)) = DataLabel +labelType (RtsLabel (RtsGcPtr _)) = GcPtrLabel labelType (RtsLabel (RtsCode _)) = CodeLabel labelType (RtsLabel (RtsInfo _)) = DataLabel labelType (RtsLabel (RtsEntry _)) = CodeLabel @@ -563,20 +596,19 @@ labelType (CaseLabel _ CaseReturnInfo) = DataLabel labelType (CaseLabel _ _) = CodeLabel labelType (ModuleInitLabel _ _) = CodeLabel labelType (PlainModuleInitLabel _) = CodeLabel +labelType (ModuleInitTableLabel _) = DataLabel labelType (LargeSRTLabel _) = DataLabel labelType (LargeBitmapLabel _) = DataLabel - -labelType (IdLabel _ info) = idInfoLabelType info -labelType _ = DataLabel +labelType (IdLabel _ _ info) = idInfoLabelType info +labelType _ = DataLabel idInfoLabelType info = case info of InfoTable -> DataLabel - Closure -> DataLabel + Closure -> GcPtrLabel ConInfoTable -> DataLabel StaticInfoTable -> DataLabel ClosureTable -> DataLabel --- krc: aie! a ticky counter label is data RednCounts -> DataLabel _ -> CodeLabel @@ -593,7 +625,7 @@ labelDynamic :: PackageId -> CLabel -> Bool labelDynamic this_pkg lbl = case lbl of RtsLabel _ -> not opt_Static && (this_pkg /= rtsPackageId) -- i.e., is the RTS in a DLL or not? - IdLabel n k -> isDllName this_pkg n + IdLabel n _ k -> isDllName this_pkg n #if mingw32_TARGET_OS ForeignLabel _ _ d -> d #else @@ -603,6 +635,7 @@ labelDynamic this_pkg lbl = #endif ModuleInitLabel m _ -> not opt_Static && this_pkg /= (modulePackageId m) PlainModuleInitLabel m -> not opt_Static && this_pkg /= (modulePackageId m) + ModuleInitTableLabel m -> not opt_Static && this_pkg /= (modulePackageId m) -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. 
_ -> False @@ -720,6 +753,7 @@ pprCLbl (LargeBitmapLabel u) = text "b" <> pprUnique u <> pp_cSEP <> ptext (sLi pprCLbl (RtsLabel (RtsCode str)) = ptext str pprCLbl (RtsLabel (RtsData str)) = ptext str +pprCLbl (RtsLabel (RtsGcPtr str)) = ptext str pprCLbl (RtsLabel (RtsCodeFS str)) = ftext str pprCLbl (RtsLabel (RtsDataFS str)) = ftext str @@ -789,7 +823,7 @@ pprCLbl ModuleRegdLabel pprCLbl (ForeignLabel str _ _) = ftext str -pprCLbl (IdLabel name flavor) = ppr name <> ppIdFlavor flavor +pprCLbl (IdLabel name _ flavor) = ppr name <> ppIdFlavor flavor pprCLbl (CC_Label cc) = ppr cc pprCLbl (CCS_Label ccs) = ppr ccs @@ -799,6 +833,8 @@ pprCLbl (ModuleInitLabel mod way) <> char '_' <> text way pprCLbl (PlainModuleInitLabel mod) = ptext (sLit "__stginit_") <> ppr mod +pprCLbl (ModuleInitTableLabel mod) + = ptext (sLit "__stginittable_") <> ppr mod pprCLbl (HpcTicksLabel mod) = ptext (sLit "_hpc_tickboxes_") <> ppr mod <> ptext (sLit "_hpc") diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 9dcaf84..5e52a57 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -7,21 +7,21 @@ ----------------------------------------------------------------------------- module Cmm ( - GenCmm(..), Cmm, RawCmm, - GenCmmTop(..), CmmTop, RawCmmTop, - ListGraph(..), + GenCmm(..), Cmm, RawCmm, + GenCmmTop(..), CmmTop, RawCmmTop, + ListGraph(..), cmmMapGraph, cmmTopMapGraph, cmmMapGraphM, cmmTopMapGraphM, - CmmInfo(..), UpdateFrame(..), + CmmInfo(..), UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..), ProfilingInfo(..), ClosureTypeTag, GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts, CmmReturnInfo(..), - CmmStmt(..), CmmActual, CmmActuals, CmmFormal, CmmFormals, CmmKind, - CmmFormalsWithoutKinds, CmmFormalWithoutKind, - CmmKinded(..), + CmmStmt(..), CmmActual, CmmActuals, CmmFormal, CmmFormals, + HintedCmmFormal, HintedCmmFormals, HintedCmmActual, HintedCmmActuals, CmmSafety(..), - CmmCallTarget(..), - CmmStatic(..), Section(..), + CmmCallTarget(..), 
CallishMachOp(..), pprCallishMachOp, + ForeignHint(..), CmmHinted(..), + CmmStatic(..), Section(..), module CmmExpr, ) where @@ -29,10 +29,10 @@ module Cmm ( import BlockId import CmmExpr -import MachOp import CLabel import ForeignCall import SMRep + import ClosureInfo import Outputable import FastString @@ -46,7 +46,7 @@ import Data.Word -- with assembly-language labels. ----------------------------------------------------------------------------- --- Cmm, CmmTop, CmmBasicBlock +-- Cmm, CmmTop, CmmBasicBlock ----------------------------------------------------------------------------- -- A file is a list of top-level chunks. These may be arbitrarily @@ -59,7 +59,7 @@ import Data.Word -- -- We expect there to be two main instances of this type: -- (a) C--, i.e. populated with various C-- constructs --- (Cmm and RawCmm below) +-- (Cmm and RawCmm below) -- (b) Native code, populated with data/instructions -- -- A second family of instances based on ZipCfg is work in progress. @@ -72,7 +72,7 @@ data GenCmmTop d h g = CmmProc -- A procedure h -- Extra header such as the info table CLabel -- Used to generate both info & entry labels - CmmFormalsWithoutKinds -- Argument locals live on entry (C-- procedure params) + CmmFormals -- Argument locals live on entry (C-- procedure params) -- XXX Odd that there are no kinds, but there you are ---NR g -- Control-flow graph for the procedure's code @@ -164,11 +164,11 @@ data CmmInfoTable data ClosureTypeInfo = ConstrInfo ClosureLayout ConstrTag ConstrDescription - | FunInfo ClosureLayout C_SRT FunType FunArity ArgDescr SlowEntry - | ThunkInfo ClosureLayout C_SRT + | FunInfo ClosureLayout C_SRT FunArity ArgDescr SlowEntry + | ThunkInfo ClosureLayout C_SRT | ThunkSelectorInfo SelectorOffset C_SRT | ContInfo - [Maybe LocalReg] -- Forced stack parameters + [Maybe LocalReg] -- stack layout C_SRT data CmmReturnInfo = CmmMayReturn @@ -180,7 +180,6 @@ type ClosureTypeTag = StgHalfWord type ClosureLayout = (StgHalfWord, StgHalfWord) -- 
ptrs, nptrs type ConstrTag = StgHalfWord type ConstrDescription = CmmLit -type FunType = StgHalfWord type FunArity = StgHalfWord type SlowEntry = CmmLit -- We would like this to be a CLabel but @@ -201,19 +200,19 @@ data UpdateFrame = -- control to a new function. ----------------------------------------------------------------------------- -data CmmStmt +data CmmStmt -- Old-style = CmmNop | CmmComment FastString | CmmAssign CmmReg CmmExpr -- Assign to register | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is - -- given by cmmExprRep of the rhs. + -- given by cmmExprType of the rhs. | CmmCall -- A call (forign, native or primitive), with CmmCallTarget - CmmFormals -- zero or more results - CmmActuals -- zero or more arguments + HintedCmmFormals -- zero or more results + HintedCmmActuals -- zero or more arguments CmmSafety -- whether to build a continuation CmmReturnInfo @@ -228,27 +227,27 @@ data CmmStmt -- Undefined outside range, and when there's a Nothing | CmmJump CmmExpr -- Jump to another C-- function, - CmmActuals -- with these parameters. + HintedCmmActuals -- with these parameters. (parameters never used) | CmmReturn -- Return from a native C-- function, - CmmActuals -- with these return values. + HintedCmmActuals -- with these return values. 
(parameters never used) -type CmmKind = MachHint -data CmmKinded a = CmmKinded { kindlessCmm :: a, cmmKind :: CmmKind } - deriving (Eq) -type CmmActual = CmmKinded CmmExpr -type CmmFormal = CmmKinded LocalReg +type CmmActual = CmmExpr +type CmmFormal = LocalReg type CmmActuals = [CmmActual] type CmmFormals = [CmmFormal] -type CmmFormalWithoutKind = LocalReg -type CmmFormalsWithoutKinds = [CmmFormalWithoutKind] + +data CmmHinted a = CmmHinted { hintlessCmm :: a, cmmHint :: ForeignHint } + deriving( Eq ) + +type HintedCmmActuals = [HintedCmmActual] +type HintedCmmFormals = [HintedCmmFormal] +type HintedCmmFormal = CmmHinted CmmFormal +type HintedCmmActual = CmmHinted CmmActual data CmmSafety = CmmUnsafe | CmmSafe C_SRT -- | enable us to fold used registers over 'CmmActuals' and 'CmmFormals' -instance UserOfLocalRegs a => UserOfLocalRegs (CmmKinded a) where - foldRegsUsed f set (CmmKinded a _) = foldRegsUsed f set a - instance UserOfLocalRegs CmmStmt where foldRegsUsed f set s = stmt s set where stmt (CmmNop) = id @@ -267,13 +266,18 @@ instance UserOfLocalRegs CmmCallTarget where foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e foldRegsUsed _ set (CmmPrim {}) = set -instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmKinded a) where - foldRegsDefd f z (CmmKinded x _) = foldRegsDefd f z x +instance UserOfSlots CmmCallTarget where + foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e + foldSlotsUsed _ set (CmmPrim {}) = set ---just look like a tuple, since it was a tuple before --- ... is that a good idea? 
--Isaac Dupree -instance (Outputable a) => Outputable (CmmKinded a) where - ppr (CmmKinded a k) = ppr (a, k) +instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where + foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a) + +instance UserOfSlots a => UserOfSlots (CmmHinted a) where + foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a) + +instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where + foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a) {- Discussion @@ -332,6 +336,51 @@ data CmmCallTarget -- code by the backend. deriving Eq + +data ForeignHint + = NoHint | AddrHint | SignedHint + deriving( Eq ) + -- Used to give extra per-argument or per-result + -- information needed by foreign calling conventions + + +-- CallishMachOps tend to be implemented by foreign calls in some backends, +-- so we separate them out. In Cmm, these can only occur in a +-- statement position, in contrast to an ordinary MachOp which can occur +-- anywhere in an expression. 
+data CallishMachOp + = MO_F64_Pwr + | MO_F64_Sin + | MO_F64_Cos + | MO_F64_Tan + | MO_F64_Sinh + | MO_F64_Cosh + | MO_F64_Tanh + | MO_F64_Asin + | MO_F64_Acos + | MO_F64_Atan + | MO_F64_Log + | MO_F64_Exp + | MO_F64_Sqrt + | MO_F32_Pwr + | MO_F32_Sin + | MO_F32_Cos + | MO_F32_Tan + | MO_F32_Sinh + | MO_F32_Cosh + | MO_F32_Tanh + | MO_F32_Asin + | MO_F32_Acos + | MO_F32_Atan + | MO_F32_Log + | MO_F32_Exp + | MO_F32_Sqrt + | MO_WriteBarrier + deriving (Eq, Show) + +pprCallishMachOp :: CallishMachOp -> SDoc +pprCallishMachOp mo = text (show mo) + ----------------------------------------------------------------------------- -- Static Data ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmBrokenBlock.hs b/compiler/cmm/CmmBrokenBlock.hs index aa8dbf8..ffb7f02 100644 --- a/compiler/cmm/CmmBrokenBlock.hs +++ b/compiler/cmm/CmmBrokenBlock.hs @@ -15,7 +15,7 @@ module CmmBrokenBlock ( adaptBlockToFormat, selectContinuations, ContFormat, - makeContinuationEntries, + makeContinuationEntries ) where #include "HsVersions.h" @@ -24,7 +24,6 @@ import BlockId import Cmm import CmmUtils import CLabel -import MachOp (MachHint(..)) import CgUtils (callerSaveVolatileRegs) import ClosureInfo @@ -69,14 +68,14 @@ data BrokenBlock -- | How a block could be entered -- See Note [An example of CPS conversion] data BlockEntryInfo - = FunctionEntry CmmInfo CLabel CmmFormalsWithoutKinds + = FunctionEntry CmmInfo CLabel CmmFormals -- ^ Block is the beginning of a function, parameters are: -- 1. Function header info -- 2. The function name -- 3. Aguments to function -- Only the formal parameters are live - | ContinuationEntry CmmFormalsWithoutKinds C_SRT Bool + | ContinuationEntry CmmFormals C_SRT Bool -- ^ Return point of a function call, parameters are: -- 1. return values (argument to continuation) -- 2. 
SRT for the continuation's info table @@ -124,7 +123,7 @@ f2(x, y) { // ProcPointEntry -} -data ContFormat = ContFormat CmmFormals C_SRT Bool +data ContFormat = ContFormat HintedCmmFormals C_SRT Bool -- ^ Arguments -- 1. return values (argument to continuation) -- 2. SRT for the continuation's info table @@ -138,15 +137,15 @@ data FinalStmt = FinalBranch BlockId -- ^ Same as 'CmmBranch'. Target must be a ControlEntry - | FinalReturn CmmActuals + | FinalReturn HintedCmmActuals -- ^ Same as 'CmmReturn'. Parameter is the return values. - | FinalJump CmmExpr CmmActuals + | FinalJump CmmExpr HintedCmmActuals -- ^ Same as 'CmmJump'. Parameters: -- 1. The function to call, -- 2. Arguments of the call - | FinalCall BlockId CmmCallTarget CmmFormals CmmActuals + | FinalCall BlockId CmmCallTarget HintedCmmFormals HintedCmmActuals C_SRT CmmReturnInfo Bool -- ^ Same as 'CmmCallee' followed by 'CmmGoto'. Parameters: -- 1. Target of the 'CmmGoto' (must be a 'ContinuationEntry') @@ -195,7 +194,7 @@ breakProc :: -- to create names of the new blocks with -> CmmInfo -- ^ Info table for the procedure -> CLabel -- ^ Name of the procedure - -> CmmFormalsWithoutKinds -- ^ Parameters of the procedure + -> CmmFormals -- ^ Parameters of the procedure -> [CmmBasicBlock] -- ^ Blocks of the procecure -- (First block is the entry block) -> [BrokenBlock] @@ -353,7 +352,7 @@ makeContinuationEntries formats case lookup ident formats of Nothing -> block Just (ContFormat formals srt is_gc) -> - BrokenBlock ident (ContinuationEntry (map kindlessCmm formals) srt is_gc) + BrokenBlock ident (ContinuationEntry (map hintlessCmm formals) srt is_gc) stmts targets exit adaptBlockToFormat :: [(BlockId, ContFormat)] @@ -383,20 +382,19 @@ adaptBlockToFormat formats unique target formals actuals srt ret is_gc adaptor_block = mk_adaptor_block adaptor_ident - (ContinuationEntry (map kindlessCmm formals) srt is_gc) - next format_formals + (ContinuationEntry (map hintlessCmm formals) srt is_gc) next adaptor_ident = 
BlockId unique - mk_adaptor_block :: BlockId -> BlockEntryInfo -> BlockId -> CmmFormals -> BrokenBlock - mk_adaptor_block ident entry next formals = + mk_adaptor_block :: BlockId -> BlockEntryInfo -> BlockId -> BrokenBlock + mk_adaptor_block ident entry next = BrokenBlock ident entry [] [next] exit where exit = FinalJump (CmmLit (CmmLabel (mkReturnPtLabel (getUnique next)))) (map formal_to_actual format_formals) - formal_to_actual (CmmKinded reg hint) - = (CmmKinded (CmmReg (CmmLocal reg)) hint) + formal_to_actual (CmmHinted reg hint) + = (CmmHinted (CmmReg (CmmLocal reg)) hint) -- TODO: Check if NoHint is right. We're -- jumping to a C-- function not a foreign one -- so it might always be right. diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs index 025c127..f00a93c 100644 --- a/compiler/cmm/CmmCPS.hs +++ b/compiler/cmm/CmmCPS.hs @@ -26,7 +26,6 @@ import CmmCPSGen import CmmUtils import ClosureInfo -import MachOp import CLabel import SMRep import Constants @@ -118,7 +117,7 @@ cpsProc uniqSupply (CmmProc info ident params (ListGraph blocks)) = cps_procs block_uniques = uniques proc_uniques = map (map (map uniqsFromSupply . listSplitUniqSupply) . listSplitUniqSupply) $ listSplitUniqSupply uniqSupply2 - stack_use = CmmLocal (LocalReg stack_use_unique (cmmRegRep spReg) GCKindPtr) + stack_use = CmmLocal (LocalReg stack_use_unique (cmmRegType spReg)) stack_check_block_id = BlockId stack_check_block_unique stack_check_block = make_stack_check stack_check_block_id info stack_use (blockId $ head blocks) @@ -171,7 +170,7 @@ cpsProc uniqSupply (CmmProc info ident params (ListGraph blocks)) = cps_procs -- This is an association list instead of a UniqFM because -- CLabel's don't have a 'Uniqueable' instance. 
formats :: [(CLabel, -- key - (CmmFormalsWithoutKinds, -- arguments + (CmmFormals, -- arguments Maybe CLabel, -- label in top slot [Maybe LocalReg]))] -- slots formats = selectContinuationFormat live continuations @@ -200,7 +199,7 @@ make_stack_check stack_check_block_id info stack_use next_block_id = -- then great, well check the stack. CmmInfo (Just gc_block) _ _ -> [CmmCondBranch - (CmmMachOp (MO_U_Lt $ cmmRegRep spReg) + (CmmMachOp (MO_U_Lt (typeWidth (cmmRegType spReg))) [CmmReg stack_use, CmmReg spLimReg]) gc_block] -- If we aren't given a stack check handler, @@ -277,7 +276,7 @@ gatherBlocksIntoContinuation live proc_points blocks start = selectContinuationFormat :: BlockEnv CmmLive -> [Continuation (Either C_SRT CmmInfo)] - -> [(CLabel, (CmmFormalsWithoutKinds, Maybe CLabel, [Maybe LocalReg]))] + -> [(CLabel, (CmmFormals, Maybe CLabel, [Maybe LocalReg]))] selectContinuationFormat live continuations = map (\c -> (continuationLabel c, selectContinuationFormat' c)) continuations where @@ -301,7 +300,7 @@ selectContinuationFormat live continuations = unknown_block = panic "unknown BlockId in selectContinuationFormat" -processFormats :: [(CLabel, (CmmFormalsWithoutKinds, Maybe CLabel, [Maybe LocalReg]))] +processFormats :: [(CLabel, (CmmFormals, Maybe CLabel, [Maybe LocalReg]))] -> Maybe UpdateFrame -> [Continuation (Either C_SRT CmmInfo)] -> (WordOff, WordOff, [(CLabel, ContinuationFormat)]) @@ -330,7 +329,7 @@ processFormats formats update_frame continuations = update_size [] = 0 update_size (expr:exprs) = width + update_size exprs where - width = machRepByteWidth (cmmExprRep expr) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ cmmExprType expr) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth -- TODO: get rid of "+ 1" etc. 
@@ -340,7 +339,7 @@ processFormats formats update_frame continuations = stack_size (Nothing:formats) = 1 + stack_size formats -- one dead word stack_size (Just reg:formats) = width + stack_size formats where - width = machRepByteWidth (localRegRep reg) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ localRegType reg) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth continuationMaxStack :: [(CLabel, ContinuationFormat)] @@ -360,14 +359,14 @@ continuationMaxStack formats (Continuation _ label _ False blocks) = map stmt_arg_size (brokenBlockStmts block)) final_arg_size (FinalReturn args) = - argumentsSize (cmmExprRep . kindlessCmm) args + argumentsSize (cmmExprType . hintlessCmm) args final_arg_size (FinalJump _ args) = - argumentsSize (cmmExprRep . kindlessCmm) args + argumentsSize (cmmExprType . hintlessCmm) args final_arg_size (FinalCall next _ _ args _ _ True) = 0 final_arg_size (FinalCall next _ _ args _ _ False) = -- We have to account for the stack used when we build a frame -- for the *next* continuation from *this* continuation - argumentsSize (cmmExprRep . kindlessCmm) args + + argumentsSize (cmmExprType . hintlessCmm) args + continuation_frame_size next_format where next_format = maybe unknown_format id $ lookup next' formats @@ -376,7 +375,7 @@ continuationMaxStack formats (Continuation _ label _ False blocks) = final_arg_size _ = 0 stmt_arg_size (CmmJump _ args) = - argumentsSize (cmmExprRep . kindlessCmm) args + argumentsSize (cmmExprType . 
hintlessCmm) args stmt_arg_size (CmmCall _ _ _ (CmmSafe _) _) = panic "Safe call in processFormats" stmt_arg_size (CmmReturn _) = diff --git a/compiler/cmm/CmmCPSGen.hs b/compiler/cmm/CmmCPSGen.hs index dd1887f..c1e7143 100644 --- a/compiler/cmm/CmmCPSGen.hs +++ b/compiler/cmm/CmmCPSGen.hs @@ -17,7 +17,6 @@ import BlockId import Cmm import CLabel import CmmBrokenBlock -- Data types only -import MachOp import CmmUtils import CmmCallConv @@ -57,7 +56,7 @@ data Continuation info = info -- Left <=> Continuation created by the CPS -- Right <=> Function or Proc point CLabel -- Used to generate both info & entry labels - CmmFormalsWithoutKinds -- Argument locals live on entry (C-- procedure params) + CmmFormals -- Argument locals live on entry (C-- procedure params) Bool -- True <=> GC block so ignore stack size [BrokenBlock] -- Code, may be empty. The first block is -- the entry point. The order is otherwise initially @@ -70,7 +69,7 @@ data Continuation info = data ContinuationFormat = ContinuationFormat { - continuation_formals :: CmmFormalsWithoutKinds, + continuation_formals :: CmmFormals, continuation_label :: Maybe CLabel, -- The label occupying the top slot continuation_frame_size :: WordOff, -- Total frame size in words (not including arguments) continuation_stack :: [Maybe LocalReg] -- local reg offsets from stack top @@ -95,7 +94,7 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques curr_format = maybe unknown_block id $ lookup label formats unknown_block = panic "unknown BlockId in continuationToProc" curr_stack = continuation_frame_size curr_format - arg_stack = argumentsSize localRegRep formals + arg_stack = argumentsSize localRegType formals param_stmts :: [CmmStmt] param_stmts = function_entry curr_format @@ -145,8 +144,8 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques [BasicBlock new_next $ pack_continuation curr_format cont_format ++ tail_call (curr_stack - cont_stack) - (CmmLit $ CmmLabel $ 
toCLabel next) - arguments]) + (CmmLit $ CmmLabel $ toCLabel next) + arguments]) -- branches to blocks in the current function don't have to jump | otherwise @@ -194,7 +193,7 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques -- A return is a tail call to the stack top FinalReturn arguments -> tail_call curr_stack - (entryCode (CmmLoad (CmmReg spReg) wordRep)) + (entryCode (CmmLoad (CmmReg spReg) bWord)) arguments -- A tail call @@ -228,22 +227,22 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques foreignCall call_uniques (CmmPrim target) results arguments -formal_to_actual reg = CmmKinded (CmmReg (CmmLocal reg)) NoHint +formal_to_actual reg = CmmHinted (CmmReg (CmmLocal reg)) NoHint -foreignCall :: [Unique] -> CmmCallTarget -> CmmFormals -> CmmActuals -> [CmmStmt] +foreignCall :: [Unique] -> CmmCallTarget -> HintedCmmFormals -> HintedCmmActuals -> [CmmStmt] foreignCall uniques call results arguments = arg_stmts ++ saveThreadState ++ caller_save ++ [CmmCall (CmmCallee suspendThread CCallConv) - [ CmmKinded id PtrHint ] - [ CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint ] + [ CmmHinted id AddrHint ] + [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint ] CmmUnsafe CmmMayReturn, CmmCall call results new_args CmmUnsafe CmmMayReturn, CmmCall (CmmCallee resumeThread CCallConv) - [ CmmKinded new_base PtrHint ] - [ CmmKinded (CmmReg (CmmLocal id)) PtrHint ] + [ CmmHinted new_base AddrHint ] + [ CmmHinted (CmmReg (CmmLocal id)) AddrHint ] CmmUnsafe CmmMayReturn, -- Assign the result to BaseReg: we @@ -251,14 +250,14 @@ foreignCall uniques call results arguments = CmmAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base))] ++ caller_load ++ loadThreadState tso_unique ++ - [CmmJump (CmmReg spReg) (map (formal_to_actual . kindlessCmm) results)] + [CmmJump (CmmReg spReg) (map (formal_to_actual . 
hintlessCmm) results)] where (_, arg_stmts, new_args) = loadArgsIntoTemps argument_uniques arguments (caller_save, caller_load) = callerSaveVolatileRegs (Just [{-only system regs-}]) - new_base = LocalReg base_unique (cmmRegRep (CmmGlobal BaseReg)) GCKindNonPtr - id = LocalReg id_unique wordRep GCKindNonPtr + new_base = LocalReg base_unique (cmmRegType (CmmGlobal BaseReg)) + id = LocalReg id_unique bWord tso_unique : base_unique : id_unique : argument_uniques = uniques -- ----------------------------------------------------------------------------- @@ -288,7 +287,7 @@ loadThreadState tso_unique = CmmAssign (CmmLocal tso) stgCurrentTSO, -- Sp = tso->sp; CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) - wordRep), + bWord), -- SpLim = tso->stack + RESERVED_STACK_WORDS; CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) rESERVED_STACK_WORDS) @@ -297,24 +296,24 @@ loadThreadState tso_unique = -- and load the current cost centre stack from the TSO when profiling: if opt_SccProfilingOn then [CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) wordRep)] + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)] else [] - where tso = LocalReg tso_unique wordRep GCKindNonPtr -- TODO FIXME NOW + where tso = LocalReg tso_unique bWord -- TODO FIXME NOW openNursery = [ -- Hp = CurrentNursery->free - 1; - CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free bWord) (-1)), -- HpLim = CurrentNursery->start + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; CmmAssign hpLim (cmmOffsetExpr - (CmmLoad nursery_bdescr_start wordRep) + (CmmLoad nursery_bdescr_start bWord) (cmmOffset (CmmMachOp mo_wordMul [ - CmmMachOp (MO_S_Conv I32 wordRep) - [CmmLoad nursery_bdescr_blocks I32], + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], CmmLit (mkIntCLit bLOCK_SIZE) ]) (-1) @@ -358,17 +357,17 @@ currentNursery = CmmGlobal 
CurrentNursery -- for packing/unpacking continuations -- and entering/exiting functions -tail_call :: WordOff -> CmmExpr -> CmmActuals -> [CmmStmt] +tail_call :: WordOff -> CmmExpr -> HintedCmmActuals -> [CmmStmt] tail_call spRel target arguments = store_arguments ++ adjust_sp_reg spRel ++ jump where store_arguments = [stack_put spRel expr offset - | ((CmmKinded expr _), StackParam offset) <- argument_formats] ++ + | ((CmmHinted expr _), StackParam offset) <- argument_formats] ++ [global_put expr global - | ((CmmKinded expr _), RegisterParam global) <- argument_formats] + | ((CmmHinted expr _), RegisterParam global) <- argument_formats] jump = [CmmJump target arguments] - argument_formats = assignArguments (cmmExprRep . kindlessCmm) arguments + argument_formats = assignArguments (cmmExprType . hintlessCmm) arguments adjust_sp_reg spRel = if spRel == 0 @@ -386,8 +385,8 @@ gc_stack_check gc_block max_frame_size = check_stack_limit where check_stack_limit = [ CmmCondBranch - (CmmMachOp (MO_U_Lt $ cmmRegRep spReg) - [CmmRegOff spReg (-max_frame_size*wORD_SIZE), + (CmmMachOp (MO_U_Lt (typeWidth (cmmRegType spReg))) + [CmmRegOff spReg (-max_frame_size*wORD_SIZE), CmmReg spLimReg]) gc_block] @@ -437,7 +436,7 @@ pack_frame curr_frame_size next_frame_size next_frame_header frame_args = mkOffsets size (Nothing:exprs) = mkOffsets (size+1) exprs mkOffsets size (Just expr:exprs) = (expr, size):mkOffsets (size + width) exprs where - width = machRepByteWidth (cmmExprRep expr) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ cmmExprType expr) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth spRel = curr_frame_size - next_frame_size @@ -461,7 +460,7 @@ function_entry (ContinuationFormat formals _ _ live_regs) [global_get reg global | (reg, RegisterParam global) <- argument_formats] - argument_formats = assignArguments (localRegRep) formals + argument_formats = assignArguments (localRegType) formals -- TODO: eliminate copy/paste with pack_continuation 
curr_offsets = mkOffsets label_size live_regs @@ -472,7 +471,7 @@ function_entry (ContinuationFormat formals _ _ live_regs) mkOffsets size (Nothing:regs) = mkOffsets (size+1) regs mkOffsets size (Just reg:regs) = (reg, size):mkOffsets (size + width) regs where - width = machRepByteWidth (localRegRep reg) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ localRegType reg) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth ----------------------------------------------------------------------------- @@ -499,7 +498,7 @@ stack_get :: WordOff stack_get spRel reg offset = CmmAssign (CmmLocal reg) (CmmLoad (CmmRegOff spReg (wORD_SIZE*(spRel + offset))) - (localRegRep reg)) + (localRegType reg)) global_put :: CmmExpr -> GlobalReg -> CmmStmt global_put expr global = CmmAssign (CmmGlobal global) expr global_get :: LocalReg -> GlobalReg -> CmmStmt diff --git a/compiler/cmm/CmmCPSZ.hs b/compiler/cmm/CmmCPSZ.hs index b6b77f0..d8c9560 100644 --- a/compiler/cmm/CmmCPSZ.hs +++ b/compiler/cmm/CmmCPSZ.hs @@ -5,25 +5,19 @@ module CmmCPSZ ( protoCmmCPSZ ) where -import BlockId import Cmm import CmmCommonBlockElimZ -import CmmContFlowOpt import CmmProcPointZ import CmmSpillReload -import CmmTx import DFMonad import PprCmmZ() -import ZipCfg hiding (zip, unzip) import ZipCfgCmmRep import DynFlags import ErrUtils -import FiniteMap import HscTypes import Monad import Outputable -import UniqSupply ----------------------------------------------------------------------------- -- |Top level driver for the CPS pass @@ -38,7 +32,7 @@ protoCmmCPSZ hsc_env (Cmm tops) | otherwise = do let dflags = hsc_dflags hsc_env showPass dflags "CPSZ" - tops <- mapM (cpsTop hsc_env) tops + tops <- liftM concat $ mapM (cpsTop hsc_env) tops dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr (Cmm tops)) return $ Cmm tops @@ -49,44 +43,48 @@ mutable reference cells in an 'HscEnv' and are global to one compiler session. 
-} -cpsTop :: HscEnv -> CmmTopZ -> IO CmmTopZ -cpsTop _ p@(CmmData {}) = return p +cpsTop :: HscEnv -> CmmTopZ -> IO [CmmTopZ] +cpsTop _ p@(CmmData {}) = return [p] cpsTop hsc_env (CmmProc h l args g) = do dump Opt_D_dump_cmmz "Pre Proc Points Added" g let callPPs = callProcPoints g - g <- return $ map_nodes id NotSpillOrReload id g - -- Change types of middle nodes to allow spill/reload - g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" + g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" (dualLivenessWithInsertion callPPs) g - (varSlots, g) <- trim g >>= return . elimSpillAndReload emptyFM - procPoints <- run $ minimalProcPointSet callPPs (runTx cmmCfgOptsZ g) + dump Opt_D_dump_cmmz "Pre common block elimination" g + g <- return $ elimCommonBlocks g + dump Opt_D_dump_cmmz "Post common block elimination" g + procPoints <- run $ minimalProcPointSet callPPs g + print $ "call procPoints: " ++ (showSDoc $ ppr procPoints) g <- run $ addProcPointProtocols callPPs procPoints g dump Opt_D_dump_cmmz "Post Proc Points Added" g - g <- return $ map_nodes id NotSpillOrReload id g - -- Change types of middle nodes to allow spill/reload g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" (dualLivenessWithInsertion procPoints) g -- Insert spills at defns; reloads at return points g <- run $ insertLateReloads' g -- Duplicate reloads just before uses dump Opt_D_dump_cmmz "Post late reloads" g - g <- trim g >>= dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination" - (removeDeadAssignmentsAndReloads procPoints) + g <- dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination" + (removeDeadAssignmentsAndReloads procPoints) g -- Remove redundant reloads (and any other redundant asst) - (_, g) <- trim g >>= return . 
elimSpillAndReload varSlots - gs <- run $ splitAtProcPoints args l procPoints g - gs `seq` dump Opt_D_dump_cmmz "Pre common block elimination" g - g <- return $ elimCommonBlocks g - dump Opt_D_dump_cmmz "Post common block elimination" g - return $ CmmProc h l args (runTx cmmCfgOptsZ g) + slotEnv <- run $ liveSlotAnal g + print $ "live slot analysis results: " ++ (showSDoc $ ppr slotEnv) + cafEnv <- run $ cafAnal g + print $ "live CAF analysis results: " ++ (showSDoc $ ppr cafEnv) + slotIGraph <- return $ igraph areaBuilder slotEnv g + print $ "slot IGraph: " ++ (showSDoc $ ppr slotIGraph) + print $ "graph before procPointMap: " ++ (showSDoc $ ppr g) + procPointMap <- run $ procPointAnalysis procPoints g + let areaMap = layout procPoints slotEnv g + g <- run $ manifestSP procPoints procPointMap areaMap g + procPointMap <- run $ procPointAnalysis procPoints g + gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap slotEnv areaMap + (CmmProc h l args g) + return gs + --return $ [CmmProc h l args (runTx cmmCfgOptsZ g)] where dflags = hsc_dflags hsc_env dump f txt g = dumpIfSet_dyn dflags f txt (ppr g) run = runFuelIO (hsc_OptFuel hsc_env) dual_rewrite flag txt pass g = do dump flag ("Pre " ++ txt) g - g <- run $ pass (graphOfLGraph g) >>= lGraphOfGraph + g <- run $ pass g dump flag ("Post " ++ txt) $ g - return $ graphOfLGraph g - trim (Graph (ZLast (LastOther (LastBranch id))) blocks) = return $ LGraph id blocks - trim (Graph tail blocks) = - do entry <- liftM BlockId $ run $ getUniqueM - return $ LGraph entry (insertBlock (Block entry tail) blocks) + return g diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index d24d77a..5476eb8 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -9,29 +9,31 @@ module CmmCallConv ( ParamLocation(..), ArgumentFormat, assignArguments, + assignArgumentsPos, argumentsSize, ) where #include "HsVersions.h" import Cmm -import MachOp import SMRep import Constants import StaticFlags 
(opt_Unregisterised) +import Outputable import Panic -- Calculate the 'GlobalReg' or stack locations for function call -- parameters as used by the Cmm calling convention. -data ParamLocation +data ParamLocation a = RegisterParam GlobalReg - | StackParam WordOff + | StackParam a -type ArgumentFormat a = [(a, ParamLocation)] +type ArgumentFormat a b = [(a, ParamLocation b)] -assignArguments :: (a -> MachRep) -> [a] -> ArgumentFormat a +-- Stack parameters are returned as word offsets. +assignArguments :: (a -> CmmType) -> [a] -> ArgumentFormat a WordOff assignArguments f reps = assignments where (sizes, assignments) = unzip $ assignArguments' reps (negate (sum sizes)) availRegs @@ -40,20 +42,38 @@ assignArguments f reps = assignments (size,(r,assignment)):assignArguments' rs new_offset remaining where (assignment, new_offset, size, remaining) = - assign_reg (f r) offset availRegs + assign_reg False assign_slot_up (f r) offset availRegs + +-- | JD: For the new stack story, I want arguments passed on the stack to manifest as +-- positive offsets in a CallArea, not negative offsets from the stack pointer. +-- Also, I want byte offsets, not word offsets. +-- The first argument tells us whether we are assigning positions for call arguments +-- or return results. The distinction matters because we reserve different +-- global registers in each case. 
+assignArgumentsPos :: Bool -> (a -> CmmType) -> [a] -> ArgumentFormat a ByteOff +assignArgumentsPos isCall arg_ty reps = map cvt assignments + where + (sizes, assignments) = unzip $ assignArguments' reps 0 availRegs + assignArguments' [] _ _ = [] + assignArguments' (r:rs) offset avails = + (size,(r,assignment)):assignArguments' rs new_offset remaining + where + (assignment, new_offset, size, remaining) = + assign_reg isCall assign_slot_down (arg_ty r) offset avails + cvt (l, RegisterParam r) = (l, RegisterParam r) + cvt (l, StackParam off) = (l, StackParam $ off * wORD_SIZE) -argumentsSize :: (a -> MachRep) -> [a] -> WordOff +argumentsSize :: (a -> CmmType) -> [a] -> WordOff argumentsSize f reps = maximum (0 : map arg_top args) where args = assignArguments f reps - arg_top (a, StackParam offset) = -offset arg_top (_, RegisterParam _) = 0 ----------------------------------------------------------------------------- -- Local information about the registers available -type AvailRegs = ( [GlobalReg] -- available vanilla regs. +type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. , [GlobalReg] -- floats , [GlobalReg] -- doubles , [GlobalReg] -- longs (int64 and word64) @@ -81,20 +101,49 @@ availRegs = (regList VanillaReg useVanillaRegs, where regList f max = map f [1 .. max] +-- Round the size of a local register up to the nearest word. 
slot_size :: LocalReg -> Int -slot_size reg = - ((machRepByteWidth (localRegRep reg) - 1) `div` wORD_SIZE) + 1 - -slot_size' :: MachRep -> Int -slot_size' reg = ((machRepByteWidth reg - 1) `div` wORD_SIZE) + 1 - -assign_reg :: MachRep -> WordOff -> AvailRegs -> (ParamLocation, WordOff, WordOff, AvailRegs) -assign_reg I8 off (v:vs, fs, ds, ls) = (RegisterParam $ v, off, 0, (vs, fs, ds, ls)) -assign_reg I16 off (v:vs, fs, ds, ls) = (RegisterParam $ v, off, 0, (vs, fs, ds, ls)) -assign_reg I32 off (v:vs, fs, ds, ls) = (RegisterParam $ v, off, 0, (vs, fs, ds, ls)) -assign_reg I64 off (vs, fs, ds, l:ls) = (RegisterParam $ l, off, 0, (vs, fs, ds, ls)) -assign_reg I128 off _ = panic "I128 is not a supported register type" -assign_reg F32 off (vs, f:fs, ds, ls) = (RegisterParam $ f, off, 0, (vs, fs, ds, ls)) -assign_reg F64 off (vs, fs, d:ds, ls) = (RegisterParam $ d, off, 0, (vs, fs, ds, ls)) -assign_reg F80 off _ = panic "F80 is not a supported register type" -assign_reg reg off _ = (StackParam $ off, off + size, size, ([], [], [], [])) where size = slot_size' reg +slot_size reg = slot_size' (typeWidth (localRegType reg)) + +slot_size' :: Width -> Int +slot_size' reg = ((widthInBytes reg - 1) `div` wORD_SIZE) + 1 + +type Assignment = (ParamLocation WordOff, WordOff, WordOff, AvailRegs) +type SlotAssigner = Width -> Int -> AvailRegs -> Assignment + +assign_reg :: Bool -> SlotAssigner -> CmmType -> WordOff -> AvailRegs -> Assignment +assign_reg isCall slot ty off avails + | isFloatType ty = assign_float_reg slot width off avails + | otherwise = assign_bits_reg isCall slot width off gcp avails + where + width = typeWidth ty + gcp | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr + +-- Assigning a slot on a stack that grows up: +-- JD: I don't know why this convention stops using all the registers +-- after running out of one class of registers. 
+assign_slot_up :: SlotAssigner +assign_slot_up width off regs = + (StackParam $ off, off + size, size, ([], [], [], [])) where size = slot_size' width + +-- Assigning a slot on a stack that grows down: +assign_slot_down :: SlotAssigner +assign_slot_down width off regs = + (StackParam $ off + size, off + size, size, ([], [], [], [])) + where size = slot_size' width + +-- On calls, `node` is used to hold the closure that is entered, so we can't +-- pass arguments in that register. +assign_bits_reg _ _ W128 _ _ _ = panic "I128 is not a supported register type" +assign_bits_reg isCall assign_slot w off gcp regs@(v:vs, fs, ds, ls) = + if isCall && v gcp == node then + assign_bits_reg isCall assign_slot w off gcp (vs, fs, ds, ls) + else if widthInBits w <= widthInBits wordWidth then + (RegisterParam (v gcp), off, 0, (vs, fs, ds, ls)) + else assign_slot w off regs + +assign_float_reg _ W32 off (vs, f:fs, ds, ls) = (RegisterParam $ f, off, 0, (vs, fs, ds, ls)) +assign_float_reg _ W64 off (vs, fs, d:ds, ls) = (RegisterParam $ d, off, 0, (vs, fs, ds, ls)) +assign_float_reg _ W80 off _ = panic "F80 is not a supported register type" +assign_float_reg assign_slot width off r = assign_slot width off r diff --git a/compiler/cmm/CmmCommonBlockElimZ.hs b/compiler/cmm/CmmCommonBlockElimZ.hs index 97ec31d..2cef222 100644 --- a/compiler/cmm/CmmCommonBlockElimZ.hs +++ b/compiler/cmm/CmmCommonBlockElimZ.hs @@ -5,7 +5,6 @@ where import BlockId -import Cmm hiding (blockId) import CmmExpr import Prelude hiding (iterate, zip, unzip) import ZipCfg @@ -70,7 +69,7 @@ upd_graph g subst = map_nodes id middle last g where middle m = m last (LastBranch bid) = LastBranch $ sub bid last (LastCondBranch p t f) = cond p (sub t) (sub f) - last (LastCall t bid) = LastCall t $ liftM sub bid + last (LastCall t bid s) = LastCall t (liftM sub bid) s last (LastSwitch e bs) = LastSwitch e $ map (liftM sub) bs last l = l cond p t f = if t == f then LastBranch t else LastCondBranch p t f @@ -80,17 +79,15 @@ 
upd_graph g subst = map_nodes id middle last g -- The hashing is a bit arbitrary (the numbers are completely arbitrary), -- but it should be fast and good enough. hash_block :: CmmBlock -> Int -hash_block (Block _ t) = hash_tail t 0 +hash_block (Block _ _ t) = hash_tail t 0 where hash_mid (MidComment (FastString u _ _ _ _)) = u hash_mid (MidAssign r e) = hash_reg r + hash_e e hash_mid (MidStore e e') = hash_e e + hash_e e' - hash_mid (MidUnsafeCall t _ as) = hash_tgt t + hash_as as + hash_mid (MidUnsafeCall t _ as) = hash_tgt t + hash_lst hash_e as hash_mid (MidAddToContext e es) = hash_e e + hash_lst hash_e es - hash_mid (CopyIn _ fs _) = hash_fs fs - hash_mid (CopyOut _ as) = hash_as as hash_reg (CmmLocal l) = hash_local l hash_reg (CmmGlobal _) = 19 - hash_local (LocalReg _ _ _) = 117 + hash_local (LocalReg _ _) = 117 hash_e (CmmLit l) = hash_lit l hash_e (CmmLoad e _) = 67 + hash_e e hash_e (CmmReg r) = hash_reg r @@ -102,17 +99,14 @@ hash_block (Block _ t) = hash_tail t 0 hash_lit (CmmLabel _) = 119 -- ugh hash_lit (CmmLabelOff _ i) = 199 + i hash_lit (CmmLabelDiffOff _ _ i) = 299 + i - hash_tgt (CmmCallee e _) = hash_e e - hash_tgt (CmmPrim _) = 31 -- lots of these - hash_as = hash_lst $ hash_kinded hash_e - hash_fs = hash_lst $ hash_kinded hash_local - hash_kinded f (CmmKinded x _) = f x - hash_lst f = foldl (\z x -> f x + z) 0 + hash_tgt (ForeignTarget e _) = hash_e e + hash_tgt (PrimTarget _) = 31 -- lots of these + hash_lst f = foldl (\z x -> f x + z) (0::Int) hash_last (LastBranch _) = 23 -- would be great to hash these properly hash_last (LastCondBranch p _ _) = hash_e p - hash_last LastReturn = 17 -- better ideas? - hash_last (LastJump e) = hash_e e - hash_last (LastCall e _) = hash_e e + hash_last (LastReturn _) = 17 -- better ideas? 
+ hash_last (LastJump e _) = hash_e e + hash_last (LastCall e _ _) = hash_e e hash_last (LastSwitch e _) = hash_e e hash_tail (ZLast LastExit) v = 29 + v * 2 hash_tail (ZLast (LastOther l)) v = hash_last l + (v * 2) @@ -130,7 +124,8 @@ lookupBid subst bid = case lookupFM subst bid of -- Equality on the body of a block, modulo a function mapping block IDs to block IDs. eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool -eqBlockBodyWith eqBid (Block _ t) (Block _ t') = eqTailWith eqBid t t' +eqBlockBodyWith eqBid (Block _ Nothing t) (Block _ Nothing t') = eqTailWith eqBid t t' +eqBlockBodyWith _ _ _ = False type CmmTail = ZTail Middle Last eqTailWith :: (BlockId -> BlockId -> Bool) -> CmmTail -> CmmTail -> Bool @@ -143,10 +138,11 @@ eqLastWith :: (BlockId -> BlockId -> Bool) -> Last -> Last -> Bool eqLastWith eqBid (LastBranch bid) (LastBranch bid') = eqBid bid bid' eqLastWith eqBid c@(LastCondBranch _ _ _) c'@(LastCondBranch _ _ _) = eqBid (cml_true c) (cml_true c') && eqBid (cml_false c) (cml_false c') -eqLastWith _ LastReturn LastReturn = True -eqLastWith _ (LastJump e) (LastJump e') = e == e' -eqLastWith eqBid c@(LastCall _ _) c'@(LastCall _ _) = - cml_target c == cml_target c' && eqMaybeWith eqBid (cml_cont c) (cml_cont c') +eqLastWith _ (LastReturn s) (LastReturn s') = s == s' +eqLastWith _ (LastJump e s) (LastJump e' s') = e == e' && s == s' +eqLastWith eqBid c@(LastCall _ _ s) c'@(LastCall _ _ s') = + cml_target c == cml_target c' && eqMaybeWith eqBid (cml_cont c) (cml_cont c') && + s == s' eqLastWith eqBid (LastSwitch e bs) (LastSwitch e' bs') = e == e' && eqLstWith (eqMaybeWith eqBid) bs bs' eqLastWith _ _ _ = False diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 6909250..320b1e7 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -2,7 +2,7 @@ module CmmContFlowOpt ( runCmmOpts, cmmCfgOpts, cmmCfgOptsZ , branchChainElimZ, removeUnreachableBlocksZ, predMap - , 
replaceLabelsZ + , replaceLabelsZ, runCmmContFlowOptsZs ) where @@ -10,27 +10,28 @@ import BlockId import Cmm import CmmTx import qualified ZipCfg as G +import ZipCfg import ZipCfgCmmRep import Maybes import Monad +import Outputable import Panic import Prelude hiding (unzip, zip) import Util import UniqFM ------------------------------------ -mapProcs :: Tx (GenCmmTop d h s) -> Tx (GenCmm d h s) -mapProcs f (Cmm tops) = fmap Cmm (mapTx f tops) - +runCmmContFlowOptsZs :: [CmmZ] -> [CmmZ] +runCmmContFlowOptsZs prog + = [ runTx (runCmmOpts cmmCfgOptsZ) cmm_top + | cmm_top <- prog ] ------------------------------------- cmmCfgOpts :: Tx (ListGraph CmmStmt) cmmCfgOptsZ :: Tx CmmGraph cmmCfgOpts = branchChainElim -- boring, but will get more exciting later -cmmCfgOptsZ = - branchChainElimZ `seqTx` blockConcatZ `seqTx` removeUnreachableBlocksZ +cmmCfgOptsZ = branchChainElimZ `seqTx` blockConcatZ `seqTx` removeUnreachableBlocksZ -- Here branchChainElim can ultimately be replaced -- with a more exciting combination of optimisations @@ -41,10 +42,15 @@ optGraph :: Tx g -> Tx (GenCmmTop d h g) optGraph _ top@(CmmData {}) = noTx top optGraph opt (CmmProc info lbl formals g) = fmap (CmmProc info lbl formals) (opt g) +------------------------------------ +mapProcs :: Tx (GenCmmTop d h s) -> Tx (GenCmm d h s) +mapProcs f (Cmm tops) = fmap Cmm (mapTx f tops) + ---------------------------------------------------------------- branchChainElim :: Tx (ListGraph CmmStmt) --- Remove any basic block of the form L: goto L', --- and replace L with L' everywhere else +-- If L is not captured in an instruction, we can remove any +-- basic block of the form L: goto L', and replace L with L' everywhere else. +-- How does L get captured? In a CallArea. 
branchChainElim (ListGraph blocks) | null lone_branch_blocks -- No blocks to remove = noTx (ListGraph blocks) @@ -74,73 +80,100 @@ replaceLabels env (BasicBlock id stmts) branchChainElimZ :: Tx CmmGraph -- Remove any basic block of the form L: goto L', -- and replace L with L' everywhere else -branchChainElimZ g@(G.LGraph eid _) +branchChainElimZ g@(G.LGraph eid args _) | null lone_branch_blocks -- No blocks to remove = noTx g | otherwise - = aTx $ replaceLabelsZ env $ G.of_block_list eid (self_branches ++ others) + = aTx $ replaceLabelsZ env $ G.of_block_list eid args (self_branches ++ others) where (lone_branch_blocks, others) = partitionWith isLoneBranchZ (G.to_block_list g) - env = mkClosureBlockEnv lone_branch_blocks + env = mkClosureBlockEnvZ lone_branch_blocks self_branches = let loop_to (id, _) = if lookup id == id then - Just (G.Block id (G.ZLast (G.mkBranchNode id))) + Just (G.Block id Nothing (G.ZLast (G.mkBranchNode id))) else Nothing in mapMaybe loop_to lone_branch_blocks lookup id = lookupBlockEnv env id `orElse` id isLoneBranchZ :: CmmBlock -> Either (BlockId, BlockId) CmmBlock -isLoneBranchZ (G.Block id (G.ZLast (G.LastOther (LastBranch target)))) +isLoneBranchZ (G.Block id Nothing (G.ZLast (G.LastOther (LastBranch target)))) | id /= target = Left (id,target) isLoneBranchZ other = Right other -- An infinite loop is not a link in a branch chain! replaceLabelsZ :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceLabelsZ env = replace_eid . G.map_nodes id id last +replaceLabelsZ env = replace_eid . 
G.map_nodes id middle last where - replace_eid (G.LGraph eid blocks) = G.LGraph (lookup eid) blocks - last (LastBranch id) = LastBranch (lookup id) - last (LastCondBranch e ti fi) = LastCondBranch e (lookup ti) (lookup fi) - last (LastSwitch e tbl) = LastSwitch e (map (fmap lookup) tbl) - last (LastCall tgt (Just id)) = LastCall tgt (Just $ lookup id) - last exit_jump_return = exit_jump_return - lookup id = lookupBlockEnv env id `orElse` id + replace_eid (G.LGraph eid off blocks) = G.LGraph (lookup eid) off blocks + middle m@(MidComment _) = m + middle (MidAssign r e) = MidAssign r (exp e) + middle (MidStore addr e) = MidStore (exp addr) (exp e) + middle (MidUnsafeCall tgt fs as) = MidUnsafeCall (midcall tgt) fs (map exp as) + middle (MidAddToContext e es) = MidAddToContext (exp e) (map exp es) + last (LastBranch id) = LastBranch (lookup id) + last (LastCondBranch e ti fi) = LastCondBranch (exp e) (lookup ti) (lookup fi) + last (LastSwitch e tbl) = LastSwitch (exp e) (map (fmap lookup) tbl) + last (LastCall tgt mb_id s) = LastCall (exp tgt) (fmap lookup mb_id) s + last (LastJump e s) = LastJump (exp e) s + last (LastReturn s) = LastReturn s + midcall (ForeignTarget e c) = ForeignTarget (exp e) c + midcall m@(PrimTarget _) = m + exp e@(CmmLit _) = e + exp (CmmLoad addr ty) = CmmLoad (exp addr) ty + exp e@(CmmReg _) = e + exp (CmmMachOp op es) = CmmMachOp op $ map exp es + exp e@(CmmRegOff _ _) = e + exp (CmmStackSlot (CallArea (Young id)) i) = + CmmStackSlot (CallArea (Young (lookup id))) i + exp e@(CmmStackSlot _ _) = e + lookup id = fmap lookup (lookupBlockEnv env id) `orElse` id ---------------------------------------------------------------- -- Build a map from a block to its set of predecessors. Very useful. 
predMap :: G.LastNode l => G.LGraph m l -> BlockEnv BlockSet predMap g = G.fold_blocks add_preds emptyBlockEnv g -- find the back edges where add_preds b env = foldl (add b) env (G.succs b) - add (G.Block bid _) env b' = + add (G.Block bid _ _) env b' = extendBlockEnv env b' $ extendBlockSet (lookupBlockEnv env b' `orElse` emptyBlockSet) bid ---------------------------------------------------------------- -blockConcatZ :: Tx CmmGraph -- If a block B branches to a label L, and L has no other predecessors, -- then we can splice the block starting with L onto the end of B. -- Because this optmization can be inhibited by unreachable blocks, --- we bundle it with a pass that drops unreachable blocks. +-- we first take a pass to drops unreachable blocks. -- Order matters, so we work bottom up (reverse postorder DFS). +-- +-- To ensure correctness, we have to make sure that the BlockId of the block +-- we are about to eliminate is not named in another instruction +-- (except an adjacent stack pointer adjustment, which we expect and also eliminate). +-- For +-- -- Note: This optimization does _not_ subsume branch chain elimination. 
-blockConcatZ = removeUnreachableBlocksZ `seqTx` blockConcatZ' +blockConcatZ :: Tx CmmGraph +blockConcatZ = removeUnreachableBlocksZ `seqTx` blockConcatZ' blockConcatZ' :: Tx CmmGraph -blockConcatZ' g@(G.LGraph eid blocks) = tx $ G.LGraph eid blocks' - where (changed, blocks') = foldr maybe_concat (False, blocks) $ G.postorder_dfs g - maybe_concat b@(G.Block bid _) (changed, blocks') = - let unchanged = (changed, extendBlockEnv blocks' bid b) +blockConcatZ' g@(G.LGraph eid off blocks) = + tx $ pprTrace "concatMap" (ppr concatMap) $ replaceLabelsZ concatMap $ G.LGraph eid off blocks' + where (changed, blocks', concatMap) = + foldr maybe_concat (False, blocks, emptyBlockEnv) $ G.postorder_dfs g + maybe_concat b@(G.Block bid _ _) (changed, blocks', concatMap) = + let unchanged = (changed, extendBlockEnv blocks' bid b, concatMap) in case G.goto_end $ G.unzip b of (h, G.LastOther (LastBranch b')) -> if num_preds b' == 1 then - (True, extendBlockEnv blocks' bid $ splice blocks' h b') + (True, extendBlockEnv blocks' bid $ splice blocks' h b', + extendBlockEnv concatMap b' bid) else unchanged _ -> unchanged num_preds bid = liftM sizeBlockSet (lookupBlockEnv backEdges bid) `orElse` 0 backEdges = predMap g splice blocks' h bid' = case lookupBlockEnv blocks' bid' of - Just (G.Block _ t) -> G.zip $ G.ZBlock h t + Just (G.Block _ Nothing t) -> G.zip $ G.ZBlock h t + Just (G.Block _ (Just _) _) -> + panic "trying to concatenate but successor block has incoming args" Nothing -> panic "unknown successor block" tx = if changed then aTx else noTx ---------------------------------------------------------------- @@ -151,9 +184,16 @@ mkClosureBlockEnv blocks = mkBlockEnv $ map follow blocks endChain orig id = case lookupBlockEnv singleEnv id of Just id' | id /= orig -> endChain orig id' _ -> id +mkClosureBlockEnvZ :: [(BlockId, BlockId)] -> BlockEnv BlockId +mkClosureBlockEnvZ blocks = mkBlockEnv $ map follow blocks + where singleEnv = mkBlockEnv blocks + follow (id, next) = (id, 
endChain id next) + endChain orig id = case lookupBlockEnv singleEnv id of + Just id' | id /= orig -> endChain orig id' + _ -> id ---------------------------------------------------------------- removeUnreachableBlocksZ :: Tx CmmGraph -removeUnreachableBlocksZ g@(G.LGraph id blocks) = - if length blocks' < sizeUFM blocks then aTx $ G.of_block_list id blocks' +removeUnreachableBlocksZ g@(G.LGraph id off blocks) = + if length blocks' < sizeUFM blocks then aTx $ G.of_block_list id off blocks' else noTx g where blocks' = G.postorder_dfs g diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index 0bfa396..0f0ccd2 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -5,14 +5,14 @@ module CmmCvt where import BlockId +import ClosureInfo (C_SRT(..)) import Cmm import CmmExpr -import MkZipCfg import MkZipCfgCmm hiding (CmmGraph) import ZipCfgCmmRep -- imported for reverse conversion import CmmZipUtil +import ForeignCall import PprCmm() -import PprCmmZ() import qualified ZipCfg as G import FastString @@ -31,25 +31,26 @@ cmmToZgraph (Cmm tops) = liftM Cmm $ mapM mapTop tops where mapTop (CmmProc h l args g) = toZgraph (showSDoc $ ppr l) args g >>= return . 
CmmProc h l args mapTop (CmmData s ds) = return $ CmmData s ds -cmmOfZgraph = cmmMapGraph ofZgraph +cmmOfZgraph = cmmMapGraph ofZgraph - -toZgraph :: String -> CmmFormalsWithoutKinds -> ListGraph CmmStmt -> UniqSM CmmGraph -toZgraph _ _ (ListGraph []) = lgraphOfAGraph emptyAGraph +toZgraph :: String -> CmmFormals -> ListGraph CmmStmt -> UniqSM CmmGraph +toZgraph _ _ (ListGraph []) = lgraphOfAGraph 0 emptyAGraph toZgraph fun_name args g@(ListGraph (BasicBlock id ss : other_blocks)) = - labelAGraph id $ mkMiddles (mkEntry area undefined args) <*> - mkStmts ss <*> foldr addBlock emptyAGraph other_blocks - where addBlock (BasicBlock id ss) g = mkLabel id <*> mkStmts ss <*> g + let (offset, entry) = mkEntry id Native args in + labelAGraph id offset $ + entry <*> mkStmts ss <*> foldr addBlock emptyAGraph other_blocks + where addBlock (BasicBlock id ss) g = mkLabel id Nothing <*> mkStmts ss <*> g mkStmts (CmmNop : ss) = mkNop <*> mkStmts ss mkStmts (CmmComment s : ss) = mkComment s <*> mkStmts ss mkStmts (CmmAssign l r : ss) = mkAssign l r <*> mkStmts ss mkStmts (CmmStore l r : ss) = mkStore l r <*> mkStmts ss mkStmts (CmmCall (CmmCallee f conv) res args (CmmSafe srt) CmmMayReturn : ss) = - mkCall f conv res args srt <*> mkStmts ss + mkCall f conv (map hintlessCmm res) (map hintlessCmm args) srt <*> mkStmts ss mkStmts (CmmCall (CmmPrim {}) _ _ (CmmSafe _) _ : _) = panic "safe call to a primitive CmmPrim CallishMachOp" mkStmts (CmmCall f res args CmmUnsafe CmmMayReturn : ss) = - mkUnsafeCall f res args <*> mkStmts ss + mkUnsafeCall (convert_target f res args) + (strip_hints res) (strip_hints args) <*> mkStmts ss mkStmts (CmmCondBranch e l : fbranch) = mkCmmIfThenElse e (mkBranch l) (mkStmts fbranch) mkStmts (last : []) = mkLast last @@ -57,32 +58,41 @@ toZgraph fun_name args g@(ListGraph (BasicBlock id ss : other_blocks)) = mkStmts (_ : _ : _) = bad "last node not at end" bad msg = pprPanic (msg ++ " in function " ++ fun_name) (ppr g) mkLast (CmmCall (CmmCallee f conv) [] 
args _ CmmNeverReturns) = - mkFinalCall f conv args + mkFinalCall f conv $ map hintlessCmm args mkLast (CmmCall (CmmPrim {}) _ _ _ CmmNeverReturns) = panic "Call to CmmPrim never returns?!" mkLast (CmmSwitch scrutinee table) = mkSwitch scrutinee table - mkLast (CmmJump tgt args) = mkJump area tgt args - mkLast (CmmReturn ress) = mkReturn area ress + -- SURELY, THESE HINTLESS ARGS ARE WRONG AND WILL BE FIXED WHEN CALLING + -- CONVENTIONS ARE HONORED? + mkLast (CmmJump tgt args) = mkJump tgt $ map hintlessCmm args + mkLast (CmmReturn ress) = mkReturn $ map hintlessCmm ress mkLast (CmmBranch tgt) = mkBranch tgt mkLast (CmmCall _f (_:_) _args _ CmmNeverReturns) = panic "Call never returns but has results?!" mkLast _ = panic "fell off end of block" - -- The entry, jump, and return areas should be the same. - -- This code is horrible, but there's no point trying to fix it until we've figured - -- out our interface for calling conventions. - -- All return statements are required to use return areas of equal size. - -- This isn't necessarily required to write correct programs, but it's sane. 
- area = case foldr retBlock (retStmts ss Nothing) other_blocks of - Just (as, _) -> mkCallArea id as $ Just args - Nothing -> mkCallArea id [] $ Just args - retBlock (BasicBlock _ ss) z = retStmts ss z - retStmts [CmmReturn ress] z@(Just (_, n)) = - if size ress == n then z - else panic "return statements in C-- procs must return the same results" - retStmts [CmmReturn ress] Nothing = Just (ress, size ress) - retStmts (_ : rst) z = retStmts rst z - retStmts [] z = z - size args = areaSize $ mkCallArea id args Nothing + +strip_hints :: [CmmHinted a] -> [a] +strip_hints = map hintlessCmm + +convert_target :: CmmCallTarget -> HintedCmmFormals -> HintedCmmActuals -> MidCallTarget +convert_target (CmmCallee e cc) ress args = ForeignTarget e (ForeignConvention cc (map cmmHint args) (map cmmHint ress)) +convert_target (CmmPrim op) _ress _args = PrimTarget op + +add_hints :: Convention -> ValueDirection -> [a] -> [CmmHinted a] +add_hints conv vd args = zipWith CmmHinted args (get_hints conv vd) + +get_hints :: Convention -> ValueDirection -> [ForeignHint] +get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints +get_hints (Foreign (ForeignConvention _ _ hints)) Results = hints +get_hints _other_conv _vd = repeat NoHint + +get_conv :: MidCallTarget -> Convention +get_conv (PrimTarget _) = Native +get_conv (ForeignTarget _ fc) = Foreign fc + +cmm_target :: MidCallTarget -> CmmCallTarget +cmm_target (PrimTarget op) = CmmPrim op +cmm_target (ForeignTarget e (ForeignConvention cc _ _)) = CmmCallee e cc ofZgraph :: CmmGraph -> ListGraph CmmStmt ofZgraph g = ListGraph $ swallow blocks @@ -92,89 +102,67 @@ ofZgraph g = ListGraph $ swallow blocks extend_block _id stmts = stmts _extend_entry stmts = scomment showblocks : scomment cscomm : stmts showblocks = "LGraph has " ++ show (length blocks) ++ " blocks:" ++ - concat (map (\(G.Block id _) -> " " ++ show id) blocks) + concat (map (\(G.Block id _ _) -> " " ++ show id) blocks) cscomm = "Call successors are" ++ (concat $ 
map (\id -> " " ++ show id) $ uniqSetToList call_succs) swallow [] = [] - swallow (G.Block id t : rest) = tail id [] Nothing t rest - tail id prev' out (G.ZTail (CopyOut conv actuals) t) rest = - case out of - Nothing -> tail id prev' (Just (conv, actuals)) t rest - Just _ -> panic "multiple CopyOut nodes in one basic block" - tail id prev' out (G.ZTail m t) rest = tail id (mid m : prev') out t rest - tail id prev' out (G.ZLast G.LastExit) rest = exit id prev' out rest - tail id prev' out (G.ZLast (G.LastOther l)) rest = last id prev' out l rest + swallow (G.Block id _ t : rest) = tail id [] t rest + tail id prev' (G.ZTail m t) rest = tail id (mid m : prev') t rest + tail id prev' (G.ZLast G.LastExit) rest = exit id prev' rest + tail id prev' (G.ZLast (G.LastOther l)) rest = last id prev' l rest mid (MidComment s) = CmmComment s mid (MidAssign l r) = CmmAssign l r mid (MidStore l r) = CmmStore l r - mid (MidUnsafeCall f ress args) = CmmCall f ress args CmmUnsafe CmmMayReturn + mid (MidUnsafeCall target ress args) + = CmmCall (cmm_target target) + (add_hints conv Results ress) + (add_hints conv Arguments args) + CmmUnsafe CmmMayReturn + where + conv = get_conv target mid m@(MidAddToContext {}) = pcomment (ppr m) - mid m@(CopyOut {}) = pcomment (ppr m) - mid m@(CopyIn {}) = pcomment (ppr m <+> text "(proc point)") pcomment p = scomment $ showSDoc p block' id prev' | id == G.lg_entry g = BasicBlock id $ extend_entry (reverse prev') | otherwise = BasicBlock id $ extend_block id (reverse prev') - last id prev' out l n = + last id prev' l n = let endblock stmt = block' id (stmt : prev') : swallow n in case l of LastBranch tgt -> case n of - G.Block id' t : bs - | tgt == id', unique_pred id' - -> tail id prev' out t bs -- optimize out redundant labels - _ -> if isNothing out then endblock (CmmBranch tgt) - else pprPanic "can't convert LGraph with pending CopyOut" - (text "target" <+> ppr tgt <+> ppr g) + -- THIS IS NOW WRONG -- LABELS CAN SHOW UP ELSEWHERE IN THE GRAPH + 
--G.Block id' _ t : bs + -- | tgt == id', unique_pred id' + -- -> tail id prev' t bs -- optimize out redundant labels + _ -> endblock (CmmBranch tgt) LastCondBranch expr tid fid -> - if isJust out then pprPanic "CopyOut before conditional branch" (ppr g) - else case n of - G.Block id' t : bs + G.Block id' _ t : bs | id' == fid, unique_pred id' -> - tail id (CmmCondBranch expr tid : prev') Nothing t bs + tail id (CmmCondBranch expr tid : prev') t bs | id' == tid, unique_pred id', Just e' <- maybeInvertCmmExpr expr -> - tail id (CmmCondBranch e' fid : prev') Nothing t bs + tail id (CmmCondBranch e' fid : prev') t bs _ -> let instrs' = CmmBranch fid : CmmCondBranch expr tid : prev' in block' id instrs' : swallow n - LastJump expr -> endblock $ with_out out $ CmmJump expr - LastReturn -> endblock $ with_out out $ CmmReturn + LastJump expr _ -> endblock $ CmmJump expr [] + LastReturn _ -> endblock $ CmmReturn [] LastSwitch arg ids -> endblock $ CmmSwitch arg $ ids - LastCall e cont - | Just (conv, args) <- out - -> let tgt = CmmCallee e (conv_to_cconv conv) in - case cont of - Nothing -> - endblock $ CmmCall tgt [] args CmmUnsafe CmmNeverReturns - Just k - | G.Block id' (G.ZTail (CopyIn _ ress srt) t) : bs <- n, - id' == k, unique_pred k - -> let call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn - in tail id (call : prev') Nothing t bs - | G.Block id' t : bs <- n, id' == k, unique_pred k - -> let (ress, srt) = findCopyIn t - call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn - delayed = scomment "delayed CopyIn follows prev. 
call" - in tail id (delayed : call : prev') Nothing t bs - | otherwise -> panic "unrepairable call" - | otherwise -> panic "call with no CopyOut" - with_out (Just (_conv, actuals)) f = f actuals - with_out Nothing f = pprPanic "unrepairable data flow to" (ppr $ f []) - findCopyIn (G.ZTail (CopyIn _ ress srt) _) = (ress, srt) - findCopyIn (G.ZTail _ t) = findCopyIn t - findCopyIn (G.ZLast _) = panic "missing CopyIn after call" - exit id prev' out n = -- highly irregular (assertion violation?) + LastCall e cont _ -> + let tgt = CmmCallee e CCallConv in + case cont of + Nothing -> + endblock $ CmmCall tgt [] [] CmmUnsafe CmmNeverReturns + Just _ -> + endblock $ CmmCall tgt [] [] (CmmSafe NoC_SRT) CmmMayReturn + exit id prev' n = -- highly irregular (assertion violation?) let endblock stmt = block' id (stmt : prev') : swallow n in case n of [] -> endblock (scomment "procedure falls off end") - G.Block id' t : bs -> + G.Block id' _ t : bs -> if unique_pred id' then - tail id (scomment "went thru exit" : prev') out t bs + tail id (scomment "went thru exit" : prev') t bs else endblock (CmmBranch id') - conv_to_cconv (ConventionStandard c _) = c - conv_to_cconv (ConventionPrivate {}) = - panic "tried to convert private calling convention back to Cmm" preds = zipPreds g single_preds = let add b single = @@ -189,7 +177,7 @@ ofZgraph g = ListGraph $ swallow blocks call_succs = let add b succs = case G.last (G.unzip b) of - G.LastOther (LastCall _ (Just id)) -> extendBlockSet succs id + G.LastOther (LastCall _ (Just id) _) -> extendBlockSet succs id _ -> succs in G.fold_blocks add emptyBlockSet g _is_call_succ id = elemBlockSet id call_succs diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 69a4952..5893843 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -1,22 +1,56 @@ module CmmExpr - ( CmmExpr(..), cmmExprRep, maybeInvertCmmExpr - , CmmReg(..), cmmRegRep - , CmmLit(..), cmmLitRep - , LocalReg(..), localRegRep, localRegGCFollow, 
GCKind(..) - , GlobalReg(..), globalRegRep, spReg, hpReg, spLimReg, nodeReg, node + ( CmmType -- Abstract + , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord + , cInt, cLong + , cmmBits, cmmFloat + , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood + , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32 + + , Width(..) + , widthInBits, widthInBytes, widthInLog + , wordWidth, halfWordWidth, cIntWidth, cLongWidth + + , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr + , CmmReg(..), cmmRegType + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node + , VGcPtr(..), vgcFlag -- Temporary! , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed + , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet - , Area(..), StackSlotMap, getSlot, mkCallArea, outgoingSlot, areaId, areaSize - ) where + , Area(..), AreaId(..), SubArea, StackSlotMap, getSlot + + -- MachOp + , MachOp(..) 
+ , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, machOpResultType + , machOpArgReps, maybeInvertComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + ) +where + +#include "HsVersions.h" import BlockId import CLabel +import Constants +import FastString import FiniteMap -import MachOp import Maybes import Monad +import Outputable import Panic import Unique import UniqSet @@ -28,16 +62,24 @@ import UniqSet data CmmExpr = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr MachRep -- Read memory location + | CmmLoad CmmExpr CmmType -- Read memory location | CmmReg CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) 
+ | CmmStackSlot Area Int -- addressing expression of a stack slot | CmmRegOff CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) - -- where rep = cmmRegRep reg - | CmmStackSlot Area Int - deriving Eq + -- where rep = cmmRegType reg + +instance Eq CmmExpr where -- Equality ignores the types + CmmLit l1 == CmmLit l2 = l1==l2 + CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 + CmmReg r1 == CmmReg r2 = r1==r2 + CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 + CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 + _e1 == _e2 = False data CmmReg = CmmLocal LocalReg @@ -48,17 +90,24 @@ data CmmReg -- or the stack space where function arguments and results are passed. data Area = RegSlot LocalReg - | CallArea BlockId Int Int + | CallArea AreaId deriving (Eq, Ord) +data AreaId + = Old -- entry parameters, jumps, and returns share one call area at old end of stack + | Young BlockId + deriving (Eq, Ord) + +type SubArea = (Area, Int, Int) -- area, offset, width + data CmmLit - = CmmInt Integer MachRep + = CmmInt Integer Width -- Interpretation: the 2's complement representation of the value -- is truncated to the specified size. This is easier than trying -- to keep the value within range, because we don't know whether - -- it will be used as a signed or unsigned value (the MachRep doesn't + -- it will be used as a signed or unsigned value (the CmmType doesn't -- distinguish between signed & unsigned). 
- | CmmFloat Rational MachRep + | CmmFloat Rational Width | CmmLabel CLabel -- Address of label | CmmLabelOff CLabel Int -- Address of label + byte offset @@ -72,14 +121,27 @@ data CmmLit | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset deriving Eq -instance Eq LocalReg where - (LocalReg u1 _ _) == (LocalReg u2 _ _) = u1 == u2 +cmmExprType :: CmmExpr -> CmmType +cmmExprType (CmmLit lit) = cmmLitType lit +cmmExprType (CmmLoad _ rep) = rep +cmmExprType (CmmReg reg) = cmmRegType reg +cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args) +cmmExprType (CmmRegOff reg _) = cmmRegType reg +cmmExprType (CmmStackSlot _ _) = bWord -- an address -instance Ord LocalReg where - compare (LocalReg u1 _ _) (LocalReg u2 _ _) = compare u1 u2 +cmmLitType :: CmmLit -> CmmType +cmmLitType (CmmInt _ width) = cmmBits width +cmmLitType (CmmFloat _ width) = cmmFloat width +cmmLitType (CmmLabel lbl) = cmmLabelType lbl +cmmLitType (CmmLabelOff lbl _) = cmmLabelType lbl +cmmLitType (CmmLabelDiffOff {}) = bWord -instance Uniquable LocalReg where - getUnique (LocalReg uniq _ _) = uniq +cmmLabelType :: CLabel -> CmmType +cmmLabelType lbl | isGcPtrLabel lbl = gcWord + | otherwise = bWord + +cmmExprWidth :: CmmExpr -> Width +cmmExprWidth e = typeWidth (cmmExprType e) -------- --- Negation for conditional branches @@ -93,17 +155,33 @@ maybeInvertCmmExpr _ = Nothing -- Local registers ----------------------------------------------------------------------------- --- | Whether a 'LocalReg' is a GC followable pointer -data GCKind = GCKindPtr | GCKindNonPtr deriving (Eq) - data LocalReg - = LocalReg !Unique MachRep GCKind + = LocalReg !Unique CmmType -- ^ Parameters: -- 1. Identifier -- 2. Type - -- 3. 
Should the GC follow as a pointer --- Sets of local registers +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +instance Ord LocalReg where + compare (LocalReg u1 _) (LocalReg u2 _) = compare u1 u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +cmmRegType :: CmmReg -> CmmType +cmmRegType (CmmLocal reg) = localRegType reg +cmmRegType (CmmGlobal reg) = globalRegType reg + +localRegType :: LocalReg -> CmmType +localRegType (LocalReg _ rep) = rep + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +-- | Sets of local registers type RegSet = UniqSet LocalReg emptyRegSet :: RegSet elemRegSet :: LocalReg -> RegSet -> Bool @@ -121,45 +199,6 @@ minusRegSet = minusUniqSet plusRegSet = unionUniqSets timesRegSet = intersectUniqSets ------------------------------------------------------------------------------ --- Stack slots ------------------------------------------------------------------------------ - -mkVarSlot :: LocalReg -> CmmExpr -mkVarSlot r = CmmStackSlot (RegSlot r) 0 - --- Usually, we either want to lookup a variable's spill slot in an environment --- or else allocate it and add it to the environment. --- For a variable, we just need a single area of the appropriate size. -type StackSlotMap = FiniteMap LocalReg CmmExpr -getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) -getSlot map r = case lookupFM map r of - Just s -> (map, s) - Nothing -> (addToFM map r s, s) where s = mkVarSlot r - --- Eventually, we'll want something proper that takes arguments and formals --- and gives you back the calling convention code, as well as the stack area. -mkCallArea :: BlockId -> [a] -> Maybe [b] -> Area -mkCallArea id as fs = CallArea id (length as) (liftM length fs `orElse` 0) - --- Return the last slot in the outgoing parameter area. 
-outgoingSlot :: Area -> CmmExpr -outgoingSlot a@(RegSlot _) = CmmStackSlot a 0 -outgoingSlot a@(CallArea _ outN _) = CmmStackSlot a outN - -areaId :: Area -> BlockId -areaId (RegSlot _) = panic "Register stack slots don't have IDs!" -areaId (CallArea id _ _) = id - -areaSize :: Area -> Int -areaSize (RegSlot _) = 1 -areaSize (CallArea _ outN inN) = max outN inN - - ------------------------------------------------------------------------------ --- Register-use information for expressions and other types ------------------------------------------------------------------------------ - class UserOfLocalRegs a where foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -205,46 +244,69 @@ instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where foldRegsDefd _ set [] = set foldRegsDefd f set (x:xs) = foldRegsDefd f (foldRegsDefd f set x) xs + ----------------------------------------------------------------------------- --- MachRep +-- Stack slots ----------------------------------------------------------------------------- +mkVarSlot :: LocalReg -> CmmExpr +mkVarSlot r = CmmStackSlot (RegSlot r) 0 +-- Usually, we either want to lookup a variable's spill slot in an environment +-- or else allocate it and add it to the environment. +-- For a variable, we just need a single area of the appropriate size. 
+type StackSlotMap = FiniteMap LocalReg CmmExpr +getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) +getSlot map r = case lookupFM map r of + Just s -> (map, s) + Nothing -> (addToFM map r s, s) where s = mkVarSlot r -cmmExprRep :: CmmExpr -> MachRep -cmmExprRep (CmmLit lit) = cmmLitRep lit -cmmExprRep (CmmLoad _ rep) = rep -cmmExprRep (CmmReg reg) = cmmRegRep reg -cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op -cmmExprRep (CmmRegOff reg _) = cmmRegRep reg -cmmExprRep (CmmStackSlot _ _) = wordRep +----------------------------------------------------------------------------- +-- Stack slot use information for expressions and other types [_$_] +----------------------------------------------------------------------------- -cmmRegRep :: CmmReg -> MachRep -cmmRegRep (CmmLocal reg) = localRegRep reg -cmmRegRep (CmmGlobal reg) = globalRegRep reg -localRegRep :: LocalReg -> MachRep -localRegRep (LocalReg _ rep _) = rep +-- Fold over the area, the offset into the area, and the width of the subarea. 
+class UserOfSlots a where + foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b +class DefinerOfSlots a where + foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b -localRegGCFollow :: LocalReg -> GCKind -localRegGCFollow (LocalReg _ _ p) = p +instance UserOfSlots CmmExpr where + foldSlotsUsed f z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) + expr z (CmmLoad addr _) = foldSlotsUsed f z addr + expr z (CmmReg _) = z + expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs + expr z (CmmRegOff _ _) = z + expr z (CmmStackSlot _ _) = z + +instance UserOfSlots a => UserOfSlots [a] where + foldSlotsUsed _ set [] = set + foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs + + +----------------------------------------------------------------------------- +-- Global STG registers +----------------------------------------------------------------------------- -cmmLitRep :: CmmLit -> MachRep -cmmLitRep (CmmInt _ rep) = rep -cmmLitRep (CmmFloat _ rep) = rep -cmmLitRep (CmmLabel _) = wordRep -cmmLitRep (CmmLabelOff _ _) = wordRep -cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep +data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) + -- TEMPORARY!!! ----------------------------------------------------------------------------- -- Global STG registers ----------------------------------------------------------------------------- +vgcFlag :: CmmType -> VGcPtr +vgcFlag ty | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr data GlobalReg -- Argument and return registers = VanillaReg -- pointers, unboxed ints and chars {-# UNPACK #-} !Int -- its number + VGcPtr | FloatReg -- single-precision floating-point registers {-# UNPACK #-} !Int -- its number @@ -282,7 +344,71 @@ data GlobalReg -- from platform to platform (see module PositionIndependentCode). 
| PicBaseReg - deriving( Eq, Ord, Show ) + deriving( Show ) + +instance Eq GlobalReg where + VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes + FloatReg i == FloatReg j = i==j + DoubleReg i == DoubleReg j = i==j + LongReg i == LongReg j = i==j + Sp == Sp = True + SpLim == SpLim = True + Hp == Hp = True + HpLim == HpLim = True + CurrentTSO == CurrentTSO = True + CurrentNursery == CurrentNursery = True + HpAlloc == HpAlloc = True + GCEnter1 == GCEnter1 = True + GCFun == GCFun = True + BaseReg == BaseReg = True + PicBaseReg == PicBaseReg = True + _r1 == _r2 = False + +instance Ord GlobalReg where + compare (VanillaReg i _) (VanillaReg j _) = compare i j + -- Ignore type when seeking clashes + compare (FloatReg i) (FloatReg j) = compare i j + compare (DoubleReg i) (DoubleReg j) = compare i j + compare (LongReg i) (LongReg j) = compare i j + compare Sp Sp = EQ + compare SpLim SpLim = EQ + compare Hp Hp = EQ + compare HpLim HpLim = EQ + compare CurrentTSO CurrentTSO = EQ + compare CurrentNursery CurrentNursery = EQ + compare HpAlloc HpAlloc = EQ + compare GCEnter1 GCEnter1 = EQ + compare GCFun GCFun = EQ + compare BaseReg BaseReg = EQ + compare PicBaseReg PicBaseReg = EQ + compare (VanillaReg _ _) _ = LT + compare _ (VanillaReg _ _) = GT + compare (FloatReg _) _ = LT + compare _ (FloatReg _) = GT + compare (DoubleReg _) _ = LT + compare _ (DoubleReg _) = GT + compare (LongReg _) _ = LT + compare _ (LongReg _) = GT + compare Sp _ = LT + compare _ Sp = GT + compare SpLim _ = LT + compare _ SpLim = GT + compare Hp _ = LT + compare _ Hp = GT + compare HpLim _ = LT + compare _ HpLim = GT + compare CurrentTSO _ = LT + compare _ CurrentTSO = GT + compare CurrentNursery _ = LT + compare _ CurrentNursery = GT + compare HpAlloc _ = LT + compare _ HpAlloc = GT + compare GCEnter1 _ = LT + compare _ GCEnter1 = GT + compare GCFun _ = LT + compare _ GCFun = GT + compare BaseReg _ = LT + compare _ BaseReg = GT -- convenient aliases spReg, hpReg, spLimReg, 
nodeReg :: CmmReg @@ -292,11 +418,682 @@ spLimReg = CmmGlobal SpLim nodeReg = CmmGlobal node node :: GlobalReg -node = VanillaReg 1 - -globalRegRep :: GlobalReg -> MachRep -globalRegRep (VanillaReg _) = wordRep -globalRegRep (FloatReg _) = F32 -globalRegRep (DoubleReg _) = F64 -globalRegRep (LongReg _) = I64 -globalRegRep _ = wordRep +node = VanillaReg 1 VGcPtr + +globalRegType :: GlobalReg -> CmmType +globalRegType (VanillaReg _ VGcPtr) = gcWord +globalRegType (VanillaReg _ VNonGcPtr) = bWord +globalRegType (FloatReg _) = cmmFloat W32 +globalRegType (DoubleReg _) = cmmFloat W64 +globalRegType (LongReg _) = cmmBits W64 +globalRegType Hp = gcWord -- The initialiser for all + -- dynamically allocated closures +globalRegType _ = bWord + + +----------------------------------------------------------------------------- +-- CmmType +----------------------------------------------------------------------------- + + -- NOTE: CmmType is an abstract type, not exported from this + -- module so you can easily change its representation + -- + -- However Width is exported in a concrete way, + -- and is used extensively in pattern-matching + +data CmmType -- The important one! + = CmmType CmmCat Width + +data CmmCat -- "Category" (not exported) + = GcPtrCat -- GC pointer + | BitsCat -- Non-pointer + | FloatCat -- Float + deriving( Eq ) + -- See Note [Signed vs unsigned] at the end + +instance Outputable CmmType where + ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid) + +instance Outputable CmmCat where + ppr FloatCat = ptext $ sLit("F") + ppr _ = ptext $ sLit("I") +-- Temp Jan 08 +-- ppr FloatCat = ptext $ sLit("float") +-- ppr BitsCat = ptext $ sLit("bits") +-- ppr GcPtrCat = ptext $ sLit("gcptr") + +-- Why is CmmType stratified? For native code generation, +-- most of the time you just want to know what sort of register +-- to put the thing in, and for this you need to know how +-- many bits thing has and whether it goes in a floating-point +-- register. 
By contrast, the distinction between GcPtr and +-- GcNonPtr is of interest to only a few parts of the code generator. + +-------- Equality on CmmType -------------- +-- CmmType is *not* an instance of Eq; sometimes we care about the +-- Gc/NonGc distinction, and sometimes we don't +-- So we use an explicit function to force you to think about it +cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality +cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2 + +cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool + -- This equality is temporary; used in CmmLint + -- but the RTS files are not yet well-typed wrt pointers +cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2) + = c1 `weak_eq` c2 && w1==w2 + where + FloatCat `weak_eq` FloatCat = True + FloatCat `weak_eq` _other = False + _other `weak_eq` FloatCat = False + _word1 `weak_eq` _word2 = True -- Ignores GcPtr + +--- Simple operations on CmmType ----- +typeWidth :: CmmType -> Width +typeWidth (CmmType _ w) = w + +cmmBits, cmmFloat :: Width -> CmmType +cmmBits = CmmType BitsCat +cmmFloat = CmmType FloatCat + +-------- Common CmmTypes ------------ +-- Floats and words of specific widths +b8, b16, b32, b64, f32, f64 :: CmmType +b8 = cmmBits W8 +b16 = cmmBits W16 +b32 = cmmBits W32 +b64 = cmmBits W64 +f32 = cmmFloat W32 +f64 = cmmFloat W64 + +-- CmmTypes of native word widths +bWord, bHalfWord, gcWord :: CmmType +bWord = cmmBits wordWidth +bHalfWord = cmmBits halfWordWidth +gcWord = CmmType GcPtrCat wordWidth + +cInt, cLong :: CmmType +cInt = cmmBits cIntWidth +cLong = cmmBits cLongWidth + + +------------ Predicates ---------------- +isFloatType, isGcPtrType :: CmmType -> Bool +isFloatType (CmmType FloatCat _) = True +isFloatType _other = False + +isGcPtrType (CmmType GcPtrCat _) = True +isGcPtrType _other = False + +isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool +-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) +-- isFloat32 and 64 are obvious + +isWord64 (CmmType 
BitsCat W64) = True +isWord64 (CmmType GcPtrCat W64) = True +isWord64 _other = False + +isWord32 (CmmType BitsCat W32) = True +isWord32 (CmmType GcPtrCat W32) = True +isWord32 _other = False + +isFloat32 (CmmType FloatCat W32) = True +isFloat32 _other = False + +isFloat64 (CmmType FloatCat W64) = True +isFloat64 _other = False + +----------------------------------------------------------------------------- +-- Width +----------------------------------------------------------------------------- + +data Width = W8 | W16 | W32 | W64 + | W80 -- Extended double-precision float, + -- used in x86 native codegen only. + -- (we use Ord, so it'd better be in this order) + | W128 + deriving (Eq, Ord, Show) + +instance Outputable Width where + ppr rep = ptext (mrStr rep) + +mrStr :: Width -> LitString +mrStr W8 = sLit("W8") +mrStr W16 = sLit("W16") +mrStr W32 = sLit("W32") +mrStr W64 = sLit("W64") +mrStr W128 = sLit("W128") +mrStr W80 = sLit("W80") + + +-------- Common Widths ------------ +wordWidth, halfWordWidth :: Width +wordWidth | wORD_SIZE == 4 = W32 + | wORD_SIZE == 8 = W64 + | otherwise = panic "MachOp.wordRep: Unknown word size" + +halfWordWidth | wORD_SIZE == 4 = W16 + | wORD_SIZE == 8 = W32 + | otherwise = panic "MachOp.halfWordRep: Unknown word size" + +-- cIntRep is the Width for a C-language 'int' +cIntWidth, cLongWidth :: Width +#if SIZEOF_INT == 4 +cIntWidth = W32 +#elif SIZEOF_INT == 8 +cIntWidth = W64 +#endif + +#if SIZEOF_LONG == 4 +cLongWidth = W32 +#elif SIZEOF_LONG == 8 +cLongWidth = W64 +#endif + +widthInBits :: Width -> Int +widthInBits W8 = 8 +widthInBits W16 = 16 +widthInBits W32 = 32 +widthInBits W64 = 64 +widthInBits W128 = 128 +widthInBits W80 = 80 + +widthInBytes :: Width -> Int +widthInBytes W8 = 1 +widthInBytes W16 = 2 +widthInBytes W32 = 4 +widthInBytes W64 = 8 +widthInBytes W128 = 16 +widthInBytes W80 = 10 + +-- log_2 of the width in bytes, useful for generating shifts. 
+widthInLog :: Width -> Int +widthInLog W8 = 0 +widthInLog W16 = 1 +widthInLog W32 = 2 +widthInLog W64 = 3 +widthInLog W128 = 4 +widthInLog W80 = panic "widthInLog: F80" + + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- +Implementation notes: + +It might suffice to keep just a width, without distinguishing between +floating and integer types. However, keeping the distinction will +help the native code generator to assign registers more easily. +-} + + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. Basically contains C's primops +and no others. + +Nomenclature: all ops indicate width and signedness, where +appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. +Nat means the operation works on STG word sized objects. +Signedness: S means signed, U means unsigned. For operations where +signedness is irrelevant or makes no difference (for example +integer add), the signedness component is omitted. + +An exception: NatP is a ptr-typed native word. From the point of +view of the native code generators this distinction is irrelevant, +but the C code generator sometimes needs this info to emit the +right casts. 
+-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. + | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. 
+ | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_FF_Conv Width Width -- Float -> Float + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. + +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + :: MachOp + +mo_wordAdd = MO_Add wordWidth +mo_wordSub = MO_Sub wordWidth +mo_wordEq = MO_Eq wordWidth +mo_wordNe = MO_Ne wordWidth +mo_wordMul = MO_Mul wordWidth +mo_wordSQuot = MO_S_Quot wordWidth +mo_wordSRem = MO_S_Rem wordWidth +mo_wordSNeg = MO_S_Neg wordWidth +mo_wordUQuot = MO_U_Quot wordWidth +mo_wordURem = MO_U_Rem wordWidth + +mo_wordSGe = MO_S_Ge wordWidth +mo_wordSLe = MO_S_Le wordWidth +mo_wordSGt = MO_S_Gt wordWidth +mo_wordSLt = MO_S_Lt wordWidth + +mo_wordUGe = MO_U_Ge wordWidth +mo_wordULe = MO_U_Le wordWidth +mo_wordUGt = MO_U_Gt wordWidth +mo_wordULt = MO_U_Lt wordWidth + +mo_wordAnd = MO_And wordWidth +mo_wordOr = MO_Or wordWidth +mo_wordXor = MO_Xor wordWidth +mo_wordNot = MO_Not wordWidth +mo_wordShl = MO_Shl wordWidth +mo_wordSShr = MO_S_Shr wordWidth 
+mo_wordUShr = MO_U_Shr wordWidth + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord = MO_UU_Conv W8 wordWidth +mo_s_8ToWord = MO_SS_Conv W8 wordWidth +mo_u_16ToWord = MO_UU_Conv W16 wordWidth +mo_s_16ToWord = MO_SS_Conv W16 wordWidth +mo_s_32ToWord = MO_SS_Conv W32 wordWidth +mo_u_32ToWord = MO_UU_Conv W32 wordWidth + +mo_WordTo8 = MO_UU_Conv wordWidth W8 +mo_WordTo16 = MO_UU_Conv wordWidth W16 +mo_WordTo32 = MO_UU_Conv wordWidth W32 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isAssociativeMachOp :: MachOp -> Bool +isAssociativeMachOp mop = + case mop of + MO_Add {} -> True -- NB: does not include + MO_Mul {} -> True -- floatint point! + MO_And {} -> True + MO_Or {} -> True + MO_Xor {} -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isComparisonMachOp + +{- | +Returns 'True' if the MachOp is a comparison. 
+ +If in doubt, return False. This generates worse code on the +native routes, but is otherwise harmless. +-} +isComparisonMachOp :: MachOp -> Bool +isComparisonMachOp mop = + case mop of + MO_Eq _ -> True + MO_Ne _ -> True + MO_S_Ge _ -> True + MO_S_Le _ -> True + MO_S_Gt _ -> True + MO_S_Lt _ -> True + MO_U_Ge _ -> True + MO_U_Le _ -> True + MO_U_Gt _ -> True + MO_U_Lt _ -> True + MO_F_Eq {} -> True + MO_F_Ne {} -> True + MO_F_Ge {} -> True + MO_F_Le {} -> True + MO_F_Gt {} -> True + MO_F_Lt {} -> True + _other -> False + +-- ----------------------------------------------------------------------------- +-- Inverting conditions + +-- Sometimes it's useful to be able to invert the sense of a +-- condition. Not all conditional tests are invertible: in +-- particular, floating point conditionals cannot be inverted, because +-- there exist floating-point values which return False for both senses +-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). + +maybeInvertComparison :: MachOp -> Maybe MachOp +maybeInvertComparison op + = case op of -- None of these Just cases include floating point + MO_Eq r -> Just (MO_Ne r) + MO_Ne r -> Just (MO_Eq r) + MO_U_Lt r -> Just (MO_U_Ge r) + MO_U_Gt r -> Just (MO_U_Le r) + MO_U_Le r -> Just (MO_U_Gt r) + MO_U_Ge r -> Just (MO_U_Lt r) + MO_S_Lt r -> Just (MO_S_Ge r) + MO_S_Gt r -> Just (MO_S_Le r) + MO_S_Le r -> Just (MO_S_Gt r) + MO_S_Ge r -> Just (MO_S_Lt r) + MO_F_Eq r -> Just (MO_F_Ne r) + MO_F_Ne r -> Just (MO_F_Eq r) + MO_F_Ge r -> Just (MO_F_Le r) + MO_F_Le r -> Just (MO_F_Ge r) + MO_F_Gt r -> Just (MO_F_Lt r) + MO_F_Lt r -> Just (MO_F_Gt r) + _other -> Nothing + +-- ---------------------------------------------------------------------------- +-- machOpResultType + +{- | +Returns the MachRep of the result of a MachOp. 
+-} +machOpResultType :: MachOp -> [CmmType] -> CmmType +machOpResultType mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep + MO_Ne {} -> comparisonResultRep + MO_S_Ge {} -> comparisonResultRep + MO_S_Le {} -> comparisonResultRep + MO_S_Gt {} -> comparisonResultRep + MO_S_Lt {} -> comparisonResultRep + + MO_U_Ge {} -> comparisonResultRep + MO_U_Le {} -> comparisonResultRep + MO_U_Gt {} -> comparisonResultRep + MO_U_Lt {} -> comparisonResultRep + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep + MO_F_Ne {} -> comparisonResultRep + MO_F_Ge {} -> comparisonResultRep + MO_F_Le {} -> comparisonResultRep + MO_F_Gt {} -> comparisonResultRep + MO_F_Lt {} -> comparisonResultRep + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + where + (ty1:_) = tys + +comparisonResultRep :: CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. 
+ +machOpArgReps :: MachOp -> [Width] +machOpArgReps op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r,wordWidth] + MO_U_Shr r -> [r,wordWidth] + MO_S_Shr r -> [r,wordWidth] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + +------------------------------------------------------------------------- +{- Note [Signed vs unsigned] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +Should a CmmType include a signed vs. unsigned distinction? + +This is very much like a "hint" in C-- terminology: it isn't necessary +in order to generate correct code, but it might be useful in that the +compiler can generate better code if it has access to higher-level +hints about data. This is important at call boundaries, because the +definition of a function is not visible at all of its call sites, so +the compiler cannot infer the hints. + +Here in Cmm, we're taking a slightly different approach. We include +the int vs. float hint in the MachRep, because (a) the majority of +platforms have a strong distinction between float and int registers, +and (b) we don't want to do any heavyweight hint-inference in the +native code backend in order to get good code. 
We're treating the +hint more like a type: our Cmm is always completely consistent with +respect to hints. All coercions between float and int are explicit. + +What about the signed vs. unsigned hint? This information might be +useful if we want to keep sub-word-sized values in word-size +registers, which we must do if we only have word-sized registers. + +On such a system, there are two straightforward conventions for +representing sub-word-sized values: + +(a) Leave the upper bits undefined. Comparison operations must + sign- or zero-extend both operands before comparing them, + depending on whether the comparison is signed or unsigned. + +(b) Always keep the values sign- or zero-extended as appropriate. + Arithmetic operations must narrow the result to the appropriate + size. + +A clever compiler might not use either (a) or (b) exclusively, instead +it would attempt to minimize the coercions by analysis: the same kind +of analysis that propagates hints around. In Cmm we don't want to +have to do this, so we plump for having richer types and keeping the +type information consistent. + +If signed/unsigned hints are missing from MachRep, then the only +choice we have is (a), because we don't know whether the result of an +operation should be sign- or zero-extended. + +Many architectures have extending load operations, which work well +with (b). To make use of them with (a), you need to know whether the +value is going to be sign- or zero-extended by an enclosing comparison +(for example), which involves knowing above the context. This is +doable but more complex. + +Further complicating the issue is foreign calls: a foreign calling +convention can specify that signed 8-bit quantities are passed as +sign-extended 32 bit quantities, for example (this is the case on the +PowerPC). So we *do* need sign information on foreign call arguments. + +Pros for adding signed vs. 
unsigned to MachRep: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the MachRep for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different MachReps in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represnted by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index 314a9ad..eb226da 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -16,7 +16,6 @@ import Cmm import CmmUtils import CLabel -import MachOp import Bitmap import ClosureInfo @@ -26,6 +25,7 @@ import CgUtils import SMRep import Constants +import Outputable import StaticFlags import Unique import UniqSupply @@ -83,14 +83,15 @@ mkInfoTable uniq (CmmProc (CmmInfo _ _ info) entry_label arguments blocks) = CmmInfoTable (ProfilingInfo ty_prof cl_prof) type_tag type_info -> let info_label = entryLblToInfoLbl entry_label - ty_prof' = makeRelativeRefTo info_label ty_prof - cl_prof' = makeRelativeRefTo info_label cl_prof + ty_prof' = makeRelativeRefTo info_label ty_prof + cl_prof' = makeRelativeRefTo info_label cl_prof in case type_info of -- A function entry point. 
- FunInfo (ptrs, nptrs) srt fun_type fun_arity pap_bitmap slow_entry -> + FunInfo (ptrs, nptrs) srt fun_arity pap_bitmap slow_entry -> mkInfoTableAndCode info_label std_info fun_extra_bits entry_label arguments blocks where + fun_type = argDescrType pap_bitmap fun_extra_bits = [packHalfWordsCLit fun_type fun_arity] ++ case pap_bitmap of @@ -112,7 +113,6 @@ mkInfoTable uniq (CmmProc (CmmInfo _ _ info) entry_label arguments blocks) = std_info = mkStdInfoTable ty_prof' cl_prof' type_tag con_tag layout con_name = makeRelativeRefTo info_label descr layout = packHalfWordsCLit ptrs nptrs - -- A thunk. ThunkInfo (ptrs, nptrs) srt -> mkInfoTableAndCode info_label std_info srt_label entry_label @@ -150,7 +150,7 @@ mkInfoTableAndCode :: CLabel -> [CmmLit] -> [CmmLit] -> CLabel - -> CmmFormalsWithoutKinds + -> CmmFormals -> ListGraph CmmStmt -> [RawCmmTop] mkInfoTableAndCode info_lbl std_info extra_bits entry_lbl args blocks @@ -210,22 +210,19 @@ mkLiveness uniq live = -- does not fit in one word then (CmmLabel big_liveness, [data_lits], rET_BIG) -- fits in one word - else (mkWordCLit small_liveness, [], rET_SMALL) + else (mkWordCLit small_liveness, [], rET_SMALL) where mkBits [] = [] mkBits (reg:regs) = take sizeW bits ++ mkBits regs where sizeW = case reg of Nothing -> 1 - Just r -> (machRepByteWidth (localRegRep r) + wORD_SIZE - 1) + Just r -> (widthInBytes (typeWidth (localRegType r)) + wORD_SIZE - 1) `quot` wORD_SIZE -- number of words, rounded up bits = repeat $ is_non_ptr reg -- True <=> Non Ptr - is_non_ptr Nothing = True - is_non_ptr (Just reg) = - case localRegGCFollow reg of - GCKindNonPtr -> True - GCKindPtr -> False + is_non_ptr Nothing = True + is_non_ptr (Just reg) = not $ isGcPtrType (localRegType reg) bits :: [Bool] bits = mkBits live diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x index 0adb610..da5e4df 100644 --- a/compiler/cmm/CmmLex.x +++ b/compiler/cmm/CmmLex.x @@ -95,7 +95,8 @@ $white_no_nl+ ; "&&" { kw CmmT_BoolAnd } "||" { kw CmmT_BoolOr } - 
R@decimal { global_regN VanillaReg } + P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } + R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } F@decimal { global_regN FloatReg } D@decimal { global_regN DoubleReg } L@decimal { global_regN LongReg } @@ -159,6 +160,7 @@ data CmmToken | CmmT_bits64 | CmmT_float32 | CmmT_float64 + | CmmT_gcptr | CmmT_GlobalReg GlobalReg | CmmT_Name FastString | CmmT_String String @@ -236,7 +238,15 @@ reservedWordsFM = listToUFM $ ( "bits32", CmmT_bits32 ), ( "bits64", CmmT_bits64 ), ( "float32", CmmT_float32 ), - ( "float64", CmmT_float64 ) + ( "float64", CmmT_float64 ), +-- New forms + ( "b8", CmmT_bits8 ), + ( "b16", CmmT_bits16 ), + ( "b32", CmmT_bits32 ), + ( "b64", CmmT_bits64 ), + ( "f32", CmmT_float32 ), + ( "f64", CmmT_float64 ), + ( "gcptr", CmmT_gcptr ) ] tok_decimal span buf len diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 293c203..7c8f2b3 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -19,7 +19,6 @@ module CmmLint ( import BlockId import Cmm import CLabel -import MachOp import Maybe import Outputable import PprCmm @@ -32,17 +31,22 @@ import Control.Monad -- ----------------------------------------------------------------------------- -- Exported entry points: -cmmLint :: GenCmm d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLint (Cmm tops) = runCmmLint $ mapM_ lintCmmTop tops +cmmLint :: (Outputable d, Outputable h) + => GenCmm d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint (Cmm tops) = runCmmLint (mapM_ lintCmmTop) tops -cmmLintTop :: GenCmmTop d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLintTop top = runCmmLint $ lintCmmTop top +cmmLintTop :: (Outputable d, Outputable h) + => GenCmmTop d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLintTop top = runCmmLint lintCmmTop top -runCmmLint :: CmmLint a -> Maybe SDoc -runCmmLint l = - case unCL l of - Left err -> Just (ptext (sLit "Cmm lint error:") $$ nest 2 err) - Right _ -> Nothing +runCmmLint :: Outputable a => (a -> CmmLint b) -> a 
-> Maybe SDoc +runCmmLint l p = + case unCL (l p) of + Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), + nest 2 err, + ptext $ sLit ("Program was:"), + nest 2 (ppr p)]) + Right _ -> Nothing lintCmmTop :: (GenCmmTop h i (ListGraph CmmStmt)) -> CmmLint () lintCmmTop (CmmProc _ lbl _ (ListGraph blocks)) @@ -64,40 +68,33 @@ lintCmmBlock labels (BasicBlock id stmts) -- Checks whether a CmmExpr is "type-correct", and check for obvious-looking -- byte/word mismatches. -lintCmmExpr :: CmmExpr -> CmmLint MachRep +lintCmmExpr :: CmmExpr -> CmmLint CmmType lintCmmExpr (CmmLoad expr rep) = do lintCmmExpr expr - when (machRepByteWidth rep >= wORD_SIZE) $ + when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ cmmCheckWordAddress expr return rep lintCmmExpr expr@(CmmMachOp op args) = do - mapM_ lintCmmExpr args - if map cmmExprRep args == machOpArgReps op - then cmmCheckMachOp op args - else cmmLintMachOpErr expr (map cmmExprRep args) (machOpArgReps op) + tys <- mapM lintCmmExpr args + if map (typeWidth . cmmExprType) args == machOpArgReps op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) lintCmmExpr (CmmRegOff reg offset) - = lintCmmExpr (CmmMachOp (MO_Add rep) + = lintCmmExpr (CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) - where rep = cmmRegRep reg -lintCmmExpr lit@(CmmLit (CmmInt _ rep)) - | isFloatingRep rep - = cmmLintErr (text "integer literal with floating MachRep: " <> ppr lit) + where rep = typeWidth (cmmRegType reg) lintCmmExpr expr = - return (cmmExprRep expr) + return (cmmExprType expr) -- Check for some common byte/word mismatches (eg. 
Sp + 1) -cmmCheckMachOp :: MachOp -> [CmmExpr] -> CmmLint MachRep -cmmCheckMachOp op args@[CmmReg reg, CmmLit (CmmInt i _)] +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op args@[CmmReg reg, CmmLit (CmmInt i _)] _ | isWordOffsetReg reg && isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset (CmmMachOp op args) -cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] - = cmmCheckMachOp op [reg, lit] -cmmCheckMachOp op@(MO_U_Conv from to) args - | isFloatingRep from || isFloatingRep to - = cmmLintErr (text "unsigned conversion from/to floating rep: " - <> ppr (CmmMachOp op args)) -cmmCheckMachOp op _args - = return (resultRepOfMachOp op) +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = return (machOpResultType op tys) isWordOffsetReg :: CmmReg -> Bool isWordOffsetReg (CmmGlobal Sp) = True @@ -134,24 +131,26 @@ lintCmmStmt labels = lint lint (CmmComment {}) = return () lint stmt@(CmmAssign reg expr) = do erep <- lintCmmExpr expr - if (erep == cmmRegRep reg) + let reg_ty = cmmRegType reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) then return () - else cmmLintAssignErr stmt + else cmmLintAssignErr stmt erep reg_ty lint (CmmStore l r) = do lintCmmExpr l lintCmmExpr r return () lint (CmmCall target _res args _ _) = - lintTarget target >> mapM_ (lintCmmExpr . kindlessCmm) args + lintTarget target >> mapM_ (lintCmmExpr . hintlessCmm) args lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr e >> checkCond e lint (CmmSwitch e branches) = do mapM_ checkTarget $ catMaybes branches erep <- lintCmmExpr e - if (erep == wordRep) + if (erep `cmmEqType_ignoring_ptrhood` bWord) then return () - else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e) - lint (CmmJump e args) = lintCmmExpr e >> mapM_ (lintCmmExpr . kindlessCmm) args - lint (CmmReturn ress) = mapM_ (lintCmmExpr . 
kindlessCmm) ress + else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> + text " :: " <> ppr erep) + lint (CmmJump e args) = lintCmmExpr e >> mapM_ (lintCmmExpr . hintlessCmm) args + lint (CmmReturn ress) = mapM_ (lintCmmExpr . hintlessCmm) ress lint (CmmBranch id) = checkTarget id checkTarget id = if elemBlockSet id labels then return () else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) @@ -188,16 +187,21 @@ addLintInfo info thing = CmmLint $ Left err -> Left (hang info 2 err) Right a -> Right a -cmmLintMachOpErr :: CmmExpr -> [MachRep] -> [MachRep] -> CmmLint a +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a cmmLintMachOpErr expr argsRep opExpectsRep = cmmLintErr (text "in MachOp application: " $$ nest 2 (pprExpr expr) $$ (text "op is expecting: " <+> ppr opExpectsRep) $$ (text "arguments provide: " <+> ppr argsRep)) -cmmLintAssignErr :: CmmStmt -> CmmLint a -cmmLintAssignErr stmt = cmmLintErr (text "in assignment: " $$ - nest 2 (pprStmt stmt)) +cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [pprStmt stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a cmmLintDubiousWordOffset expr diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 078fcd3..93372fc 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -164,8 +164,8 @@ addKilled new_killed live = live `minusUniqSet` new_killed -------------------------------- -- Liveness of a CmmStmt -------------------------------- -cmmFormalsToLiveLocals :: CmmFormals -> [LocalReg] -cmmFormalsToLiveLocals formals = map kindlessCmm formals +cmmFormalsToLiveLocals :: HintedCmmFormals -> [LocalReg] +cmmFormalsToLiveLocals formals = map hintlessCmm formals cmmStmtLive :: BlockEntryLiveness -> CmmStmt -> CmmLivenessTransformer cmmStmtLive _ (CmmNop) = id @@ -180,7 +180,7 @@ 
cmmStmtLive _ (CmmStore expr1 expr2) = cmmExprLive expr2 . cmmExprLive expr1 cmmStmtLive _ (CmmCall target results arguments _ _) = target_liveness . - foldr ((.) . cmmExprLive) id (map kindlessCmm arguments) . + foldr ((.) . cmmExprLive) id (map hintlessCmm arguments) . addKilled (mkUniqSet $ cmmFormalsToLiveLocals results) where target_liveness = case target of @@ -198,9 +198,9 @@ cmmStmtLive other_live (CmmSwitch expr targets) = id (mapCatMaybes id targets)) cmmStmtLive _ (CmmJump expr params) = - const (cmmExprLive expr $ foldr ((.) . cmmExprLive) id (map kindlessCmm params) $ emptyUniqSet) + const (cmmExprLive expr $ foldr ((.) . cmmExprLive) id (map hintlessCmm params) $ emptyUniqSet) cmmStmtLive _ (CmmReturn params) = - const (foldr ((.) . cmmExprLive) id (map kindlessCmm params) $ emptyUniqSet) + const (foldr ((.) . cmmExprLive) id (map hintlessCmm params) $ emptyUniqSet) -------------------------------- -- Liveness of a CmmExpr diff --git a/compiler/cmm/CmmLiveZ.hs b/compiler/cmm/CmmLiveZ.hs index 4dc0874..b239ae3 100644 --- a/compiler/cmm/CmmLiveZ.hs +++ b/compiler/cmm/CmmLiveZ.hs @@ -63,15 +63,13 @@ middleLiveness m = middle m middle (MidStore addr rval) = gen addr . gen rval middle (MidUnsafeCall tgt ress args) = gen tgt . gen args . kill ress middle (MidAddToContext ra args) = gen ra . 
gen args - middle (CopyIn _ formals _) = kill formals - middle (CopyOut _ actuals) = gen actuals lastLiveness :: Last -> (BlockId -> CmmLive) -> CmmLive lastLiveness l env = last l - where last (LastReturn) = emptyUniqSet - last (LastJump e) = gen e $ emptyUniqSet - last (LastBranch id) = env id - last (LastCall tgt (Just k)) = gen tgt $ env k - last (LastCall tgt Nothing) = gen tgt $ emptyUniqSet - last (LastCondBranch e t f) = gen e $ unionUniqSets (env t) (env f) + where last (LastReturn _) = emptyUniqSet + last (LastJump e _) = gen e $ emptyUniqSet + last (LastBranch id) = env id + last (LastCall tgt (Just k) _) = gen tgt $ env k + last (LastCall tgt Nothing _) = gen tgt $ emptyUniqSet + last (LastCondBranch e t f) = gen e $ unionUniqSets (env t) (env f) last (LastSwitch e tbl) = gen e $ unionManyUniqSets $ map env (catMaybes tbl) diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 9873e29..e459a75 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -25,7 +25,6 @@ import Cmm import CmmExpr import CmmUtils import CLabel -import MachOp import StaticFlags import UniqFM @@ -100,7 +99,7 @@ cmmMiniInline blocks = map do_inline blocks cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt] cmmMiniInlineStmts uses [] = [] -cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _ _)) expr) : stmts) +cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts) -- not used at all: just discard this assignment | Nothing <- lookupUFM uses u = cmmMiniInlineStmts uses stmts @@ -121,7 +120,7 @@ cmmMiniInlineStmts uses (stmt:stmts) -- Try to inline a temporary assignment. We can skip over assignments to -- other tempoararies, because we know that expressions aren't side-effecting -- and temporaries are single-assignment. 
-lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _ _)) rhs) : rest) +lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest) | u /= u' = case lookupUFM (countUses rhs) u of Just 1 -> Just (inlineStmt u expr stmt : rest) @@ -155,19 +154,21 @@ inlineStmt u a (CmmCall target regs es srt ret) = CmmCall (infn target) regs es' srt ret where infn (CmmCallee fn cconv) = CmmCallee fn cconv infn (CmmPrim p) = CmmPrim p - es' = [ (CmmKinded (inlineExpr u a e) hint) | (CmmKinded e hint) <- es ] + es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ] inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d inlineStmt u a other_stmt = other_stmt inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr -inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _ _))) +inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _))) | u == u' = a | otherwise = e -inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep _)) off) - | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)] +inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off) + | u == u' = CmmMachOp (MO_Add width) [a, CmmLit (CmmInt (fromIntegral off) width)] | otherwise = e + where + width = typeWidth rep inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es) inlineExpr u a other_expr = other_expr @@ -192,17 +193,16 @@ cmmMachOpFold op arg@[CmmLit (CmmInt x rep)] -- "from" type, in order to truncate to the correct size. -- The final narrow/widen to the destination type -- is implicit in the CmmLit. 
- MO_S_Conv from to - | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to) - | otherwise -> CmmLit (CmmInt (narrowS from x) to) - MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + MO_SF_Conv from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) _ -> panic "cmmMachOpFold: unknown unary op" -- Eliminate conversion NOPs -cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x -cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = x -- Eliminate nested conversions where possible cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] @@ -221,20 +221,18 @@ cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] cmmMachOpFold (intconv signed1 rep1 rep3) [x] -- Nested narrowings: collapse | rep1 > rep2 && rep2 > rep3 -> - cmmMachOpFold (MO_U_Conv rep1 rep3) [x] + cmmMachOpFold (MO_UU_Conv rep1 rep3) [x] | otherwise -> CmmMachOp conv_outer args where - isIntConversion (MO_U_Conv rep1 rep2) - | not (isFloatingRep rep1) && not (isFloatingRep rep2) + isIntConversion (MO_UU_Conv rep1 rep2) = Just (rep1,rep2,False) - isIntConversion (MO_S_Conv rep1 rep2) - | not (isFloatingRep rep1) && not (isFloatingRep rep2) + isIntConversion (MO_SS_Conv rep1 rep2) = Just (rep1,rep2,True) isIntConversion _ = Nothing - intconv True = MO_S_Conv - intconv False = MO_U_Conv + intconv True = MO_SS_Conv + intconv False = MO_UU_Conv -- ToDo: a narrow of a load can be collapsed into a narrow load, right? -- but what if the architecture only supports word-sized loads, should @@ -244,18 +242,18 @@ cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] = case mop of -- for comparisons: don't forget to narrow the arguments before -- comparing, since they might be out of range. 
- MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep) - MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep) + MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth) + MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth) - MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep) - MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep) - MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep) - MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep) + MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordWidth) + MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth) + MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordWidth) + MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth) - MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep) - MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep) - MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep) - MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep) + MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordWidth) + MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth) + MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordWidth) + MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth) MO_Add r -> CmmLit (CmmInt (x + y) r) MO_Sub r -> CmmLit (CmmInt (x - y) r) @@ -350,12 +348,13 @@ cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] -- then we can do the comparison at the smaller size = cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt i rep)] where - maybe_conversion (MO_U_Conv from to) + maybe_conversion (MO_UU_Conv from to) | to > from = Just (from, False, narrowU) - maybe_conversion (MO_S_Conv from to) - | to > from, not (isFloatingRep from) + maybe_conversion (MO_SS_Conv from to) + | to > from = Just (from, True, narrowS) + -- don't attempt to apply this optimisation when the source 
-- is a float; see #1916 maybe_conversion _ = Nothing @@ -397,10 +396,10 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))] MO_Eq r | Just x' <- maybeInvertCmmExpr x -> x' MO_U_Gt r | isComparisonExpr x -> x MO_S_Gt r | isComparisonExpr x -> x - MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) MO_U_Le r | Just x' <- maybeInvertCmmExpr x -> x' MO_S_Le r | Just x' <- maybeInvertCmmExpr x -> x' other -> CmmMachOp mop args @@ -416,10 +415,10 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))] MO_Eq r | isComparisonExpr x -> x MO_U_Lt r | Just x' <- maybeInvertCmmExpr x -> x' MO_S_Lt r | Just x' <- maybeInvertCmmExpr x -> x' - MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) MO_U_Ge r | isComparisonExpr x -> x MO_S_Ge r | isComparisonExpr x -> x other -> CmmMachOp mop args @@ -451,7 +450,7 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] -- x1 = x >> word_size-1 (unsigned) -- return = (x + x1) >>= log2(divisor) let - bits = fromIntegral (machRepBitWidth rep) - 1 + bits = fromIntegral (widthInBits rep) - 1 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep x1 = 
CmmMachOp shr [x, CmmLit (CmmInt bits rep)] x2 = if p == 1 then x1 else @@ -503,18 +502,18 @@ exactLog2 x_ -- ----------------------------------------------------------------------------- -- widening / narrowing -narrowU :: MachRep -> Integer -> Integer -narrowU I8 x = fromIntegral (fromIntegral x :: Word8) -narrowU I16 x = fromIntegral (fromIntegral x :: Word16) -narrowU I32 x = fromIntegral (fromIntegral x :: Word32) -narrowU I64 x = fromIntegral (fromIntegral x :: Word64) +narrowU :: Width -> Integer -> Integer +narrowU W8 x = fromIntegral (fromIntegral x :: Word8) +narrowU W16 x = fromIntegral (fromIntegral x :: Word16) +narrowU W32 x = fromIntegral (fromIntegral x :: Word32) +narrowU W64 x = fromIntegral (fromIntegral x :: Word64) narrowU _ _ = panic "narrowTo" -narrowS :: MachRep -> Integer -> Integer -narrowS I8 x = fromIntegral (fromIntegral x :: Int8) -narrowS I16 x = fromIntegral (fromIntegral x :: Int16) -narrowS I32 x = fromIntegral (fromIntegral x :: Int32) -narrowS I64 x = fromIntegral (fromIntegral x :: Int64) +narrowS :: Width -> Integer -> Integer +narrowS W8 x = fromIntegral (fromIntegral x :: Int8) +narrowS W16 x = fromIntegral (fromIntegral x :: Int16) +narrowS W32 x = fromIntegral (fromIntegral x :: Int32) +narrowS W64 x = fromIntegral (fromIntegral x :: Int64) narrowS _ _ = panic "narrowTo" -- ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 9d83e2f..9382994 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -36,7 +36,6 @@ import PprCmm import CmmUtils import CmmLex import CLabel -import MachOp import SMRep import Lexer @@ -127,6 +126,7 @@ import System.Exit 'bits64' { L _ (CmmT_bits64) } 'float32' { L _ (CmmT_float32) } 'float64' { L _ (CmmT_float64) } + 'gcptr' { L _ (CmmT_gcptr) } GLOBALREG { L _ (CmmT_GlobalReg $$) } NAME { L _ (CmmT_Name $$) } @@ -191,12 +191,12 @@ static :: { ExtFCode [CmmStatic] } | type expr ';' { do e 
<- $2; return [CmmStaticLit (getLit e)] } | type ';' { return [CmmUninitialised - (machRepByteWidth $1)] } + (widthInBytes (typeWidth $1))] } | 'bits8' '[' ']' STRING ';' { return [mkString $4] } | 'bits8' '[' INT ']' ';' { return [CmmUninitialised (fromIntegral $3)] } | typenot8 '[' INT ']' ';' { return [CmmUninitialised - (machRepByteWidth $1 * + (widthInBytes (typeWidth $1) * fromIntegral $3)] } | 'align' INT ';' { return [CmmAlign (fromIntegral $2)] } | 'CLOSURE' '(' NAME lits ')' @@ -214,7 +214,7 @@ lits :: { [ExtFCode CmmExpr] } cmmproc :: { ExtCode } -- TODO: add real SRT/info tables to parsed Cmm - : info maybe_formals_without_kinds maybe_gc_block maybe_frame '{' body '}' + : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <- getCgStmtsEC' $ loopDecls $ do { (entry_ret_label, info, live) <- $1; @@ -226,12 +226,12 @@ cmmproc :: { ExtCode } blks <- code (cgStmtsToBlocks stmts) code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) } - | info maybe_formals_without_kinds ';' + | info maybe_formals_without_hints ';' { do (entry_ret_label, info, live) <- $1; formals <- sequence $2; code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) } - | NAME maybe_formals_without_kinds maybe_gc_block maybe_frame '{' body '}' + | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' { do ((formals, gc_block, frame), stmts) <- getCgStmtsEC' $ loopDecls $ do { formals <- sequence $2; @@ -256,8 +256,9 @@ info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } { do prof <- profilingInfo $11 $13 return (mkRtsEntryLabelFS $3, CmmInfoTable prof (fromIntegral $9) - (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $15) 0 - (ArgSpec 0) + (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT + 0 -- Arity zero + (ArgSpec (fromIntegral $15)) zeroCLit), []) } -- we leave most of the fields zero 
here. This is only used @@ -269,8 +270,8 @@ info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } { do prof <- profilingInfo $11 $13 return (mkRtsEntryLabelFS $3, CmmInfoTable prof (fromIntegral $9) - (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $15) (fromIntegral $17) - (ArgSpec 0) + (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $17) + (ArgSpec (fromIntegral $15)) zeroCLit), []) } -- we leave most of the fields zero here. This is only used @@ -303,7 +304,7 @@ info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } (ContInfo [] NoC_SRT), []) } - | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals_without_kinds0 ')' + | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals_without_hints0 ')' -- closure type, live regs { do live <- sequence (map (liftM Just) $7) return (mkRtsRetLabelFS $3, @@ -317,10 +318,7 @@ body :: { ExtCode } | stmt body { do $1; $2 } decl :: { ExtCode } - : type names ';' { mapM_ (newLocal defaultKind $1) $2 } - | STRING type names ';' {% do k <- parseGCKind $1; - return $ mapM_ (newLocal k $2) $3 } - + : type names ';' { mapM_ (newLocal $1) $2 } | 'import' names ';' { mapM_ newImport $2 } | 'export' names ';' { return () } -- ignore exports @@ -345,9 +343,9 @@ stmt :: { ExtCode } -- we tweak the syntax to avoid the conflict. The later -- option is taken here because the other way would require -- multiple levels of expanding and get unwieldy. - | maybe_results 'foreign' STRING expr '(' cmm_kind_exprs0 ')' safety vols opt_never_returns ';' + | maybe_results 'foreign' STRING expr '(' cmm_hint_exprs0 ')' safety vols opt_never_returns ';' {% foreignCall $3 $1 $4 $6 $9 $8 $10 } - | maybe_results 'prim' '%' NAME '(' cmm_kind_exprs0 ')' safety vols ';' + | maybe_results 'prim' '%' NAME '(' cmm_hint_exprs0 ')' safety vols ';' {% primCall $1 $4 $6 $9 $8 } -- stmt-level macros, stealing syntax from ordinary C-- function calls. -- Perhaps we ought to use the %%-form? 
@@ -446,8 +444,8 @@ expr :: { ExtFCode CmmExpr } | expr0 { $1 } expr0 :: { ExtFCode CmmExpr } - : INT maybe_ty { return (CmmLit (CmmInt $1 $2)) } - | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 $2)) } + : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } | STRING { do s <- code (mkStringCLit $1); return (CmmLit s) } | reg { $1 } @@ -457,27 +455,27 @@ expr0 :: { ExtFCode CmmExpr } -- leaving out the type of a literal gives you the native word size in C-- -maybe_ty :: { MachRep } - : {- empty -} { wordRep } +maybe_ty :: { CmmType } + : {- empty -} { bWord } | '::' type { $2 } -maybe_actuals :: { [ExtFCode CmmActual] } +maybe_actuals :: { [ExtFCode HintedCmmActual] } : {- empty -} { [] } - | '(' cmm_kind_exprs0 ')' { $2 } + | '(' cmm_hint_exprs0 ')' { $2 } -cmm_kind_exprs0 :: { [ExtFCode CmmActual] } +cmm_hint_exprs0 :: { [ExtFCode HintedCmmActual] } : {- empty -} { [] } - | cmm_kind_exprs { $1 } + | cmm_hint_exprs { $1 } -cmm_kind_exprs :: { [ExtFCode CmmActual] } - : cmm_kind_expr { [$1] } - | cmm_kind_expr ',' cmm_kind_exprs { $1 : $3 } +cmm_hint_exprs :: { [ExtFCode HintedCmmActual] } + : cmm_hint_expr { [$1] } + | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } -cmm_kind_expr :: { ExtFCode CmmActual } - : expr { do e <- $1; return (CmmKinded e (inferCmmKind e)) } - | expr STRING {% do h <- parseCmmKind $2; +cmm_hint_expr :: { ExtFCode HintedCmmActual } + : expr { do e <- $1; return (CmmHinted e (inferCmmHint e)) } + | expr STRING {% do h <- parseCmmHint $2; return $ do - e <- $1; return (CmmKinded e h) } + e <- $1; return (CmmHinted e h) } exprs0 :: { [ExtFCode CmmExpr] } : {- empty -} { [] } @@ -491,20 +489,20 @@ reg :: { ExtFCode CmmExpr } : NAME { lookupName $1 } | GLOBALREG { return (CmmReg (CmmGlobal $1)) } -maybe_results :: { [ExtFCode CmmFormal] } +maybe_results :: { [ExtFCode HintedCmmFormal] } : {- empty -} { [] } | '(' cmm_formals ')' '=' { $2 } -cmm_formals :: { [ExtFCode 
CmmFormal] } +cmm_formals :: { [ExtFCode HintedCmmFormal] } : cmm_formal { [$1] } | cmm_formal ',' { [$1] } | cmm_formal ',' cmm_formals { $1 : $3 } -cmm_formal :: { ExtFCode CmmFormal } - : local_lreg { do e <- $1; return (CmmKinded e (inferCmmKind (CmmReg (CmmLocal e)))) } - | STRING local_lreg {% do h <- parseCmmKind $1; +cmm_formal :: { ExtFCode HintedCmmFormal } + : local_lreg { do e <- $1; return (CmmHinted e (inferCmmHint (CmmReg (CmmLocal e)))) } + | STRING local_lreg {% do h <- parseCmmHint $1; return $ do - e <- $2; return (CmmKinded e h) } + e <- $2; return (CmmHinted e h) } local_lreg :: { ExtFCode LocalReg } : NAME { do e <- lookupName $1; @@ -521,23 +519,21 @@ lreg :: { ExtFCode CmmReg } other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } | GLOBALREG { return (CmmGlobal $1) } -maybe_formals_without_kinds :: { [ExtFCode LocalReg] } +maybe_formals_without_hints :: { [ExtFCode LocalReg] } : {- empty -} { [] } - | '(' formals_without_kinds0 ')' { $2 } + | '(' formals_without_hints0 ')' { $2 } -formals_without_kinds0 :: { [ExtFCode LocalReg] } +formals_without_hints0 :: { [ExtFCode LocalReg] } : {- empty -} { [] } - | formals_without_kinds { $1 } + | formals_without_hints { $1 } -formals_without_kinds :: { [ExtFCode LocalReg] } - : formal_without_kind ',' { [$1] } - | formal_without_kind { [$1] } - | formal_without_kind ',' formals_without_kinds { $1 : $3 } +formals_without_hints :: { [ExtFCode LocalReg] } + : formal_without_hint ',' { [$1] } + | formal_without_hint { [$1] } + | formal_without_hint ',' formals_without_hints { $1 : $3 } -formal_without_kind :: { ExtFCode LocalReg } - : type NAME { newLocal defaultKind $1 $2 } - | STRING type NAME {% do k <- parseGCKind $1; - return $ newLocal k $2 $3 } +formal_without_hint :: { ExtFCode LocalReg } + : type NAME { newLocal $1 $2 } maybe_frame :: { ExtFCode (Maybe UpdateFrame) } : {- empty -} { return Nothing } @@ -550,16 +546,17 @@ maybe_gc_block :: { ExtFCode (Maybe BlockId) } | 'goto' NAME 
{ do l <- lookupLabel $2; return (Just l) } -type :: { MachRep } - : 'bits8' { I8 } +type :: { CmmType } + : 'bits8' { b8 } | typenot8 { $1 } -typenot8 :: { MachRep } - : 'bits16' { I16 } - | 'bits32' { I32 } - | 'bits64' { I64 } - | 'float32' { F32 } - | 'float64' { F64 } +typenot8 :: { CmmType } + : 'bits16' { b16 } + | 'bits32' { b32 } + | 'bits64' { b64 } + | 'float32' { f32 } + | 'float64' { f64 } + | 'gcptr' { gcWord } { section :: String -> Section section "text" = Text @@ -576,17 +573,17 @@ mkString s = CmmString (map (fromIntegral.ord) s) -- argument. We assume that this is correct: for MachOps that don't have -- symmetrical args (e.g. shift ops), the first arg determines the type of -- the op. -mkMachOp :: (MachRep -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr +mkMachOp :: (Width -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr mkMachOp fn args = do arg_exprs <- sequence args - return (CmmMachOp (fn (cmmExprRep (head arg_exprs))) arg_exprs) + return (CmmMachOp (fn (typeWidth (cmmExprType (head arg_exprs)))) arg_exprs) getLit :: CmmExpr -> CmmLit getLit (CmmLit l) = l getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r getLit _ = panic "invalid literal" -- TODO messy failure -nameToMachOp :: FastString -> P (MachRep -> MachOp) +nameToMachOp :: FastString -> P (Width -> MachOp) nameToMachOp name = case lookupUFM machOps name of Nothing -> fail ("unknown primitive " ++ unpackFS name) @@ -656,24 +653,27 @@ machOps = listToUFM $ ( "shrl", MO_U_Shr ), ( "shra", MO_S_Shr ), - ( "lobits8", flip MO_U_Conv I8 ), - ( "lobits16", flip MO_U_Conv I16 ), - ( "lobits32", flip MO_U_Conv I32 ), - ( "lobits64", flip MO_U_Conv I64 ), - ( "sx16", flip MO_S_Conv I16 ), - ( "sx32", flip MO_S_Conv I32 ), - ( "sx64", flip MO_S_Conv I64 ), - ( "zx16", flip MO_U_Conv I16 ), - ( "zx32", flip MO_U_Conv I32 ), - ( "zx64", flip MO_U_Conv I64 ), - ( "f2f32", flip MO_S_Conv F32 ), -- TODO; rounding mode - ( "f2f64", flip MO_S_Conv F64 ), -- TODO; 
rounding mode - ( "f2i8", flip MO_S_Conv I8 ), - ( "f2i16", flip MO_S_Conv I16 ), - ( "f2i32", flip MO_S_Conv I32 ), - ( "f2i64", flip MO_S_Conv I64 ), - ( "i2f32", flip MO_S_Conv F32 ), - ( "i2f64", flip MO_S_Conv F64 ) + ( "lobits8", flip MO_UU_Conv W8 ), + ( "lobits16", flip MO_UU_Conv W16 ), + ( "lobits32", flip MO_UU_Conv W32 ), + ( "lobits64", flip MO_UU_Conv W64 ), + + ( "zx16", flip MO_UU_Conv W16 ), + ( "zx32", flip MO_UU_Conv W32 ), + ( "zx64", flip MO_UU_Conv W64 ), + + ( "sx16", flip MO_SS_Conv W16 ), + ( "sx32", flip MO_SS_Conv W32 ), + ( "sx64", flip MO_SS_Conv W64 ), + + ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode + ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode + ( "f2i8", flip MO_FS_Conv W8 ), + ( "f2i16", flip MO_FS_Conv W16 ), + ( "f2i32", flip MO_FS_Conv W32 ), + ( "f2i64", flip MO_FS_Conv W64 ), + ( "i2f32", flip MO_SF_Conv W32 ), + ( "i2f64", flip MO_SF_Conv W64 ) ] callishMachOps = listToUFM $ @@ -687,32 +687,25 @@ parseSafety "safe" = return (CmmSafe NoC_SRT) parseSafety "unsafe" = return CmmUnsafe parseSafety str = fail ("unrecognised safety: " ++ str) -parseCmmKind :: String -> P CmmKind -parseCmmKind "ptr" = return PtrHint -parseCmmKind "signed" = return SignedHint -parseCmmKind "float" = return FloatHint -parseCmmKind str = fail ("unrecognised hint: " ++ str) - -parseGCKind :: String -> P GCKind -parseGCKind "ptr" = return GCKindPtr -parseGCKind str = fail ("unrecognized kin: " ++ str) - -defaultKind :: GCKind -defaultKind = GCKindNonPtr +parseCmmHint :: String -> P ForeignHint +parseCmmHint "ptr" = return AddrHint +parseCmmHint "signed" = return SignedHint +parseCmmHint str = fail ("unrecognised hint: " ++ str) -- labels are always pointers, so we might as well infer the hint -inferCmmKind :: CmmExpr -> CmmKind -inferCmmKind (CmmLit (CmmLabel _)) = PtrHint -inferCmmKind (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = PtrHint -inferCmmKind _ = NoHint - -isPtrGlobalReg Sp = True -isPtrGlobalReg SpLim = True 
-isPtrGlobalReg Hp = True -isPtrGlobalReg HpLim = True -isPtrGlobalReg CurrentTSO = True -isPtrGlobalReg CurrentNursery = True -isPtrGlobalReg _ = False +inferCmmHint :: CmmExpr -> ForeignHint +inferCmmHint (CmmLit (CmmLabel _)) = AddrHint +inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint +inferCmmHint _ = NoHint + +isPtrGlobalReg Sp = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg (VanillaReg _ VGcPtr) = True +isPtrGlobalReg _ = False happyError :: P a happyError = srcParseFail @@ -819,10 +812,10 @@ addVarDecl var expr = EC $ \e s -> return ((var, Var expr):s, ()) addLabel :: FastString -> BlockId -> ExtCode addLabel name block_id = EC $ \e s -> return ((name, Label block_id):s, ()) -newLocal :: GCKind -> MachRep -> FastString -> ExtFCode LocalReg -newLocal kind ty name = do +newLocal :: CmmType -> FastString -> ExtFCode LocalReg +newLocal ty name = do u <- code newUnique - let reg = LocalReg u ty kind + let reg = LocalReg u ty addVarDecl name (CmmReg (CmmLocal reg)) return reg @@ -895,9 +888,9 @@ staticClosure cl_label info payload foreignCall :: String - -> [ExtFCode CmmFormal] + -> [ExtFCode HintedCmmFormal] -> ExtFCode CmmExpr - -> [ExtFCode CmmActual] + -> [ExtFCode HintedCmmActual] -> Maybe [GlobalReg] -> CmmSafety -> CmmReturnInfo @@ -927,22 +920,22 @@ foreignCall conv_string results_code expr_code args_code vols safety ret (CmmCallee expr' convention) args vols NoC_SRT ret) where unused = panic "not used by emitForeignCall'" -adjCallTarget :: CCallConv -> CmmExpr -> [CmmKinded CmmExpr] -> CmmExpr +adjCallTarget :: CCallConv -> CmmExpr -> [CmmHinted CmmExpr] -> CmmExpr #ifdef mingw32_TARGET_OS -- On Windows, we have to add the '@N' suffix to the label when making -- a call with the stdcall calling convention. 
adjCallTarget StdCallConv (CmmLit (CmmLabel lbl)) args = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) - where size (CmmKinded e _) = max wORD_SIZE (machRepByteWidth (cmmExprRep e)) + where size (CmmHinted e _) = max wORD_SIZE (widthInBytes (typeWidth (cmmExprType e))) -- c.f. CgForeignCall.emitForeignCall #endif adjCallTarget _ expr _ = expr primCall - :: [ExtFCode CmmFormal] + :: [ExtFCode HintedCmmFormal] -> FastString - -> [ExtFCode CmmActual] + -> [ExtFCode HintedCmmActual] -> Maybe [GlobalReg] -> CmmSafety -> P ExtCode @@ -961,7 +954,7 @@ primCall results_code name args_code vols safety (CmmPrim p) args vols NoC_SRT CmmMayReturn) where unused = panic "not used by emitForeignCall'" -doStore :: MachRep -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode +doStore :: CmmType -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode doStore rep addr_code val_code = do addr <- addr_code val <- val_code @@ -970,9 +963,11 @@ doStore rep addr_code val_code -- mismatch to be flagged by cmm-lint. If we don't do this, then -- the store will happen at the wrong type, and the error will not -- be noticed. + let val_width = typeWidth (cmmExprType val) + rep_width = typeWidth rep let coerce_val - | cmmExprRep val /= rep = CmmMachOp (MO_U_Conv rep rep) [val] - | otherwise = val + | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] + | otherwise = val stmtEC (CmmStore addr coerce_val) -- Return an unboxed tuple. 
@@ -982,7 +977,7 @@ emitRetUT args = do (sp, stmts) <- pushUnboxedTuple 0 args emitStmts stmts when (sp /= 0) $ stmtC (CmmAssign spReg (cmmRegOffW spReg (-sp))) - stmtC (CmmJump (entryCode (CmmLoad (cmmRegOffW spReg sp) wordRep)) []) + stmtC (CmmJump (entryCode (CmmLoad (cmmRegOffW spReg sp) bWord)) []) -- TODO (when using CPS): emitStmt (CmmReturn (map snd args)) -- ----------------------------------------------------------------------------- @@ -1088,9 +1083,9 @@ doSwitch mb_range scrut arms deflt initEnv :: Env initEnv = listToUFM [ ( fsLit "SIZEOF_StgHeader", - Var (CmmLit (CmmInt (fromIntegral (fixedHdrSize * wORD_SIZE)) wordRep) )), + Var (CmmLit (CmmInt (fromIntegral (fixedHdrSize * wORD_SIZE)) wordWidth) )), ( fsLit "SIZEOF_StgInfoTable", - Var (CmmLit (CmmInt (fromIntegral stdInfoTableSizeB) wordRep) )) + Var (CmmLit (CmmInt (fromIntegral stdInfoTableSizeB) wordWidth) )) ] parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe Cmm) diff --git a/compiler/cmm/CmmProcPointZ.hs b/compiler/cmm/CmmProcPointZ.hs index 82d3e26..cedb9ef 100644 --- a/compiler/cmm/CmmProcPointZ.hs +++ b/compiler/cmm/CmmProcPointZ.hs @@ -1,16 +1,19 @@ module CmmProcPointZ ( callProcPoints, minimalProcPointSet - , addProcPointProtocols - , splitAtProcPoints + , addProcPointProtocols, splitAtProcPoints, procPointAnalysis + , liveSlotAnal, cafAnal, layout, manifestSP, igraph, areaBuilder ) where +import Constants +import qualified Prelude as P import Prelude hiding (zip, unzip, last) +import Util (sortLe) import BlockId +import Bitmap import CLabel ---import ClosureInfo import Cmm hiding (blockId) import CmmExpr import CmmContFlowOpt @@ -18,13 +21,17 @@ import CmmLiveZ import CmmTx import DFMonad import FiniteMap -import MachOp (MachHint(NoHint)) +import IdInfo +import List (sortBy) import Maybes -import MkZipCfgCmm hiding (CmmBlock, CmmGraph) +import MkZipCfgCmm hiding (CmmBlock, CmmGraph, CmmTopZ) import Monad import Name import Outputable import Panic +import SMRep (rET_SMALL) 
+import StgCmmClosure +import StgCmmUtils import UniqFM import UniqSet import UniqSupply @@ -66,7 +73,7 @@ be the start of a new procedure to which the continuations can jump: You might think then that a criterion to make a node a proc point is that it is directly reached by two distinct proc points. (Note -[Direct reachability].) But this criterion is a bit two simple; for +[Direct reachability].) But this criterion is a bit too simple; for example, 'return x' is also reached by two proc points, yet there is no point in pulling it out of k_join. A good criterion would be to say that a node should be made a proc point if it is reached by a set @@ -123,7 +130,7 @@ forward = ForwardTransfers first middle last exit where first ProcPoint id = ReachedBy $ unitUniqSet id first x _ = x middle x _ = x - last _ (LastCall _ (Just id)) = LastOutFacts [(id, ProcPoint)] + last _ (LastCall _ (Just id) _) = LastOutFacts [(id, ProcPoint)] last x l = LastOutFacts $ map (\id -> (id, x)) (succs l) exit x = x @@ -136,32 +143,31 @@ minimalProcPointSet :: ProcPointSet -> CmmGraph -> FuelMonad ProcPointSet callProcPoints g = fold_blocks add entryPoint g where entryPoint = unitUniqSet (lg_entry g) add b set = case last $ unzip b of - LastOther (LastCall _ (Just k)) -> extendBlockSet set k + LastOther (LastCall _ (Just k) _) -> extendBlockSet set k _ -> set minimalProcPointSet callProcPoints g = extendPPSet g (postorder_dfs g) callProcPoints type PPFix = FuelMonad (ForwardFixedPoint Middle Last Status ()) -procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelMonad PPFix +procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelMonad (BlockEnv Status) procPointAnalysis procPoints g = let addPP env id = extendBlockEnv env id ProcPoint initProcPoints = foldl addPP emptyBlockEnv (uniqSetToList procPoints) - in runDFM lattice $ -- init with old facts and solve - return $ (zdfSolveFrom initProcPoints "proc-point reachability" lattice + in liftM zdfFpFacts $ + (zdfSolveFrom initProcPoints "proc-point 
reachability" lattice forward (fact_bot lattice) $ graphOfLGraph g :: PPFix) extendPPSet :: CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelMonad ProcPointSet extendPPSet g blocks procPoints = - do res <- procPointAnalysis procPoints g - env <- liftM zdfFpFacts res + do env <- procPointAnalysis procPoints g let add block pps = let id = blockId block in case lookupBlockEnv env id of Just ProcPoint -> extendBlockSet pps id _ -> pps procPoints' = fold_blocks add emptyBlockSet g newPoint = listToMaybe (mapMaybe ppSuccessor blocks) - ppSuccessor b@(Block id _) = + ppSuccessor b@(Block id _ _) = let nreached id = case lookupBlockEnv env id `orElse` panic "no ppt" of ProcPoint -> 1 ReachedBy ps -> sizeUniqSet ps @@ -178,8 +184,6 @@ extendPPSet g blocks procPoints = Nothing -> return procPoints' - - ------------------------------------------------------------------------ -- Computing Proc-Point Protocols -- ------------------------------------------------------------------------ @@ -243,12 +247,13 @@ addProcPointProtocols callPPs procPoints g = do liveness <- cmmLivenessZ g (protos, g') <- return $ optimize_calls liveness g blocks'' <- add_CopyOuts protos procPoints g' - return $ LGraph (lg_entry g) blocks'' + return $ LGraph (lg_entry g) (lg_argoffset g) blocks'' where optimize_calls liveness g = -- see Note [Separate Adams optimization] let (protos, blocks') = fold_blocks maybe_add_call (init_protocols, emptyBlockEnv) g protos' = add_unassigned liveness procPoints protos - g' = LGraph (lg_entry g) $ add_CopyIns callPPs protos' blocks' + g' = LGraph (lg_entry g) (lg_argoffset g) $ + add_CopyIns callPPs protos' blocks' in (protos', runTx removeUnreachableBlocksZ g') maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock) -> (BlockEnv Protocol, BlockEnv CmmBlock) @@ -257,11 +262,10 @@ addProcPointProtocols callPPs procPoints g = -- redirect the call (cf 'newblock') and set the protocol if necessary maybe_add_call block (protos, blocks) = case goto_end $ unzip 
block of - (h, LastOther (LastCall tgt (Just k))) + (h, LastOther (LastCall tgt (Just k) s)) | Just proto <- lookupBlockEnv protos k, - Just pee <- jumpsToProcPoint k - -> let newblock = - zipht h (tailOfLast (LastCall tgt (Just pee))) + Just pee <- branchesToProcPoint k + -> let newblock = zipht h (tailOfLast (LastCall tgt (Just pee) s)) changed_blocks = insertBlock newblock blocks unchanged_blocks = insertBlock block blocks in case lookupBlockEnv protos pee of @@ -271,21 +275,20 @@ addProcPointProtocols callPPs procPoints g = else (protos, unchanged_blocks) _ -> (protos, insertBlock block blocks) - jumpsToProcPoint :: BlockId -> Maybe BlockId - -- ^ Tells whether the named block is just a jump to a proc point - jumpsToProcPoint id = - let (Block _ t) = lookupBlockEnv (lg_blocks g) id `orElse` - panic "jump out of graph" + branchesToProcPoint :: BlockId -> Maybe BlockId + -- ^ Tells whether the named block is just a branch to a proc point + branchesToProcPoint id = + let (Block _ _ t) = lookupBlockEnv (lg_blocks g) id `orElse` + panic "branch out of graph" in case t of - ZTail (CopyIn {}) (ZLast (LastOther (LastBranch pee))) + ZLast (LastOther (LastBranch pee)) | elemBlockSet pee procPoints -> Just pee _ -> Nothing init_protocols = fold_blocks maybe_add_proto emptyBlockEnv g maybe_add_proto :: CmmBlock -> BlockEnv Protocol -> BlockEnv Protocol - maybe_add_proto (Block id (ZTail (CopyIn c fs _srt) _)) env = - extendBlockEnv env id (Protocol c fs $ toArea id fs) + --maybe_add_proto (Block id (ZTail (CopyIn c _ fs _srt) _)) env = + -- extendBlockEnv env id (Protocol c fs $ toArea id fs) maybe_add_proto _ env = env - toArea id fs = mkCallArea id fs $ Just fs -- | For now, following a suggestion by Ben Lippmeier, we pass all -- live variables as arguments, hoping that a clever register @@ -297,17 +300,17 @@ add_unassigned = pass_live_vars_as_args pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol -> BlockEnv Protocol -pass_live_vars_as_args 
liveness procPoints protos = protos' +pass_live_vars_as_args _liveness procPoints protos = protos' where protos' = foldUniqSet addLiveVars protos procPoints addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol addLiveVars id protos = case lookupBlockEnv protos id of Just _ -> protos - Nothing -> let live = lookupBlockEnv liveness id `orElse` - panic ("no liveness at block " ++ show id) - formals = map (\x -> CmmKinded x NoHint) $ uniqSetToList live - prot = Protocol ConventionPrivate formals $ - mkCallArea id formals $ Just formals + Nothing -> let live = emptyBlockEnv + --lookupBlockEnv _liveness id `orElse` + --panic ("no liveness at block " ++ show id) + formals = uniqSetToList live + prot = Protocol Private formals $ CallArea $ Young id in extendBlockEnv protos id prot @@ -315,131 +318,597 @@ pass_live_vars_as_args liveness procPoints protos = protos' -- instruction. (Proc-points that arise from calls already have their copy-in instructions.) add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock -add_CopyIns callPPs protos = mapUFM maybe_insert_CopyIns +add_CopyIns callPPs protos blocks = mapUFM maybe_insert_CopyIns blocks where maybe_insert_CopyIns :: CmmBlock -> CmmBlock - maybe_insert_CopyIns b@(Block id t) | not $ elementOfUniqSet id callPPs = - case lookupBlockEnv protos id of - Nothing -> b - Just (Protocol c fs area) -> - case t of - --ZTail (CopyIn c' fs' _) _ -> - -- if c == c' && fs == fs' then b - -- else panic ("mismatched protocols for block " ++ show id) - _ -> Block id -- (ZTail (CopyIn c fs NoC_SRT) t) - $ foldr ZTail t (copyIn c area fs) + maybe_insert_CopyIns b@(Block id off t) | not $ elementOfUniqSet id callPPs = + case (off, lookupBlockEnv protos id) of + (Just _, _) -> panic "shouldn't copy arguments twice into a block" + (_, Just (Protocol c fs area)) -> Block id (Just off) $ foldr ZTail t copies + where (off, copies) = copyIn c False area fs + (_, Nothing) -> b maybe_insert_CopyIns b = b 
-- | Add a CopyOut node before each procpoint. --- If the predecessor is a call, then the CopyOut should already exist (in the callee). +-- If the predecessor is a call, then the copy outs should already be done by the callee. +-- Note: If we need to add copy-out instructions, they may require stack space, +-- so we accumulate a map from the successors to the necessary stack space, +-- then update the successors after we have finished inserting the copy-outs. add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph -> FuelMonad (BlockEnv CmmBlock) -add_CopyOuts protos procPoints g = fold_blocks maybe_insert_CopyOut (return emptyBlockEnv) g - where maybe_insert_CopyOut :: CmmBlock -> FuelMonad (BlockEnv CmmBlock) -> - FuelMonad (BlockEnv CmmBlock) - maybe_insert_CopyOut b@(Block bid _) blocks | bid == lg_entry g = skip b blocks - maybe_insert_CopyOut b blocks = +add_CopyOuts protos procPoints g = fold_blocks mb_copy_out (return emptyBlockEnv) g + where mb_copy_out :: CmmBlock -> FuelMonad (BlockEnv CmmBlock) -> + FuelMonad (BlockEnv CmmBlock) + mb_copy_out b@(Block bid _ _) z | bid == lg_entry g = skip b z + mb_copy_out b z = case last $ unzip b of - LastOther (LastCall _ _) -> skip b blocks -- copy out done by callee - _ -> maybe_insert_CopyOut' b blocks - maybe_insert_CopyOut' b blocks = fold_succs trySucc b init >>= finish - where init = blocks >>= (\bmap -> return (b, bmap)) + LastOther (LastCall _ _ _) -> skip b z -- copy out done by callee + _ -> mb_copy_out' b z + mb_copy_out' b z = fold_succs trySucc b init >>= finish + where init = z >>= (\bmap -> return (b, bmap)) trySucc succId z = if elemBlockSet succId procPoints then case lookupBlockEnv protos succId of Nothing -> z Just (Protocol c fs area) -> - insert z succId $ copyOut c area $ map fetch fs - -- CopyOut c $ map fetch fs + let (_, copies) = copyOut c Jump area $ map (CmmReg . 
CmmLocal) fs + in insert z succId copies else z - fetch k = k {kindlessCmm = CmmReg $ CmmLocal $ kindlessCmm k} insert z succId m = do (b, bmap) <- z (b, bs) <- insertBetween b m succId - return $ (b, foldl (flip insertBlock) bmap bs) - finish (b@(Block bid _), bmap) = return $ extendBlockEnv bmap bid b - skip b@(Block bid _) bs = bs >>= (\bmap -> return $ extendBlockEnv bmap bid b) - - - + pprTrace "insert for succ" (ppr succId <> ppr m) $ + return $ (b, foldl (flip insertBlock) bmap bs) + finish (b@(Block bid _ _), bmap) = + return $ (extendBlockEnv bmap bid b) + skip b@(Block bid _ _) bs = + bs >>= (\bmap -> return (extendBlockEnv bmap bid b)) + +-- At this point, we have found a set of procpoints, each of which should be +-- the entry point of a procedure. +-- Now, we create the procedure for each proc point, +-- which requires that we: +-- 1. build a map from proc points to the blocks reachable from the proc point +-- 2. turn each branch to a proc point into a jump +-- 3. turn calls and returns into jumps +-- 4. build info tables for the procedures -- and update the info table for +-- the SRTs in the entry procedure as well. -- Input invariant: A block should only be reachable from a single ProcPoint. --- If you want to duplicate blocks, do it before this gets called. -splitAtProcPoints :: CmmFormalsWithoutKinds -> CLabel -> ProcPointSet -> - CmmGraph -> FuelMonad [CmmGraph] -splitAtProcPoints formals entry_label procPoints g@(LGraph entry _) = - do let layout = layout_stack formals g - pprTrace "stack layout" (ppr layout) $ return () - res <- procPointAnalysis procPoints g - procMap <- liftM zdfFpFacts res - let addBlock b@(Block bid _) graphEnv = - case lookupBlockEnv procMap bid of - Just ProcPoint -> add graphEnv bid bid b - Just (ReachedBy set) -> - case uniqSetToList set of - [] -> graphEnv - [id] -> add graphEnv id bid b - _ -> panic "Each block should be reachable from only one ProcPoint" - Nothing -> panic "block not reached by a proc point?" 
+splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status -> + BlockEnv SubAreaSet -> AreaMap -> CmmTopZ -> FuelMonad [CmmTopZ] +splitAtProcPoints entry_label callPPs procPoints procMap slotEnv areaMap + (CmmProc top_info top_l top_args g@(LGraph entry e_off blocks)) = + do -- Build a map from procpoints to the blocks they reach + let addBlock b@(Block bid _ _) graphEnv = + case lookupBlockEnv procMap bid of + Just ProcPoint -> add graphEnv bid bid b + Just (ReachedBy set) -> + case uniqSetToList set of + [] -> graphEnv + [id] -> add graphEnv id bid b + _ -> panic "Each block should be reachable from only one ProcPoint" + Nothing -> pprPanic "block not reached by a proc point?" (ppr bid) add graphEnv procId bid b = extendBlockEnv graphEnv procId graph' where graph = lookupBlockEnv graphEnv procId `orElse` emptyBlockEnv graph' = extendBlockEnv graph bid b graphEnv <- return $ fold_blocks addBlock emptyBlockEnv g -- Build a map from proc point BlockId to labels for their new procedures - let add_label map pp = clabel pp >>= (\l -> return $ (pp, l) : map) - clabel procPoint = if procPoint == entry then return entry_label - else getUniqueM >>= return . to_label - to_label u = mkEntryLabel (mkFCallName u "procpoint") - procLabels <- foldM add_label [] (uniqSetToList procPoints) + let add_label map pp = return $ addToFM map pp lbl + where lbl = if pp == entry then entry_label else blockLbl pp + procLabels <- foldM add_label emptyFM (uniqSetToList procPoints) + -- Convert call and return instructions to jumps. 
+ let last (LastCall e _ n) = LastJump e n + last l = l + graphEnv <- return $ mapUFM (mapUFM (map_one_block id id last)) graphEnv -- In each new graph, add blocks jumping off to the new procedures, -- and replace branches to procpoints with branches to the jump-off blocks let add_jump_block (env, bs) (pp, l) = do bid <- liftM mkBlockId getUniqueM - let b = Block bid (ZLast (LastOther (LastJump $ CmmLit $ CmmLabel l))) + let b = Block bid Nothing (ZLast (LastOther jump)) + argSpace = case lookupBlockEnv blocks pp of + Just (Block _ (Just s) _) -> s + Just (Block _ Nothing _) -> panic "no args at procpoint" + _ -> panic "can't find procpoint block" + jump = LastJump (CmmLit (CmmLabel l)) argSpace return $ (extendBlockEnv env pp bid, b : bs) add_jumps newGraphEnv (guniq, blockEnv) = - do (jumpEnv, jumpBlocks) <- foldM add_jump_block (emptyBlockEnv, []) procLabels + do (jumpEnv, jumpBlocks) <- foldM add_jump_block (emptyBlockEnv, []) + $ fmToList procLabels let ppId = mkBlockId guniq - LGraph _ blockEnv' = replaceLabelsZ jumpEnv $ LGraph ppId blockEnv - blockEnv'' = foldl (flip insertBlock) blockEnv' jumpBlocks + (b_off, b) = + case lookupBlockEnv blockEnv ppId of + Just (Block id (Just b_off) t) -> (b_off, Block id Nothing t) + Just b@(Block _ Nothing _) -> (0, b) + Nothing -> panic "couldn't find entry block while splitting" + off = if ppId == entry then e_off else b_off + LGraph _ _ blockEnv' = pprTrace "jumpEnv" (ppr jumpEnv) $ + replaceLabelsZ jumpEnv $ LGraph ppId off blockEnv + blockEnv'' = foldl (flip insertBlock) (extendBlockEnv blockEnv' ppId b) + jumpBlocks return $ extendBlockEnv newGraphEnv ppId $ - runTx cmmCfgOptsZ $ LGraph ppId blockEnv'' - _ <- return $ replaceLabelsZ + runTx cmmCfgOptsZ $ LGraph ppId off blockEnv'' + upd_info_tbl srt' (CmmInfoTable p t typeinfo) = CmmInfoTable p t typeinfo' + where typeinfo' = case typeinfo of + t@(ConstrInfo _ _ _) -> t + (FunInfo c _ a d e) -> FunInfo c srt' a d e + (ThunkInfo c _) -> ThunkInfo c srt' + 
(ThunkSelectorInfo s _) -> ThunkSelectorInfo s srt' + (ContInfo vars _) -> ContInfo vars srt' + upd_info_tbl _ CmmNonInfoTable = CmmNonInfoTable + to_proc cafMap (ppUniq, g) | elementOfUniqSet bid callPPs = + if bid == entry then + CmmProc (CmmInfo gc upd_fr (upd_info_tbl srt' info_tbl)) top_l top_args g + else + pprTrace "adding infotable for" (ppr bid) $ + CmmProc (CmmInfo Nothing Nothing $ infoTbl) lbl [] g + where bid = mkBlockId ppUniq + lbl = expectJust "pp label" $ lookupFM procLabels bid + infoTbl = CmmInfoTable (ProfilingInfo zero zero) rET_SMALL + (ContInfo stack_vars srt') + stack_vars = pprTrace "slotEnv" (ppr slotEnv) $ + live_vars slotEnv areaMap bid + zero = CmmInt 0 wordWidth + srt' = expectJust "procpoint.infoTbl" $ lookupBlockEnv cafMap bid + CmmInfo gc upd_fr info_tbl = top_info + to_proc _ (ppUniq, g) = + pprTrace "not adding infotable for" (ppr bid) $ + CmmProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] g + where bid = mkBlockId ppUniq + lbl = expectJust "pp label" $ lookupFM procLabels bid graphEnv <- foldM add_jumps emptyBlockEnv $ ufmToList graphEnv + cafEnv <- cafAnal g + (cafTable, blockCafs) <- buildCafs cafEnv + procs <- return $ map (to_proc blockCafs) $ ufmToList graphEnv return $ pprTrace "procLabels" (ppr procLabels) $ - pprTrace "splitting graphs" (ppr graphEnv) $ [g] + pprTrace "splitting graphs" (ppr graphEnv) $ cafTable ++ procs +splitAtProcPoints _ _ _ _ _ _ t@(CmmData _ _) = return [t] ------------------------------------------------------------------------ --- Stack Layout (completely bogus for now) -- +-- Stack Layout -- ------------------------------------------------------------------------ --- At some point, we'll do stack layout properly. --- But for now, we can move forward on generating code by just producing --- a brain dead layout, giving a separate slot to every variable, --- and (incorrectly) assuming that all parameters are passed on the stack. 
- --- For now, variables are placed at explicit offsets from a virtual --- frame pointer. --- We may want to use abstract stack slots at some point. -data Placement = VFPMinus Int - -instance Outputable Placement where - ppr (VFPMinus k) = text "VFP - " <> int k - --- Build a map from registers to stack locations. --- Return that map along with the offset to the end of the block --- containing local registers. -layout_stack ::CmmFormalsWithoutKinds -> CmmGraph -> - (Int, FiniteMap LocalReg Placement, FiniteMap LocalReg Placement) -layout_stack formals g = (ix', incomingMap, localMap) - where (ix, incomingMap) = foldl (flip place) (1, emptyFM) formals -- IGNORES CC'S - -- 1 leaves space for the return infotable - (ix', localMap) = foldUniqSet place (ix, emptyFM) regs - place r (ix, map) = (ix', addToFM map r $ VFPMinus ix') where ix' = ix + 1 - regs = fold_blocks (fold_fwd_block (\_ y -> y) add addL) emptyRegSet g - add x y = foldRegsDefd extendRegSet y x - addL (LastOther l) z = add l z - addL LastExit z = z +-- | Before we lay out the stack, we need to know something about the +-- liveness of the stack slots. In particular, to decide whether we can +-- reuse a stack location to hold multiple stack slots, we need to know +-- when each of the stack slots is used. +-- Although tempted to use something simpler, we really need a full interference +-- graph. Consider the following case: +-- case <...> of +-- 1 -> ; // y is dead out +-- 2 -> ; // x is dead out +-- 3 -> +-- If we consider the arms in order and we use just the deadness information given by a +-- dataflow analysis, we might decide to allocate the stack slots for x and y +-- to the same stack location, which will lead to incorrect code in the third arm. +-- We won't make this mistake with an interference graph. + +-- First, the liveness analysis. +-- We represent a slot with an area, an offset into the area, and a width. 
+-- Tracking the live slots is a bit tricky because there may be loads and stores +-- into only a part of a stack slot (e.g. loading the low word of a 2-word long), +-- e.g. Slot A 0 8 overlaps with Slot A 4 4. +-- +-- The definition of a slot set is intended to reduce the number of overlap +-- checks we have to make. There's no reason to check for overlap between +-- slots in different areas, so we segregate the map by Area's. +-- We expect few slots in each Area, so we collect them in an unordered list. +-- To keep these lists short, any contiguous live slots are coalesced into +-- a single slot, on insertion. + +type SubAreaSet = FiniteMap Area [SubArea] +fold_subareas :: (SubArea -> z -> z) -> SubAreaSet -> z -> z +fold_subareas f m z = foldFM (\_ s z -> foldr (\a z -> f a z) z s) z m + +liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea]) +liveGen s set = liveGen' s set [] + where liveGen' s [] z = (True, s : z) + liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z = + if a /= a' || hi < lo' || lo > hi' then -- no overlap + liveGen' s rst (s' : z) + else if s' `contains` s then -- old contains new + (False, set) + else -- overlap: coalesce the slots + let new_hi = max hi hi' + new_lo = min lo lo' + in liveGen' (a, new_hi, new_hi - new_lo) rst z + where lo = hi - w -- remember: areas grow down + lo' = hi' - w' + contains (a, hi, w) (a', hi', w') = + a == a' && hi >= hi' && hi - w <= hi' - w' + +liveKill :: SubArea -> [SubArea] -> [SubArea] +liveKill (a, hi, w) set = pprTrace "killing slots in area" (ppr a) $ liveKill' set [] + where liveKill' [] z = z + liveKill' (s'@(a', hi', w') : rst) z = + if a /= a' || hi < lo' || lo > hi' then -- no overlap + liveKill' rst (s' : z) + else -- overlap: split the old slot + let z' = if hi' > hi then (a, hi', hi' - hi) : z else z + z'' = if lo > lo' then (a, lo, lo - lo') : z' else z' + in liveKill' rst z'' + where lo = hi - w -- remember: areas grow down + lo' = hi' - w' + +slotLattice :: DataflowLattice SubAreaSet +slotLattice 
= DataflowLattice "live slots" emptyFM add True + where add new old = case foldFM addArea (False, old) new of + (True, x) -> aTx x + (False, x) -> noTx x + addArea a newSlots z = foldr (addSlot a) z newSlots + addSlot a slot (changed, map) = + let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a + in (c || changed, addToFM map a live) + +liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet +liveInSlots live x = foldSlotsUsed add (foldSlotsDefd remove live x) x + where add live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live + remove live (a, i, w) = liftToArea a (liveKill (a, i, w)) live + liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a) + +-- Unlike the liveness transfer functions @gen@ and @kill@, this function collects +-- _any_ slot that is named. +--addNamedSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet +--addNamedSlots live x = foldSlotsUsed add (foldSlotsDefd add live x) x +-- where add live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live +-- liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a) + +-- Note: the stack slots that hold variables returned on the stack are not +-- considered live in to the block -- we treat the first node as a definition site. +-- BEWARE: I'm being a little careless here in failing to check for the +-- entry Id (which would use the CallArea Old). +liveTransfers :: BackwardTransfers Middle Last SubAreaSet +liveTransfers = BackwardTransfers first liveInSlots liveLastIn + where first live id = delFromFM live (CallArea (Young id)) + +liveLastIn :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet +liveLastIn env l = liveInSlots (liveLastOut env l) l + +-- Don't forget to keep the outgoing parameters in the CallArea live. 
+liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet +liveLastOut env l = + case l of + LastReturn n -> add_area (CallArea Old) n out + LastJump _ n -> add_area (CallArea Old) n out + LastCall _ Nothing n -> add_area (CallArea Old) n out + LastCall _ (Just k) n -> add_area (CallArea (Young k)) n out + _ -> out + where out = joinOuts slotLattice env l +add_area :: Area -> Int -> SubAreaSet -> SubAreaSet +add_area a n live = + addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a + +type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a) +liveSlotAnal :: LGraph Middle Last -> FuelMonad (BlockEnv SubAreaSet) +liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ()) + where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice + liveTransfers (fact_bot slotLattice) g + +-- The liveness analysis must be precise: otherwise, we won't know if a definition +-- should really kill a live-out stack slot. +-- But the interference graph does not have to be precise -- it might decide that +-- any live areas interfere. To maintain both a precise analysis and an imprecise +-- interference graph, we need to convert the live-out stack slots to graph nodes +-- at each and every instruction; rather than reconstruct a new list of nodes +-- every time, I provide a function to fold over the nodes, which should be a +-- reasonably efficient approach for the implementations we envision. +-- Of course, it will probably be much easier to program if we just return a list... +type Set x = FiniteMap x () +type AreaMap = FiniteMap Area Int +data IGraphBuilder n = + Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z + , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int] + } + +areaBuilder :: IGraphBuilder Area +areaBuilder = Builder fold words + where fold (a, _, _) f z = f a z + words areaSize areaMap a = + case lookupFM areaMap a of + Just addr -> [addr .. 
addr + (lookupFM areaSize a `orElse` + pprPanic "wordsOccupied: unknown area" (ppr a))] + Nothing -> [] + +--slotBuilder :: IGraphBuilder (Area, Int) +--slotBuilder = undefined + +-- Now, we can build the interference graph. +-- The usual story: a definition interferes with all live outs and all other +-- definitions. +type IGraph x = FiniteMap x (Set x) +type IGPair x = (IGraph x, IGraphBuilder x) +igraph :: (Ord x) => IGraphBuilder x -> BlockEnv SubAreaSet -> LGraph Middle Last -> IGraph x +igraph builder env g = foldr interfere emptyFM (postorder_dfs g) + where foldN = foldNodes builder + interfere block igraph = + let (h, l) = goto_end (unzip block) + --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x + heads (ZFirst _ _) (igraph, _) = igraph + heads (ZHead h m) (igraph, liveOut) = + heads h (addEdges igraph m liveOut, liveInSlots liveOut m) + -- add edges between a def and the other defs and liveouts + addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i + addDef (igraph, out) def@(a, _, _) = + (foldN def (addDefN out) igraph, + addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a))) + addDefN out n igraph = + let addEdgeNO o igraph = foldN o addEdgeNN igraph + addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph + addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ()) + where set = lookupWithDefaultFM igraph emptyFM n + in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out + env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph" + in heads h $ case l of LastExit -> (igraph, emptyFM) + LastOther l -> (addEdges igraph l $ liveLastOut env' l, + liveLastIn env' l) + +-- Before allocating stack slots, we need to collect one more piece of information: +-- what's the highest offset (in bytes) used in each Area? +-- We'll need to allocate that much space for each Area. 
+getAreaSize :: LGraph Middle Last -> AreaMap +getAreaSize g@(LGraph _ off _) = + fold_blocks (fold_fwd_block first add add) (unitFM (CallArea Old) off) g + where first _ z = z + add x z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z x) x + addSlot z (a, off, _) = addToFM z a $ max off $ lookupWithDefaultFM z 0 a + + +-- Find the Stack slots occupied by the subarea's conflicts +conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int +conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea = + foldNodes subarea foldNode emptyFM + where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n + conflict n' () set = liveInSlots areaMap n' set + -- Add stack slots occupied by igraph node n + liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n) + setAdd w s = addToFM s w () + +-- Find any open space on the stack, starting from the offset. +freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int +freeSlotFrom ig areaSize offset areaMap area = + let size = lookupFM areaSize area `orElse` 0 + conflicts = conflictSlots ig areaSize areaMap (area, size, size) + -- Find a space big enough to hold the area + findSpace curr 0 = curr + findSpace curr cnt = -- target slot, considerand, # left to check + if elemFM curr conflicts then + findSpace (curr + size) size + else findSpace (curr - 1) (cnt - 1) + in findSpace (offset + size) size + +-- Find an open space on the stack, and assign it to the area. +allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap +allocSlotFrom ig areaSize from areaMap area = + if elemFM area areaMap then areaMap + else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area + +-- | Greedy stack layout. +-- Compute liveness, build the interference graph, and allocate slots for the areas. +-- We visit each basic block in a (generally) forward order. 
+-- At each instruction that names a register subarea r, we immediately allocate +-- any available slot on the stack by the following procedure: +-- 1. Find the nodes N' that conflict with r +-- 2. Find the stack slots used for N' +-- 3. Choose a contiguous stack space s not in N' (s must be large enough to hold r) +-- For a CallArea, we allocate the stack space only when we reach a function +-- call that returns to the CallArea's blockId. +-- We use a similar procedure, with one exception: the stack space +-- must be allocated below the youngest stack slot that is live out. + +-- Note: The stack pointer only has to be younger than the youngest live stack slot +-- at proc points. Otherwise, the stack pointer can point anywhere. +layout :: ProcPointSet -> BlockEnv SubAreaSet -> LGraph Middle Last -> AreaMap +layout procPoints env g@(LGraph _ entrySp _) = + let builder = areaBuilder + ig = (igraph builder env g, builder) + env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph" + areaSize = getAreaSize g + -- Find the slots that are live-in to the block + live_in (ZTail m l) = liveInSlots (live_in l) m + live_in (ZLast (LastOther l)) = liveLastIn env' l + live_in (ZLast LastExit) = emptyFM + -- Find the youngest live stack slot + youngest_live areaMap live = fold_subareas young_slot live 0 + where young_slot (a, o, _) z = case lookupFM areaMap a of + Just top -> max z $ top + o + Nothing -> z + -- Allocate space for spill slots and call areas + allocVarSlot = allocSlotFrom ig areaSize 0 + allocCallSlot areaMap (Block id _ t) | elemBlockSet id procPoints = + allocSlotFrom ig areaSize (youngest_live areaMap $ live_in t) + areaMap (CallArea (Young id)) + allocCallSlot areaMap _ = areaMap + alloc i areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap i) i + where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a + alloc' areaMap _ = areaMap + layoutAreas areaMap b@(Block _ _ t) = layout areaMap t + where layout areaMap (ZTail m t) 
= layout (alloc m areaMap) t + layout areaMap (ZLast _) = allocCallSlot areaMap b + areaMap = foldl layoutAreas (addToFM emptyFM (CallArea Old) 0) $ postorder_dfs g + in pprTrace "ProcPoints" (ppr procPoints) $ + pprTrace "Area SizeMap" (ppr areaSize) $ + pprTrace "Entry SP" (ppr entrySp) $ + pprTrace "Area Map" (ppr areaMap) $ areaMap + +-- After determining the stack layout, we can: +-- 1. Replace references to stack Areas with addresses relative to the stack +-- pointer. +-- 2. Insert adjustments to the stack pointer to ensure that it is at a +-- conventional location at each proc point. +-- Because we don't take interrupts on the execution stack, we only need the +-- stack pointer to be younger than the live values on the stack at proc points. +-- 3. At some point, we should check for stack overflow, but not just yet. +manifestSP :: ProcPointSet -> BlockEnv Status -> AreaMap -> + LGraph Middle Last -> FuelMonad (LGraph Middle Last) +manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) = + liftM (LGraph entry args) blocks' + where blocks' = foldl replB (return emptyBlockEnv) (postorder_dfs g) + slot a = pprTrace "slot" (ppr a) $ lookupFM areaMap a `orElse` panic "unallocated Area" + slot' id = pprTrace "slot'" (ppr id)$ slot $ CallArea (Young id) + sp_on_entry id | id == entry = slot (CallArea Old) + args + sp_on_entry id | elemBlockSet id procPoints = + case lookupBlockEnv blocks id of + Just (Block _ (Just o) _) -> slot' id + o + Just (Block _ Nothing _) -> slot' id + Nothing -> panic "procpoint dropped from block env" + sp_on_entry id = + case lookupBlockEnv procMap id of + Just (ReachedBy pp) -> case uniqSetToList pp of + [id] -> sp_on_entry id + _ -> panic "block not reached by single proc point" + Just ProcPoint -> panic "procpoint not in procpoint set" + Nothing -> panic "block not found in procmap" + -- On entry to procpoints, the stack pointer is conventional; + -- otherwise, we check the SP set by predecessors. 
+ replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock) + replB blocks (Block id o t) = + do bs <- replTail (Block id o) spIn t + pprTrace "spIn" (ppr id <+> ppr spIn)$ liftM (flip (foldr insertBlock) bs) blocks + where spIn = sp_on_entry id + replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) -> + FuelMonad ([CmmBlock]) + replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t + replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l + replTail h _ l@(ZLast LastExit) = return [h l] + middle spOff m = mapExpDeepMiddle (replSlot spOff) m + last spOff l = mapExpDeepLast (replSlot spOff) l + replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i)) + replSlot _ e = e + -- The block must establish the SP expected at each successor. + fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock]) + fixSp h spOff l@(LastReturn n) = updSp h spOff (slot (CallArea Old) + n) l + fixSp h spOff l@(LastJump _ n) = updSp h spOff (slot (CallArea Old) + n) l + fixSp h spOff l@(LastCall _ (Just k) n) = updSp h spOff (slot' k + n) l + fixSp h spOff l@(LastCall _ Nothing n) = updSp h spOff (slot (CallArea Old) + n) l + fixSp h spOff l@(LastBranch k) | elemBlockSet k procPoints = + pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $ updSp h spOff (sp_on_entry k) l + fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, []) + where b = h (ZLast (LastOther (last spOff l))) + succ succId z = + let succSp = sp_on_entry succId in + if elemBlockSet succId procPoints && succSp /= spOff then + do (b, bs) <- z + (b', bs') <- insertBetween b [setSpMid spOff succSp] succId + return (b', bs ++ bs') + else z + updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)] + setSpMid sp sp' = MidAssign (CmmGlobal Sp) e + where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off] + off = CmmLit $ CmmInt (toInteger $ sp - sp') 
wordWidth + setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t +---------------------------------------------------------------- +-- Building InfoTables + +type CAFSet = FiniteMap CLabel () + +-- First, an analysis to find live CAFs. +cafLattice :: DataflowLattice CAFSet +cafLattice = DataflowLattice "live cafs" emptyFM add True + where add new old = if sizeFM new' > sizeFM old then aTx new' else noTx new' + where new' = new `plusFM` old + +cafTransfers :: BackwardTransfers Middle Last CAFSet +cafTransfers = BackwardTransfers first middle last + where first live _ = live + middle live m = pprTrace "cafmiddle" (ppr m) $ foldExpDeepMiddle addCaf m live + last env l = foldExpDeepLast addCaf l (joinOuts cafLattice env l) + addCaf e set = case e of + CmmLit (CmmLabel c) -> add c set + CmmLit (CmmLabelOff c _) -> add c set + CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set + _ -> set + add c s = pprTrace "CAF analysis saw label" (ppr c) $ + if hasCAF c then (pprTrace "has caf" (ppr c) $ addToFM s c ()) else (pprTrace "no cafs" (ppr c) $ s) + +type CafFix a = FuelMonad (BackwardFixedPoint Middle Last CAFSet a) +cafAnal :: LGraph Middle Last -> FuelMonad (BlockEnv CAFSet) +cafAnal g = liftM zdfFpFacts (res :: CafFix ()) + where res = zdfSolveFromL emptyBlockEnv "live CAF analysis" cafLattice + cafTransfers (fact_bot cafLattice) g + +-- Once we have found the CAFs, we need to do two things: +-- 1. Build a table of all the CAFs used in the procedure. +-- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint. 
+buildCafs :: (BlockEnv CAFSet) -> FuelMonad ([CmmTopZ], BlockEnv C_SRT) +buildCafs blockCafs = + -- This is surely the wrong way to get names, as in BlockId + do top_lbl <- getUniqueM >>= \ u -> return $ mkSRTLabel (mkFCallName u "srt") MayHaveCafRefs + let allCafs = foldBlockEnv (\_ x y -> plusFM x y) emptyFM blockCafs + caf_entry (ix, map, tbl') caf = (ix + 1, addToFM map caf ix, entry : tbl') + where entry = CmmStaticLit $ CmmLabel caf + (_::Int, cafMap, tbl') = foldl caf_entry (0, emptyFM, []) $ keysFM allCafs + top_tbl = CmmData RelocatableReadOnlyData $ CmmDataLabel top_lbl : reverse tbl' + sub_srt id cafs z = + do (tbls, blocks) <- z + (top, srt) <- procpointSRT top_lbl cafMap cafs + let blocks' = extendBlockEnv blocks id srt + case top of Just t -> return (t:tbls, blocks') + Nothing -> return (tbls, blocks') + (sub_tbls, blockSRTs) <- foldBlockEnv sub_srt (return ([], emptyBlockEnv)) blockCafs + return (top_tbl : sub_tbls, blockSRTs) + +-- Construct an SRT bitmap. +-- Adapted from simpleStg/SRT.lhs, which expects Id's. +procpointSRT :: CLabel -> FiniteMap CLabel Int -> FiniteMap CLabel () -> + FuelMonad (Maybe CmmTopZ, C_SRT) +procpointSRT top_srt top_table entries + | isEmptyFM entries = pprTrace "nil SRT" (ppr top_srt) $ return (Nothing, NoC_SRT) + | otherwise = pprTrace "non-nil SRT" (ppr top_srt) $ bitmap `seq` to_SRT top_srt offset len bitmap + where + ints = map (expectJust "constructSRT" . lookupFM top_table) (keysFM entries) + sorted_ints = sortLe (<=) ints + offset = head sorted_ints + bitmap_entries = map (subtract offset) sorted_ints + len = P.last bitmap_entries + 1 + bitmap = intsToBitmap len bitmap_entries + +-- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT. 
+to_SRT :: CLabel -> Int -> Int -> Bitmap -> FuelMonad (Maybe CmmTopZ, C_SRT) +to_SRT top_srt off len bmp + | len > widthInBits wordWidth `div` 2 || bmp == [fromIntegral srt_escape] + = do id <- getUniqueM + let srt_desc_lbl = mkLargeSRTLabel id + tbl = CmmData RelocatableReadOnlyData $ + CmmDataLabel srt_desc_lbl : map CmmStaticLit + ( cmmLabelOffW top_srt off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + return (Just tbl, C_SRT srt_desc_lbl 0 srt_escape) + | otherwise + = return (Nothing, C_SRT top_srt off (fromIntegral (head bmp))) + -- The fromIntegral converts to StgHalfWord + +-- Given a block ID, we return a representation of the layout of the stack. +-- If the element is `Nothing`, then it represents an empty or dead +-- word on the stack. +-- If the element is `Just` a register, then it represents a live spill slot +-- for the register; note that a register may occupy multiple words. +-- The head of the list represents the young end of the stack where the infotable +-- pointer for the block `Bid` is stored. +-- The infotable pointer itself is not included in the list. +live_vars :: BlockEnv SubAreaSet -> AreaMap -> BlockId -> [Maybe LocalReg] +live_vars slotEnv areaMap bid = slotsToList youngByte liveSlots + where slotsToList 0 [] = [] + slotsToList 0 ((_, r, _) : _) = pprPanic "slot left off live_vars" (ppr r) + slotsToList n _ | n < 0 = panic "stack slots not allocated on word boundaries?" 
+ slotsToList n ((n', r, w) : rst) = + if n == n' then Just r : slotsToList (n - w) rst + else Nothing : slotsToList (n - wORD_SIZE) rst + slotsToList n [] = Nothing : slotsToList (n - wORD_SIZE) [] + liveSlots = sortBy (\ (_,off,_) (_,off',_) -> compare off' off) + (foldFM (\_ -> flip $ foldr add_slot) [] slots) + add_slot (a@(RegSlot r@(LocalReg _ ty)), off, w) rst = + if off == w && widthInBytes (typeWidth ty) == w then + (expectJust "add_slot" (lookupFM areaMap a), r, w) : rst + else panic "live_vars: only part of a variable live at a proc point" + add_slot (CallArea Old, off, w) rst = + if off == wORD_SIZE && w == wORD_SIZE then + rst -- the return infotable should be live + else pprPanic "CallAreas must not be live across function calls" (ppr bid) + add_slot (CallArea (Young _), _, _) _ = + pprPanic "CallAreas must not be live across function calls" (ppr bid) + slots = expectJust "live_vars slots" $ lookupBlockEnv slotEnv bid + youngByte = expectJust "live_vars bid_pos" $ lookupFM areaMap (CallArea (Young bid)) ---------------------------------------------------------------- diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs index 3cc102f..67cf8d3 100644 --- a/compiler/cmm/CmmSpillReload.hs +++ b/compiler/cmm/CmmSpillReload.hs @@ -1,11 +1,9 @@ module CmmSpillReload - ( ExtendWithSpills(..) - , DualLive(..) + ( DualLive(..) , dualLiveLattice, dualLiveTransfers, dualLiveness --, insertSpillsAndReloads --- XXX todo check live-in at entry against formals , dualLivenessWithInsertion - , elimSpillAndReload , availRegsLattice , cmmAvailableReloads @@ -41,17 +39,10 @@ import Prelude hiding (zip) -- establish the invariant that at a call (or at any proc point with -- an established protocol) all live variables not expected in -- registers are sitting on the stack. We use a backward analysis to --- insert spills and reloads. It should some day be followed by a +-- insert spills and reloads. 
It should be followed by a -- forward transformation to sink reloads as deeply as possible, so as -- to reduce register pressure. -data ExtendWithSpills m - = NotSpillOrReload m - | Spill RegSet - | Reload RegSet - -type M = ExtendWithSpills Middle - -- A variable can be expected to be live in a register, live on the -- stack, or both. This analysis ensures that spills and reloads are -- inserted as needed to make sure that every live variable needed @@ -70,8 +61,8 @@ dualUnionList ls = DualLive ss rs where ss = unionManyUniqSets $ map on_stack ls rs = unionManyUniqSets $ map in_regs ls -_changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive -_changeStack f live = live { on_stack = f (on_stack live) } +changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive +changeStack f live = live { on_stack = f (on_stack live) } changeRegs f live = live { in_regs = f (in_regs live) } @@ -85,24 +76,23 @@ dualLiveLattice = return $ DualLive stack regs add1 = fact_add_to liveLattice -type LiveReloadFix a = FuelMonad (BackwardFixedPoint M Last DualLive a) +type LiveReloadFix a = FuelMonad (BackwardFixedPoint Middle Last DualLive a) -dualLivenessWithInsertion :: BlockSet -> (Graph M Last) -> FuelMonad (Graph M Last) +dualLivenessWithInsertion :: BlockSet -> (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) dualLivenessWithInsertion procPoints g = - liftM zdfFpContents $ (res :: LiveReloadFix (Graph M Last)) - where res = zdfRewriteFrom RewriteDeep emptyBlockEnv "dual liveness with insertion" - dualLiveLattice (dualLiveTransfers procPoints) - (insertSpillAndReloadRewrites procPoints) empty g + liftM zdfFpContents $ (res :: LiveReloadFix (LGraph Middle Last)) + where res = zdfBRewriteFromL RewriteDeep emptyBlockEnv "dual liveness with insertion" + dualLiveLattice (dualLiveTransfers procPoints) + (insertSpillAndReloadRewrites procPoints) empty g empty = fact_bot dualLiveLattice --- = a_ft_b_unlimited dualLiveness insertSpillsAndReloads -dualLiveness 
:: BlockSet -> Graph M Last -> FuelMonad (BlockEnv DualLive) +dualLiveness :: BlockSet -> LGraph Middle Last -> FuelMonad (BlockEnv DualLive) dualLiveness procPoints g = liftM zdfFpFacts $ (res :: LiveReloadFix ()) - where res = zdfSolveFrom emptyBlockEnv "dual liveness" dualLiveLattice - (dualLiveTransfers procPoints) empty g + where res = zdfSolveFromL emptyBlockEnv "dual liveness" dualLiveLattice + (dualLiveTransfers procPoints) empty g empty = fact_bot dualLiveLattice -dualLiveTransfers :: BlockSet -> BackwardTransfers M Last DualLive +dualLiveTransfers :: BlockSet -> BackwardTransfers Middle Last DualLive dualLiveTransfers procPoints = BackwardTransfers first middle last where last = lastDualLiveness middle = middleDualLiveness @@ -112,29 +102,25 @@ dualLiveTransfers procPoints = BackwardTransfers first middle last , in_regs = emptyRegSet } else live - -middleDualLiveness :: DualLive -> M -> DualLive -middleDualLiveness live (Spill regs) = live' - -- live-in on-stack requirements are satisfied; - -- live-out in-regs obligations are created - where live' = DualLive { on_stack = on_stack live `minusRegSet` regs - , in_regs = in_regs live `plusRegSet` regs } - -middleDualLiveness live (Reload regs) = live' - -- live-in in-regs requirements are satisfied; - -- live-out on-stack obligations are created - where live' = DualLive { on_stack = on_stack live `plusRegSet` regs - , in_regs = in_regs live `minusRegSet` regs } - -middleDualLiveness live (NotSpillOrReload m) = changeRegs (middleLiveness m) live +middleDualLiveness :: DualLive -> Middle -> DualLive +middleDualLiveness live m = + changeStack updSlots $ changeRegs (middleLiveness m) live + where updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m + spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r + spill live _ = live + reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r + reload live _ = live + check (RegSlot (LocalReg _ ty), o, w) x + | o == w && w == widthInBytes 
(typeWidth ty) = x + check _ _ = panic "middleDualLiveness unsupported: slices" lastDualLiveness :: (BlockId -> DualLive) -> Last -> DualLive lastDualLiveness env l = last l - where last (LastReturn) = empty - last (LastJump e) = changeRegs (gen e) empty - last (LastBranch id) = env id - last (LastCall tgt Nothing) = changeRegs (gen tgt) empty - last (LastCall tgt (Just k)) = + where last (LastReturn _) = empty + last (LastJump e _) = changeRegs (gen e) empty + last (LastBranch id) = env id + last (LastCall tgt Nothing _) = changeRegs (gen tgt) empty + last (LastCall tgt (Just k) _) = -- nothing can be live in registers at this point let live = env k in if isEmptyUniqSet (in_regs live) then @@ -142,77 +128,52 @@ lastDualLiveness env l = last l else pprTrace "Offending party:" (ppr k <+> ppr live) $ panic "live values in registers at call continuation" - last (LastCondBranch e t f) = changeRegs (gen e) $ dualUnion (env t) (env f) - last (LastSwitch e tbl) = changeRegs (gen e) $ dualUnionList $ + last (LastCondBranch e t f) = changeRegs (gen e) $ dualUnion (env t) (env f) + last (LastSwitch e tbl) = changeRegs (gen e) $ dualUnionList $ map env (catMaybes tbl) empty = fact_bot dualLiveLattice -gen, kill :: UserOfLocalRegs a => a -> RegSet -> RegSet -gen a live = foldRegsUsed extendRegSet live a -kill a live = foldRegsUsed delOneFromUniqSet live a +gen :: UserOfLocalRegs a => a -> RegSet -> RegSet +gen a live = foldRegsUsed extendRegSet live a -insertSpillAndReloadRewrites :: BlockSet -> BackwardRewrites M Last DualLive +insertSpillAndReloadRewrites :: BlockSet -> BackwardRewrites Middle Last DualLive insertSpillAndReloadRewrites procPoints = BackwardRewrites first middle last exit where middle = middleInsertSpillsAndReloads last = \_ _ -> Nothing exit = Nothing first live id = if elemBlockSet id procPoints && not (isEmptyUniqSet reloads) then - Just $ mkMiddles $ [Reload reloads] + Just $ mkMiddles $ map reload $ uniqSetToList reloads else Nothing - where reloads = 
in_regs live + where reloads = in_regs live -middleInsertSpillsAndReloads :: DualLive -> M -> Maybe (AGraph M Last) -middleInsertSpillsAndReloads _ (Spill _) = Nothing -middleInsertSpillsAndReloads _ (Reload _) = Nothing -middleInsertSpillsAndReloads live m@(NotSpillOrReload nsr) = middle nsr - where middle (MidAssign (CmmLocal reg) _) = +middleInsertSpillsAndReloads :: DualLive -> Middle -> Maybe (AGraph Middle Last) +middleInsertSpillsAndReloads live m = middle m + where middle (MidAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) + | reg == reg' = Nothing + middle (MidAssign (CmmLocal reg) _) = if reg `elemRegSet` on_stack live then -- must spill - my_trace "Spilling" (f4sep [text "spill" <+> ppr reg, - text "after", ppr m]) $ - Just $ mkMiddles [m, Spill $ mkRegSet [reg]] - else - Nothing - middle (CopyIn _ formals _) = - -- only 'formals' can be in regs at this point - let regs' = kill formals (in_regs live) -- live in regs; must reload - is_stack_var r = elemRegSet r (on_stack live) - needs_spilling = filterRegsUsed is_stack_var formals - -- a formal that is expected on the stack; must spill - in if isEmptyUniqSet regs' && isEmptyUniqSet needs_spilling then - Nothing - else - let code = if isEmptyUniqSet regs' then [] - else Reload regs' : [] - code' = if isEmptyUniqSet needs_spilling then code - else Spill needs_spilling : code - in - my_trace "At CopyIn" (f4sep [text "Triggered by ", ppr live, - ppr (Reload regs' :: M), - ppr (Spill needs_spilling :: M), - text "after", ppr m]) $ - Just $ mkMiddles (m : code') + my_trace "Spilling" (f4sep [text "spill" <+> ppr reg, + text "after", ppr m]) $ + Just $ mkMiddles $ [m, spill reg] + else Nothing middle _ = Nothing --- | For conversion back to vanilla C-- - -elimSpillAndReload :: StackSlotMap -> LGraph M l -> (StackSlotMap, LGraph Middle l) -elimSpillAndReload slots g = toGraph $ fold_blocks block ((slots, [])) g - where toGraph (slots, l) = (slots, of_block_list (lg_entry g) l) - block (Block id 
t) (slots, blocks) = - lift (\ t' -> Block id t' : blocks) $ tail t slots - tail (ZLast l) slots = (slots, ZLast l) - tail (ZTail m t) slots = middle m $ tail t slots - middle (NotSpillOrReload m) (slots, t) = (slots, ZTail m t) - middle (Spill regs) z = foldUniqSet spill z regs - middle (Reload regs) z = foldUniqSet reload z regs - move f r (slots, t) = - lift (\ slot -> ZTail (f slot (CmmLocal r)) t) $ getSlot slots r - spill = move (\ slot reg -> MidStore slot (CmmReg reg)) - reload = move (\ slot reg -> MidAssign reg slot) - lift f (slots, x) = (slots, f x) +-- Generating spill and reload code +regSlot :: LocalReg -> CmmExpr +regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r) + +spill, reload :: LocalReg -> Middle +spill r = MidStore (regSlot r) (CmmReg $ CmmLocal r) +reload r = MidAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r) +spillHead :: ZHead Middle -> RegSet -> ZHead Middle +reloadTail :: RegSet -> ZTail Middle Last -> ZTail Middle Last +spillHead h regset = foldl spl h $ uniqSetToList regset + where spl h r = ZHead h $ spill r +reloadTail regset t = foldl rel t $ uniqSetToList regset + where rel t r = ZTail (reload r) t ---------------------------------------------------------------- --- sinking reloads @@ -249,9 +210,9 @@ smallerAvail (UniverseMinus _) (AvailRegs _) = False smallerAvail (AvailRegs s) (AvailRegs s') = sizeUniqSet s < sizeUniqSet s' smallerAvail (UniverseMinus s) (UniverseMinus s') = sizeUniqSet s > sizeUniqSet s' -extendAvail :: AvailRegs -> LocalReg -> AvailRegs -extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r) -extendAvail (AvailRegs s) r = AvailRegs (extendRegSet s r) +--extendAvail :: AvailRegs -> LocalReg -> AvailRegs +--extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r) +--extendAvail (AvailRegs s) r = AvailRegs (extendRegSet s r) deleteFromAvail :: AvailRegs -> LocalReg -> AvailRegs deleteFromAvail (UniverseMinus s) r = UniverseMinus (extendRegSet 
s r) @@ -262,15 +223,15 @@ elemAvail (UniverseMinus s) r = not $ elemRegSet r s elemAvail (AvailRegs s) r = elemRegSet r s type CmmAvail = BlockEnv AvailRegs -type AvailFix = FuelMonad (ForwardFixedPoint M Last AvailRegs ()) +type AvailFix = FuelMonad (ForwardFixedPoint Middle Last AvailRegs ()) -cmmAvailableReloads :: Graph M Last -> FuelMonad CmmAvail +cmmAvailableReloads :: LGraph Middle Last -> FuelMonad CmmAvail cmmAvailableReloads g = liftM zdfFpFacts $ (res :: AvailFix) - where res = zdfSolveFrom emptyBlockEnv "available reloads" availRegsLattice - avail_reloads_transfer empty g + where res = zdfSolveFromL emptyBlockEnv "available reloads" availRegsLattice + avail_reloads_transfer empty g empty = (fact_bot availRegsLattice) -avail_reloads_transfer :: ForwardTransfers M Last AvailRegs +avail_reloads_transfer :: ForwardTransfers Middle Last AvailRegs avail_reloads_transfer = ForwardTransfers first middle last id where first avail _ = avail middle = flip middleAvail @@ -278,36 +239,33 @@ avail_reloads_transfer = ForwardTransfers first middle last id -- | The transfer equations use the traditional 'gen' and 'kill' -- notations, which should be familiar from the dragon book. -agen, akill :: UserOfLocalRegs a => a -> AvailRegs -> AvailRegs -agen a live = foldRegsUsed extendAvail live a +--agen, +akill :: UserOfLocalRegs a => a -> AvailRegs -> AvailRegs +--agen a live = foldRegsUsed extendAvail live a akill a live = foldRegsUsed deleteFromAvail live a -- Note: you can't sink the reload past a use. 
-middleAvail :: M -> AvailRegs -> AvailRegs -middleAvail (Spill _) = id -middleAvail (Reload regs) = agen regs -middleAvail (NotSpillOrReload m) = middle m +middleAvail :: Middle -> AvailRegs -> AvailRegs +middleAvail m = middle m where middle m live = middle' m $ foldRegsUsed deleteFromAvail live m middle' (MidComment {}) = id middle' (MidAssign lhs _expr) = akill lhs middle' (MidStore {}) = id middle' (MidUnsafeCall _tgt ress _args) = akill ress middle' (MidAddToContext {}) = id - middle' (CopyIn _ formals _) = akill formals - middle' (CopyOut {}) = id lastAvail :: AvailRegs -> Last -> LastOutFacts AvailRegs -lastAvail _ (LastCall _ (Just k)) = LastOutFacts [(k, AvailRegs emptyRegSet)] +lastAvail _ (LastCall _ (Just k) _) = LastOutFacts [(k, AvailRegs emptyRegSet)] lastAvail avail l = LastOutFacts $ map (\id -> (id, avail)) $ succs l -insertLateReloads :: Graph M Last -> FuelMonad (Graph M Last) +insertLateReloads :: LGraph Middle Last -> FuelMonad (LGraph Middle Last) insertLateReloads g = do env <- cmmAvailableReloads g - g <- lGraphOfGraph g - liftM graphOfLGraph $ mapM_blocks (insertM env) g + mapM_blocks (insertM env) g where insertM env b = fuelConsumingPass "late reloads" (insert b) where avail id = lookupBlockEnv env id `orElse` AvailRegs emptyRegSet - insert (Block id tail) fuel = propagate (ZFirst id) (avail id) tail fuel + insert (Block id off tail) fuel = + propagate (ZFirst id off) (avail id) tail fuel propagate h avail (ZTail m t) fuel = let (h', fuel') = maybe_add_reload h avail m fuel in propagate (ZHead h' m) (middleAvail m avail) t fuel' @@ -318,31 +276,31 @@ insertLateReloads g = let used = filterRegsUsed (elemAvail avail) node in if not (canRewriteWithFuel fuel) || isEmptyUniqSet used then (h,fuel) - else (ZHead h (Reload used), oneLessFuel fuel) + else (spillHead h used, oneLessFuel fuel) -type LateReloadFix = FuelMonad (ForwardFixedPoint M Last AvailRegs (Graph M Last)) +type LateReloadFix = FuelMonad (ForwardFixedPoint Middle Last AvailRegs 
(LGraph Middle Last)) -insertLateReloads' :: (Graph M Last) -> FuelMonad (Graph M Last) +insertLateReloads' :: (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) insertLateReloads' g = liftM zdfFpContents $ (res :: LateReloadFix) - where res = zdfRewriteFrom RewriteShallow emptyBlockEnv "insert late reloads" - availRegsLattice avail_reloads_transfer rewrites bot g + where res = zdfFRewriteFromL RewriteShallow emptyBlockEnv "insert late reloads" + availRegsLattice avail_reloads_transfer rewrites bot g bot = fact_bot availRegsLattice rewrites = ForwardRewrites first middle last exit first _ _ = Nothing - middle :: AvailRegs -> M -> Maybe (AGraph M Last) - last :: AvailRegs -> Last -> Maybe (AGraph M Last) + middle :: AvailRegs -> Middle -> Maybe (AGraph Middle Last) + last :: AvailRegs -> Last -> Maybe (AGraph Middle Last) middle avail m = maybe_reload_before avail m (ZTail m (ZLast LastExit)) last avail l = maybe_reload_before avail l (ZLast (LastOther l)) exit _ = Nothing maybe_reload_before avail node tail = let used = filterRegsUsed (elemAvail avail) node in if isEmptyUniqSet used then Nothing - else Just $ mkZTail $ ZTail (Reload used) tail + else Just $ mkZTail $ reloadTail used tail -removeDeadAssignmentsAndReloads :: BlockSet -> (Graph M Last) -> FuelMonad (Graph M Last) +removeDeadAssignmentsAndReloads :: BlockSet -> (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) removeDeadAssignmentsAndReloads procPoints g = - liftM zdfFpContents $ (res :: LiveReloadFix (Graph M Last)) - where res = zdfRewriteFrom RewriteDeep emptyBlockEnv "dead-assignment & -reload elim" + liftM zdfFpContents $ (res :: LiveReloadFix (LGraph Middle Last)) + where res = zdfBRewriteFromL RewriteDeep emptyBlockEnv "dead-assignment & -reload elim" dualLiveLattice (dualLiveTransfers procPoints) rewrites (fact_bot dualLiveLattice) g rewrites = BackwardRewrites first middle last exit @@ -351,16 +309,8 @@ removeDeadAssignmentsAndReloads procPoints g = middle = middleRemoveDeads first _ 
_ = Nothing -middleRemoveDeads :: DualLive -> M -> Maybe (AGraph M Last) -middleRemoveDeads _ (Spill _) = Nothing -middleRemoveDeads live (Reload s) = - if sizeUniqSet worth_reloading < sizeUniqSet s then - Just $ if isEmptyUniqSet worth_reloading then emptyAGraph - else mkMiddles [Reload worth_reloading] - else - Nothing - where worth_reloading = intersectUniqSets s (in_regs live) -middleRemoveDeads live (NotSpillOrReload m) = middle m +middleRemoveDeads :: DualLive -> Middle -> Maybe (AGraph Middle Last) +middleRemoveDeads live m = middle m where middle (MidAssign (CmmLocal reg') _) | not (reg' `elemRegSet` in_regs live) = Just emptyAGraph middle _ = Nothing @@ -368,23 +318,8 @@ middleRemoveDeads live (NotSpillOrReload m) = middle m --------------------- --- register usage - -instance UserOfLocalRegs m => UserOfLocalRegs (ExtendWithSpills m) where - foldRegsUsed f z (Spill regs) = foldRegsUsed f z regs - foldRegsUsed _f z (Reload _) = z - foldRegsUsed f z (NotSpillOrReload m) = foldRegsUsed f z m - ---------------------- -- prettyprinting -instance Outputable m => Outputable (ExtendWithSpills m) where - ppr (Spill regs) = ppr_regs "Spill" regs - ppr (Reload regs) = ppr_regs "Reload" regs - ppr (NotSpillOrReload m) = ppr m - -instance Outputable m => DebugNodes (ExtendWithSpills m) Last - ppr_regs :: String -> RegSet -> SDoc ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs) where commafy xs = hsep $ punctuate comma xs diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 1922ee0..841f65b 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -17,6 +17,9 @@ module CmmUtils( CmmStmts, noStmts, oneStmt, mkStmts, plusStmts, stmtList, isNopStmt, + primRepCmmType, primRepForeignHint, + typeCmmType, typeForeignHint, + isTrivialCmmExpr, hasNoGlobalRegs, cmmRegOff, cmmLabelOff, cmmOffset, cmmOffsetLit, cmmIndex, @@ -26,20 +29,57 @@ module CmmUtils( mkLblExpr, - loadArgsIntoTemps, maybeAssignTemp, + maybeAssignTemp, 
loadArgsIntoTemps ) where #include "HsVersions.h" +import TyCon ( PrimRep(..) ) +import Type ( Type, typePrimRep ) + import CLabel import Cmm -import MachOp import OrdList import Outputable import Unique --------------------------------------------------- -- +-- CmmTypes +-- +--------------------------------------------------- + +primRepCmmType :: PrimRep -> CmmType +primRepCmmType VoidRep = panic "primRepCmmType:VoidRep" +primRepCmmType PtrRep = gcWord +primRepCmmType IntRep = bWord +primRepCmmType WordRep = bWord +primRepCmmType Int64Rep = b64 +primRepCmmType Word64Rep = b64 +primRepCmmType AddrRep = bWord +primRepCmmType FloatRep = f32 +primRepCmmType DoubleRep = f64 + +typeCmmType :: Type -> CmmType +typeCmmType ty = primRepCmmType (typePrimRep ty) + +primRepForeignHint :: PrimRep -> ForeignHint +primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep" +primRepForeignHint PtrRep = AddrHint +primRepForeignHint IntRep = SignedHint +primRepForeignHint WordRep = NoHint +primRepForeignHint Int64Rep = SignedHint +primRepForeignHint Word64Rep = NoHint +primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg +primRepForeignHint FloatRep = NoHint +primRepForeignHint DoubleRep = NoHint + +typeForeignHint :: Type -> ForeignHint +typeForeignHint = primRepForeignHint . typePrimRep + + +--------------------------------------------------- +-- -- CmmStmts -- --------------------------------------------------- @@ -115,12 +155,11 @@ hasNoGlobalRegs _ = False --------------------------------------------------- cmmOffsetExpr :: CmmExpr -> CmmExpr -> CmmExpr --- assumes base and offset have the same MachRep +-- assumes base and offset have the same CmmType cmmOffsetExpr e (CmmLit (CmmInt n _)) = cmmOffset e (fromInteger n) -cmmOffsetExpr e byte_off = CmmMachOp (MO_Add (cmmExprRep e)) [e, byte_off] +cmmOffsetExpr e byte_off = CmmMachOp (MO_Add (cmmExprWidth e)) [e, byte_off] -- NB. Do *not* inspect the value of the offset in these smart constructors!!! 
--- -- because the offset is sometimes involved in a loop in the code generator -- (we don't know the real Hp offset until we've generated code for the entire -- basic block, for example). So we cannot eliminate zero offsets at this @@ -136,9 +175,9 @@ cmmOffset (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_o = CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] cmmOffset expr byte_off - = CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (toInteger byte_off) rep)] + = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)] where - rep = cmmExprRep expr + width = cmmExprWidth expr -- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. cmmRegOff :: CmmReg -> Int -> CmmExpr @@ -156,21 +195,27 @@ cmmLabelOff lbl 0 = CmmLabel lbl cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off -- | Useful for creating an index into an array, with a staticaly known offset. -cmmIndex :: MachRep -> CmmExpr -> Int -> CmmExpr -cmmIndex rep base idx = cmmOffset base (idx * machRepByteWidth rep) +-- The type is the element type; used for making the multiplier +cmmIndex :: Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> Int -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndex width base idx = cmmOffset base (idx * widthInBytes width) -- | Useful for creating an index into an array, with an unknown offset. 
-cmmIndexExpr :: MachRep -> CmmExpr -> CmmExpr -> CmmExpr -cmmIndexExpr rep base (CmmLit (CmmInt n _)) = cmmIndex rep base (fromInteger n) -cmmIndexExpr rep base idx = +cmmIndexExpr :: Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> CmmExpr -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndexExpr width base (CmmLit (CmmInt n _)) = cmmIndex width base (fromInteger n) +cmmIndexExpr width base idx = cmmOffsetExpr base byte_off where - idx_rep = cmmExprRep idx - byte_off = CmmMachOp (MO_Shl idx_rep) [ - idx, CmmLit (mkIntCLit (machRepLogWidth rep))] + idx_w = cmmExprWidth idx + byte_off = CmmMachOp (MO_Shl idx_w) [idx, CmmLit (mkIntCLit (widthInLog width))] -cmmLoadIndex :: MachRep -> CmmExpr -> Int -> CmmExpr -cmmLoadIndex rep expr ix = CmmLoad (cmmIndex rep expr ix) rep +cmmLoadIndex :: CmmType -> CmmExpr -> Int -> CmmExpr +cmmLoadIndex ty expr ix = CmmLoad (cmmIndex (typeWidth ty) expr ix) ty --------------------------------------------------- -- @@ -179,10 +224,10 @@ cmmLoadIndex rep expr ix = CmmLoad (cmmIndex rep expr ix) rep --------------------------------------------------- mkIntCLit :: Int -> CmmLit -mkIntCLit i = CmmInt (toInteger i) wordRep +mkIntCLit i = CmmInt (toInteger i) wordWidth zeroCLit :: CmmLit -zeroCLit = CmmInt 0 wordRep +zeroCLit = CmmInt 0 wordWidth mkLblExpr :: CLabel -> CmmExpr mkLblExpr lbl = CmmLit (CmmLabel lbl) @@ -194,20 +239,21 @@ mkLblExpr lbl = CmmLit (CmmLabel lbl) --------------------------------------------------- loadArgsIntoTemps :: [Unique] - -> CmmActuals - -> ([Unique], [CmmStmt], CmmActuals) + -> HintedCmmActuals + -> ([Unique], [CmmStmt], HintedCmmActuals) loadArgsIntoTemps uniques [] = (uniques, [], []) -loadArgsIntoTemps uniques ((CmmKinded e hint):args) = +loadArgsIntoTemps uniques ((CmmHinted e hint):args) = (uniques'', new_stmts ++ remaining_stmts, - (CmmKinded new_e hint) : remaining_e) + (CmmHinted new_e hint) : remaining_e) where (uniques', new_stmts, 
new_e) = maybeAssignTemp uniques e (uniques'', remaining_stmts, remaining_e) = loadArgsIntoTemps uniques' args + maybeAssignTemp :: [Unique] -> CmmExpr -> ([Unique], [CmmStmt], CmmExpr) maybeAssignTemp uniques e | hasNoGlobalRegs e = (uniques, [], e) | otherwise = (tail uniques, [CmmAssign local e], CmmReg local) - where local = CmmLocal (LocalReg (head uniques) (cmmExprRep e) GCKindNonPtr) + where local = CmmLocal (LocalReg (head uniques) (cmmExprType e)) diff --git a/compiler/cmm/CmmZipUtil.hs b/compiler/cmm/CmmZipUtil.hs index 6a343f8..9f0993d 100644 --- a/compiler/cmm/CmmZipUtil.hs +++ b/compiler/cmm/CmmZipUtil.hs @@ -14,7 +14,7 @@ import UniqSet -- | Compute the predecessors of each /reachable/ block zipPreds :: LastNode l => LGraph m l -> BlockEnv BlockSet zipPreds g = foldl add emptyBlockEnv (postorder_dfs g) - where add env block@(Block id _) = + where add env block@(Block id _ _) = foldl (\env sid -> let preds = lookupBlockEnv env sid `orElse` emptyBlockSet in extendBlockEnv env sid (extendBlockSet preds id)) diff --git a/compiler/cmm/DFMonad.hs b/compiler/cmm/DFMonad.hs index 209403e..cce112b 100644 --- a/compiler/cmm/DFMonad.hs +++ b/compiler/cmm/DFMonad.hs @@ -7,7 +7,6 @@ module DFMonad , DFM, runDFM, liftToDFM , markGraphRewritten, graphWasRewritten - , freshBlockId , module OptimizationFuel ) where @@ -194,9 +193,6 @@ graphWasRewritten :: DFM f ChangeFlag graphWasRewritten = DFM' f where f _ s = return (df_rewritten s, s) -freshBlockId :: String -> DFM f BlockId -freshBlockId _s = getUniqueM >>= return . BlockId - instance Monad m => Monad (DFM' m f) where DFM' f >>= k = DFM' (\l s -> do (a, s') <- f l s let DFM' f' = k a in f' l s') diff --git a/compiler/cmm/MachOp.hs b/compiler/cmm/MachOp.hs deleted file mode 100644 index 422ed5e..0000000 --- a/compiler/cmm/MachOp.hs +++ /dev/null @@ -1,661 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. 
--- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ------------------------------------------------------------------------------ --- --- (c) The University of Glasgow 2002-2006 --- --- Low-level machine operations, used in the Cmm datatype. --- ------------------------------------------------------------------------------ - -module MachOp ( - MachRep(..), - machRepBitWidth, - machRepByteWidth, - machRepLogWidth, - isFloatingRep, - - MachHint(..), - - MachOp(..), - pprMachOp, - isCommutableMachOp, - isAssociativeMachOp, - isComparisonMachOp, - resultRepOfMachOp, - machOpArgReps, - maybeInvertComparison, - - CallishMachOp(..), - pprCallishMachOp, - - wordRep, - halfWordRep, - cIntRep, cLongRep, - - mo_wordAdd, - mo_wordSub, - mo_wordEq, - mo_wordNe, - mo_wordMul, - mo_wordSQuot, - mo_wordSRem, - mo_wordSNeg, - mo_wordUQuot, - mo_wordURem, - - mo_wordSGe, - mo_wordSLe, - mo_wordSGt, - mo_wordSLt, - - mo_wordUGe, - mo_wordULe, - mo_wordUGt, - mo_wordULt, - - mo_wordAnd, - mo_wordOr, - mo_wordXor, - mo_wordNot, - mo_wordShl, - mo_wordSShr, - mo_wordUShr, - - mo_u_8To32, - mo_s_8To32, - mo_u_16To32, - mo_s_16To32, - - mo_u_8ToWord, - mo_s_8ToWord, - mo_u_16ToWord, - mo_s_16ToWord, - mo_u_32ToWord, - mo_s_32ToWord, - - mo_32To8, - mo_32To16, - mo_WordTo8, - mo_WordTo16, - mo_WordTo32, - ) where - -#include "HsVersions.h" - -import Constants -import Outputable -import FastString - --- ----------------------------------------------------------------------------- --- MachRep - -{- | -A MachRep is the "representation" of a value in Cmm. It is used for -resource allocation: eg. which kind of register a value should be -stored in. 
- -The primary requirement is that there exists a function - - cmmExprRep :: CmmExpr -> MachRep - -This means that: - - - a register has an implicit MachRep - - a literal has an implicit MachRep - - an operation (MachOp) has an implicit result MachRep - -It also means that we can check that the arguments to a MachOp have -the correct MachRep, i.e. we can do a kind of lint-style type checking -on Cmm. --} - -data MachRep - = I8 - | I16 - | I32 - | I64 - | I128 - | F32 - | F64 - | F80 -- extended double-precision, used in x86 native codegen only. - deriving (Eq, Ord, Show) - -mrStr I8 = sLit "I8" -mrStr I16 = sLit "I16" -mrStr I32 = sLit "I32" -mrStr I64 = sLit "I64" -mrStr I128 = sLit "I128" -mrStr F32 = sLit "F32" -mrStr F64 = sLit "F64" -mrStr F80 = sLit "F80" - -instance Outputable MachRep where - ppr rep = ptext (mrStr rep) - -{- -Implementation notes: - -It might suffice to keep just a width, without distinguishing between -floating and integer types. However, keeping the distinction will -help the native code generator to assign registers more easily. --} - -{- -Should a MachRep include a signed vs. unsigned distinction? - -This is very much like a "hint" in C-- terminology: it isn't necessary -in order to generate correct code, but it might be useful in that the -compiler can generate better code if it has access to higher-level -hints about data. This is important at call boundaries, because the -definition of a function is not visible at all of its call sites, so -the compiler cannot infer the hints. - -Here in Cmm, we're taking a slightly different approach. We include -the int vs. float hint in the MachRep, because (a) the majority of -platforms have a strong distinction between float and int registers, -and (b) we don't want to do any heavyweight hint-inference in the -native code backend in order to get good code. We're treating the -hint more like a type: our Cmm is always completely consistent with -respect to hints. 
All coercions between float and int are explicit. - -What about the signed vs. unsigned hint? This information might be -useful if we want to keep sub-word-sized values in word-size -registers, which we must do if we only have word-sized registers. - -On such a system, there are two straightforward conventions for -representing sub-word-sized values: - -(a) Leave the upper bits undefined. Comparison operations must - sign- or zero-extend both operands before comparing them, - depending on whether the comparison is signed or unsigned. - -(b) Always keep the values sign- or zero-extended as appropriate. - Arithmetic operations must narrow the result to the appropriate - size. - -A clever compiler might not use either (a) or (b) exclusively, instead -it would attempt to minimize the coercions by analysis: the same kind -of analysis that propagates hints around. In Cmm we don't want to -have to do this, so we plump for having richer types and keeping the -type information consistent. - -If signed/unsigned hints are missing from MachRep, then the only -choice we have is (a), because we don't know whether the result of an -operation should be sign- or zero-extended. - -Many architectures have extending load operations, which work well -with (b). To make use of them with (a), you need to know whether the -value is going to be sign- or zero-extended by an enclosing comparison -(for example), which involves knowing above the context. This is -doable but more complex. - -Further complicating the issue is foreign calls: a foreign calling -convention can specify that signed 8-bit quantities are passed as -sign-extended 32 bit quantities, for example (this is the case on the -PowerPC). So we *do* need sign information on foreign call arguments. - -Pros for adding signed vs. unsigned to MachRep: - - - It would let us use convention (b) above, and get easier - code generation for extending loads. - - - Less information required on foreign calls. 
- - - MachOp type would be simpler - -Cons: - - - More complexity - - - What is the MachRep for a VanillaReg? Currently it is - always wordRep, but now we have to decide whether it is - signed or unsigned. The same VanillaReg can thus have - different MachReps in different parts of the program. - - - Extra coercions cluttering up expressions. - -Currently for GHC, the foreign call point is moot, because we do our -own promotion of sub-word-sized values to word-sized values. The Int8 -type is represnted by an Int# which is kept sign-extended at all times -(this is slightly naughty, because we're making assumptions about the -C calling convention rather early on in the compiler). However, given -this, the cons outweigh the pros. - --} - - -machRepBitWidth :: MachRep -> Int -machRepBitWidth I8 = 8 -machRepBitWidth I16 = 16 -machRepBitWidth I32 = 32 -machRepBitWidth I64 = 64 -machRepBitWidth I128 = 128 -machRepBitWidth F32 = 32 -machRepBitWidth F64 = 64 -machRepBitWidth F80 = 80 - -machRepByteWidth :: MachRep -> Int -machRepByteWidth I8 = 1 -machRepByteWidth I16 = 2 -machRepByteWidth I32 = 4 -machRepByteWidth I64 = 8 -machRepByteWidth I128 = 16 -machRepByteWidth F32 = 4 -machRepByteWidth F64 = 8 -machRepByteWidth F80 = 10 - --- log_2 of the width in bytes, useful for generating shifts. -machRepLogWidth :: MachRep -> Int -machRepLogWidth I8 = 0 -machRepLogWidth I16 = 1 -machRepLogWidth I32 = 2 -machRepLogWidth I64 = 3 -machRepLogWidth I128 = 4 -machRepLogWidth F32 = 2 -machRepLogWidth F64 = 3 -machRepLogWidth F80 = panic "machRepLogWidth: F80" - -isFloatingRep :: MachRep -> Bool -isFloatingRep F32 = True -isFloatingRep F64 = True -isFloatingRep F80 = True -isFloatingRep _ = False - --- ----------------------------------------------------------------------------- --- Hints - -{- -A hint gives a little more information about a data value. 
Hints are -used on the arguments to a foreign call, where the code generator needs -to know some extra information on top of the MachRep of each argument in -order to generate a correct call. --} - -data MachHint - = NoHint - | PtrHint - | SignedHint - | FloatHint - deriving Eq - -mhStr NoHint = sLit "NoHint" -mhStr PtrHint = sLit "PtrHint" -mhStr SignedHint = sLit "SignedHint" -mhStr FloatHint = sLit "FloatHint" - -instance Outputable MachHint where - ppr hint = ptext (mhStr hint) - --- ----------------------------------------------------------------------------- --- MachOp - -{- | -Machine-level primops; ones which we can reasonably delegate to the -native code generators to handle. Basically contains C's primops -and no others. - -Nomenclature: all ops indicate width and signedness, where -appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. -Nat means the operation works on STG word sized objects. -Signedness: S means signed, U means unsigned. For operations where -signedness is irrelevant or makes no difference (for example -integer add), the signedness component is omitted. - -An exception: NatP is a ptr-typed native word. From the point of -view of the native code generators this distinction is irrelevant, -but the C code generator sometimes needs this info to emit the -right casts. 
--} - -data MachOp - - -- Integer operations - = MO_Add MachRep - | MO_Sub MachRep - | MO_Eq MachRep - | MO_Ne MachRep - | MO_Mul MachRep -- low word of multiply - | MO_S_MulMayOflo MachRep -- nonzero if signed multiply overflows - | MO_S_Quot MachRep -- signed / (same semantics as IntQuotOp) - | MO_S_Rem MachRep -- signed % (same semantics as IntRemOp) - | MO_S_Neg MachRep -- unary - - | MO_U_MulMayOflo MachRep -- nonzero if unsigned multiply overflows - | MO_U_Quot MachRep -- unsigned / (same semantics as WordQuotOp) - | MO_U_Rem MachRep -- unsigned % (same semantics as WordRemOp) - - -- Signed comparisons (floating-point comparisons also use these) - | MO_S_Ge MachRep - | MO_S_Le MachRep - | MO_S_Gt MachRep - | MO_S_Lt MachRep - - -- Unsigned comparisons - | MO_U_Ge MachRep - | MO_U_Le MachRep - | MO_U_Gt MachRep - | MO_U_Lt MachRep - - -- Bitwise operations. Not all of these may be supported at all sizes, - -- and only integral MachReps are valid. - | MO_And MachRep - | MO_Or MachRep - | MO_Xor MachRep - | MO_Not MachRep - | MO_Shl MachRep - | MO_U_Shr MachRep -- unsigned shift right - | MO_S_Shr MachRep -- signed shift right - - -- Conversions. Some of these will be NOPs. - -- Floating-point conversions use the signed variant. - | MO_S_Conv MachRep{-from-} MachRep{-to-} -- signed conversion - | MO_U_Conv MachRep{-from-} MachRep{-to-} -- unsigned conversion - - deriving (Eq, Show) - -pprMachOp :: MachOp -> SDoc -pprMachOp mo = text (show mo) - - --- These MachOps tend to be implemented by foreign calls in some backends, --- so we separate them out. In Cmm, these can only occur in a --- statement position, in contrast to an ordinary MachOp which can occur --- anywhere in an expression. 
-data CallishMachOp - = MO_F64_Pwr - | MO_F64_Sin - | MO_F64_Cos - | MO_F64_Tan - | MO_F64_Sinh - | MO_F64_Cosh - | MO_F64_Tanh - | MO_F64_Asin - | MO_F64_Acos - | MO_F64_Atan - | MO_F64_Log - | MO_F64_Exp - | MO_F64_Sqrt - | MO_F32_Pwr - | MO_F32_Sin - | MO_F32_Cos - | MO_F32_Tan - | MO_F32_Sinh - | MO_F32_Cosh - | MO_F32_Tanh - | MO_F32_Asin - | MO_F32_Acos - | MO_F32_Atan - | MO_F32_Log - | MO_F32_Exp - | MO_F32_Sqrt - | MO_WriteBarrier - deriving (Eq, Show) - -pprCallishMachOp :: CallishMachOp -> SDoc -pprCallishMachOp mo = text (show mo) - --- ----------------------------------------------------------------------------- --- Some common MachReps - --- A 'wordRep' is a machine word on the target architecture --- Specifically, it is the size of an Int#, Word#, Addr# --- and the unit of allocation on the stack and the heap --- Any pointer is also guaranteed to be a wordRep. - -wordRep | wORD_SIZE == 4 = I32 - | wORD_SIZE == 8 = I64 - | otherwise = panic "MachOp.wordRep: Unknown word size" - -halfWordRep | wORD_SIZE == 4 = I16 - | wORD_SIZE == 8 = I32 - | otherwise = panic "MachOp.halfWordRep: Unknown word size" - -mo_wordAdd = MO_Add wordRep -mo_wordSub = MO_Sub wordRep -mo_wordEq = MO_Eq wordRep -mo_wordNe = MO_Ne wordRep -mo_wordMul = MO_Mul wordRep -mo_wordSQuot = MO_S_Quot wordRep -mo_wordSRem = MO_S_Rem wordRep -mo_wordSNeg = MO_S_Neg wordRep -mo_wordUQuot = MO_U_Quot wordRep -mo_wordURem = MO_U_Rem wordRep - -mo_wordSGe = MO_S_Ge wordRep -mo_wordSLe = MO_S_Le wordRep -mo_wordSGt = MO_S_Gt wordRep -mo_wordSLt = MO_S_Lt wordRep - -mo_wordUGe = MO_U_Ge wordRep -mo_wordULe = MO_U_Le wordRep -mo_wordUGt = MO_U_Gt wordRep -mo_wordULt = MO_U_Lt wordRep - -mo_wordAnd = MO_And wordRep -mo_wordOr = MO_Or wordRep -mo_wordXor = MO_Xor wordRep -mo_wordNot = MO_Not wordRep -mo_wordShl = MO_Shl wordRep -mo_wordSShr = MO_S_Shr wordRep -mo_wordUShr = MO_U_Shr wordRep - -mo_u_8To32 = MO_U_Conv I8 I32 -mo_s_8To32 = MO_S_Conv I8 I32 -mo_u_16To32 = MO_U_Conv I16 I32 -mo_s_16To32 
= MO_S_Conv I16 I32 - -mo_u_8ToWord = MO_U_Conv I8 wordRep -mo_s_8ToWord = MO_S_Conv I8 wordRep -mo_u_16ToWord = MO_U_Conv I16 wordRep -mo_s_16ToWord = MO_S_Conv I16 wordRep -mo_s_32ToWord = MO_S_Conv I32 wordRep -mo_u_32ToWord = MO_U_Conv I32 wordRep - -mo_WordTo8 = MO_U_Conv wordRep I8 -mo_WordTo16 = MO_U_Conv wordRep I16 -mo_WordTo32 = MO_U_Conv wordRep I32 - -mo_32To8 = MO_U_Conv I32 I8 -mo_32To16 = MO_U_Conv I32 I16 - --- cIntRep is the MachRep for a C-language 'int' -#if SIZEOF_INT == 4 -cIntRep = I32 -#elif SIZEOF_INT == 8 -cIntRep = I64 -#endif - -#if SIZEOF_LONG == 4 -cLongRep = I32 -#elif SIZEOF_LONG == 8 -cLongRep = I64 -#endif - --- ---------------------------------------------------------------------------- --- isCommutableMachOp - -{- | -Returns @True@ if the MachOp has commutable arguments. This is used -in the platform-independent Cmm optimisations. - -If in doubt, return @False@. This generates worse code on the -native routes, but is otherwise harmless. --} -isCommutableMachOp :: MachOp -> Bool -isCommutableMachOp mop = - case mop of - MO_Add _ -> True - MO_Eq _ -> True - MO_Ne _ -> True - MO_Mul _ -> True - MO_S_MulMayOflo _ -> True - MO_U_MulMayOflo _ -> True - MO_And _ -> True - MO_Or _ -> True - MO_Xor _ -> True - _other -> False - --- ---------------------------------------------------------------------------- --- isAssociativeMachOp - -{- | -Returns @True@ if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) -This is used in the platform-independent Cmm optimisations. - -If in doubt, return @False@. This generates worse code on the -native routes, but is otherwise harmless. 
--} -isAssociativeMachOp :: MachOp -> Bool -isAssociativeMachOp mop = - case mop of - MO_Add r -> not (isFloatingRep r) - MO_Mul r -> not (isFloatingRep r) - MO_And _ -> True - MO_Or _ -> True - MO_Xor _ -> True - _other -> False - --- ---------------------------------------------------------------------------- --- isComparisonMachOp - -{- | -Returns @True@ if the MachOp is a comparison. - -If in doubt, return False. This generates worse code on the -native routes, but is otherwise harmless. --} -isComparisonMachOp :: MachOp -> Bool -isComparisonMachOp mop = - case mop of - MO_Eq _ -> True - MO_Ne _ -> True - MO_S_Ge _ -> True - MO_S_Le _ -> True - MO_S_Gt _ -> True - MO_S_Lt _ -> True - MO_U_Ge _ -> True - MO_U_Le _ -> True - MO_U_Gt _ -> True - MO_U_Lt _ -> True - _other -> False - --- ----------------------------------------------------------------------------- --- Inverting conditions - --- Sometimes it's useful to be able to invert the sense of a --- condition. Not all conditional tests are invertible: in --- particular, floating point conditionals cannot be inverted, because --- there exist floating-point values which return False for both senses --- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). 
- -maybeInvertComparison :: MachOp -> Maybe MachOp -maybeInvertComparison op - = case op of - MO_Eq r | not (isFloatingRep r) -> Just (MO_Ne r) - MO_Ne r | not (isFloatingRep r) -> Just (MO_Eq r) - MO_U_Lt r | not (isFloatingRep r) -> Just (MO_U_Ge r) - MO_U_Gt r | not (isFloatingRep r) -> Just (MO_U_Le r) - MO_U_Le r | not (isFloatingRep r) -> Just (MO_U_Gt r) - MO_U_Ge r | not (isFloatingRep r) -> Just (MO_U_Lt r) - MO_S_Lt r | not (isFloatingRep r) -> Just (MO_S_Ge r) - MO_S_Gt r | not (isFloatingRep r) -> Just (MO_S_Le r) - MO_S_Le r | not (isFloatingRep r) -> Just (MO_S_Gt r) - MO_S_Ge r | not (isFloatingRep r) -> Just (MO_S_Lt r) - _other -> Nothing - --- ---------------------------------------------------------------------------- --- resultRepOfMachOp - -{- | -Returns the MachRep of the result of a MachOp. --} -resultRepOfMachOp :: MachOp -> MachRep -resultRepOfMachOp mop = - case mop of - MO_Add r -> r - MO_Sub r -> r - MO_Eq r -> comparisonResultRep - MO_Ne r -> comparisonResultRep - MO_Mul r -> r - MO_S_MulMayOflo r -> r - MO_S_Quot r -> r - MO_S_Rem r -> r - MO_S_Neg r -> r - MO_U_MulMayOflo r -> r - MO_U_Quot r -> r - MO_U_Rem r -> r - - MO_S_Ge r -> comparisonResultRep - MO_S_Le r -> comparisonResultRep - MO_S_Gt r -> comparisonResultRep - MO_S_Lt r -> comparisonResultRep - - MO_U_Ge r -> comparisonResultRep - MO_U_Le r -> comparisonResultRep - MO_U_Gt r -> comparisonResultRep - MO_U_Lt r -> comparisonResultRep - - MO_And r -> r - MO_Or r -> r - MO_Xor r -> r - MO_Not r -> r - MO_Shl r -> r - MO_U_Shr r -> r - MO_S_Shr r -> r - - MO_S_Conv from to -> to - MO_U_Conv from to -> to - - -comparisonResultRep = wordRep -- is it? - - --- ----------------------------------------------------------------------------- --- machOpArgReps - --- | This function is used for debugging only: we can check whether an --- application of a MachOp is "type-correct" by checking that the MachReps of --- its arguments are the same as the MachOp expects. 
This is used when --- linting a CmmExpr. - -machOpArgReps :: MachOp -> [MachRep] -machOpArgReps op = - case op of - MO_Add r -> [r,r] - MO_Sub r -> [r,r] - MO_Eq r -> [r,r] - MO_Ne r -> [r,r] - MO_Mul r -> [r,r] - MO_S_MulMayOflo r -> [r,r] - MO_S_Quot r -> [r,r] - MO_S_Rem r -> [r,r] - MO_S_Neg r -> [r] - MO_U_MulMayOflo r -> [r,r] - MO_U_Quot r -> [r,r] - MO_U_Rem r -> [r,r] - - MO_S_Ge r -> [r,r] - MO_S_Le r -> [r,r] - MO_S_Gt r -> [r,r] - MO_S_Lt r -> [r,r] - - MO_U_Ge r -> [r,r] - MO_U_Le r -> [r,r] - MO_U_Gt r -> [r,r] - MO_U_Lt r -> [r,r] - - MO_And r -> [r,r] - MO_Or r -> [r,r] - MO_Xor r -> [r,r] - MO_Not r -> [r] - MO_Shl r -> [r,wordRep] - MO_U_Shr r -> [r,wordRep] - MO_S_Shr r -> [r,wordRep] - - MO_S_Conv from to -> [from] - MO_U_Conv from to -> [from] diff --git a/compiler/cmm/MkZipCfg.hs b/compiler/cmm/MkZipCfg.hs index b405352..0b549fa 100644 --- a/compiler/cmm/MkZipCfg.hs +++ b/compiler/cmm/MkZipCfg.hs @@ -1,6 +1,7 @@ {-# LANGUAGE ScopedTypeVariables #-} module MkZipCfg ( AGraph, (<*>), catAGraphs + , freshBlockId , emptyAGraph, withFreshLabel, withUnique , mkMiddle, mkMiddles, mkLast, mkZTail, mkBranch, mkLabel, mkIfThenElse, mkWhileDo , outOfLine @@ -166,7 +167,7 @@ catAGraphs :: [AGraph m l] -> AGraph m l emptyAGraph :: AGraph m l mkLabel :: (LastNode l) => - BlockId -> AGraph m l -- graph contains the label + BlockId -> Maybe Int -> AGraph m l -- graph contains the label mkMiddle :: m -> AGraph m l -- graph contains the node mkLast :: (Outputable m, Outputable l, LastNode l) => l -> AGraph m l -- graph contains the node @@ -230,9 +231,9 @@ mkWhileDo :: (Outputable m, Outputable l, LastNode l) -- because it may require the allocation of fresh, unique labels. 
graphOfAGraph :: AGraph m l -> UniqSM (Graph m l) -lgraphOfAGraph :: AGraph m l -> UniqSM (LGraph m l) +lgraphOfAGraph :: Int -> AGraph m l -> UniqSM (LGraph m l) -- ^ allocate a fresh label for the entry point -labelAGraph :: BlockId -> AGraph m l -> UniqSM (LGraph m l) +labelAGraph :: BlockId -> Int -> AGraph m l -> UniqSM (LGraph m l) -- ^ use the given BlockId as the label of the entry point @@ -261,20 +262,20 @@ emptyAGraph = AGraph return graphOfAGraph (AGraph f) = f emptyGraph emptyGraph = Graph (ZLast LastExit) emptyBlockEnv -labelAGraph id g = +labelAGraph id args g = do Graph tail blocks <- graphOfAGraph g - return $ LGraph id $ insertBlock (Block id tail) blocks + return $ LGraph id args $ insertBlock (Block id Nothing tail) blocks -lgraphOfAGraph g = do id <- freshBlockId "graph entry" - labelAGraph id g +lgraphOfAGraph args g = do id <- freshBlockId "graph entry" + labelAGraph id args g ------------------------------------- -- constructors -mkLabel id = AGraph f +mkLabel id args = AGraph f where f (Graph tail blocks) = return $ Graph (ZLast (mkBranchNode id)) - (insertBlock (Block id tail) blocks) + (insertBlock (Block id args tail) blocks) mkBranch target = mkLast $ mkBranchNode target @@ -314,24 +315,21 @@ outOfLine (AGraph f) = AGraph f' note_this_code_becomes_unreachable emptyEntrance return $ Graph tail' (blocks `plusUFM` blocks') - mkIfThenElse cbranch tbranch fbranch = withFreshLabel "end of if" $ \endif -> withFreshLabel "start of then" $ \tid -> withFreshLabel "start of else" $ \fid -> cbranch tid fid <*> - mkLabel tid <*> tbranch <*> mkBranch endif <*> - mkLabel fid <*> fbranch <*> mkLabel endif - + mkLabel tid Nothing <*> tbranch <*> mkBranch endif <*> + mkLabel fid Nothing <*> fbranch <*> mkLabel endif Nothing mkWhileDo cbranch body = withFreshLabel "loop test" $ \test -> withFreshLabel "loop head" $ \head -> withFreshLabel "end while" $ \endwhile -> -- Forrest Baskett's while-loop layout - mkBranch test <*> mkLabel head <*> body <*> 
mkLabel test - <*> cbranch head endwhile <*> mkLabel endwhile - + mkBranch test <*> mkLabel head Nothing <*> body <*> mkLabel test Nothing + <*> cbranch head endwhile <*> mkLabel endwhile Nothing -- | Bleat if the insertion of a last node will create unreachable code note_this_code_becomes_unreachable :: @@ -360,6 +358,6 @@ Emitting a Branch at this point is fine: -- thrown away at this spot---there's no reason a BlockId couldn't one day carry -- a string. -freshBlockId :: String -> UniqSM BlockId -freshBlockId _ = do { u <- getUniqueM; return $ BlockId u } +freshBlockId :: MonadUnique m => String -> m BlockId +freshBlockId _s = getUniqueM >>= return . BlockId diff --git a/compiler/cmm/MkZipCfgCmm.hs b/compiler/cmm/MkZipCfgCmm.hs index dd29aa8..1d80650 100644 --- a/compiler/cmm/MkZipCfgCmm.hs +++ b/compiler/cmm/MkZipCfgCmm.hs @@ -13,7 +13,8 @@ module MkZipCfgCmm , (<*>), catAGraphs, mkLabel, mkBranch , emptyAGraph, withFreshLabel, withUnique, outOfLine , lgraphOfAGraph, graphOfAGraph, labelAGraph - , CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph, Middle, Last, Convention(..) + , CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph + , Middle, Last, Convention(..), ForeignConvention(..), MidCallTarget(..), Transfer(..) ) where @@ -22,10 +23,9 @@ where import BlockId import CmmExpr import Cmm ( GenCmm(..), GenCmmTop(..), CmmStatic, CmmInfo - , CmmCallTarget(..), CmmActuals, CmmFormals, CmmFormalsWithoutKinds - , CmmKinded (..) 
+ , CmmActuals, CmmFormals ) -import MachOp (MachHint(..), wordRep) +import CmmCallConv (assignArgumentsPos, ParamLocation(..)) import ZipCfgCmmRep hiding (CmmGraph, CmmAGraph, CmmBlock, CmmZ, CmmTopZ) -- to make this module more self-contained, the above definitions are -- duplicated below @@ -34,8 +34,9 @@ import PprCmm() import ClosureInfo import FastString import ForeignCall -import ZipCfg import MkZipCfg +import Panic +import ZipCfg type CmmGraph = LGraph Middle Last type CmmAGraph = AGraph Middle Last @@ -43,6 +44,8 @@ type CmmBlock = Block Middle Last type CmmZ = GenCmm CmmStatic CmmInfo CmmGraph type CmmTopZ = GenCmmTop CmmStatic CmmInfo CmmGraph +data Transfer = Call | Jump | Ret deriving Eq + ---------- No-ops mkNop :: CmmAGraph mkComment :: FastString -> CmmAGraph @@ -55,7 +58,7 @@ mkStore :: CmmExpr -> CmmExpr -> CmmAGraph mkCall :: CmmExpr -> CCallConv -> CmmFormals -> CmmActuals -> C_SRT -> CmmAGraph mkCmmCall :: CmmExpr -> CmmFormals -> CmmActuals -> C_SRT -> CmmAGraph -- Native C-- calling convention -mkUnsafeCall :: CmmCallTarget -> CmmFormals -> CmmActuals -> CmmAGraph +mkUnsafeCall :: MidCallTarget -> CmmFormals -> CmmActuals -> CmmAGraph mkFinalCall :: CmmExpr -> CCallConv -> CmmActuals -> CmmAGraph -- Never returns; like exit() or barf() @@ -63,10 +66,10 @@ mkFinalCall :: CmmExpr -> CCallConv -> CmmActuals -> CmmAGraph mkAddToContext :: CmmExpr -> [CmmExpr] -> CmmAGraph ---------- Control transfer -mkJump :: Area -> CmmExpr -> CmmActuals -> CmmAGraph -mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph -mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph -mkReturn :: Area -> CmmActuals -> CmmAGraph +mkJump :: CmmExpr -> CmmActuals -> CmmAGraph +mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph +mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkReturn :: CmmActuals -> CmmAGraph mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph mkCmmIfThen :: CmmExpr -> CmmAGraph -> CmmAGraph @@ -74,7 +77,7 @@ mkCmmWhileDo :: 
CmmExpr -> CmmAGraph -> CmmAGraph -- Not to be forgotten, but exported by MkZipCfg: -- mkBranch :: BlockId -> CmmAGraph --- mkLabel :: BlockId -> CmmAGraph +-- mkLabel :: BlockId -> Maybe Int -> CmmAGraph -- outOfLine :: CmmAGraph -> CmmAGraph -- withUnique :: (Unique -> CmmAGraph) -> CmmAGraph -- withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph @@ -88,8 +91,8 @@ mkCmmIfThen e tbranch = withFreshLabel "end of if" $ \endif -> withFreshLabel "start of then" $ \tid -> mkCbranch e tid endif <*> - mkLabel tid <*> tbranch <*> mkBranch endif <*> - mkLabel endif + mkLabel tid Nothing <*> tbranch <*> mkBranch endif <*> + mkLabel endif Nothing @@ -100,65 +103,89 @@ mkComment fs = mkMiddle $ MidComment fs mkAssign l r = mkMiddle $ MidAssign l r mkStore l r = mkMiddle $ MidStore l r -mkCbranch pred ifso ifnot = mkLast $ LastCondBranch pred ifso ifnot + +-- Why are we inserting extra blocks that simply branch to the successors? +-- Because in addition to the branch instruction, @mkBranch@ will insert +-- a necessary adjustment to the stack pointer. 
+mkCbranch pred ifso ifnot = mkLast (LastCondBranch pred ifso ifnot) mkSwitch e tbl = mkLast $ LastSwitch e tbl mkUnsafeCall tgt results actuals = mkMiddle $ MidUnsafeCall tgt results actuals mkAddToContext ra actuals = mkMiddle $ MidAddToContext ra actuals cmmResConv :: Convention -cmmResConv = ConventionStandard CmmCallConv Results - -copyIn :: Convention -> Area -> CmmFormals -> [Middle] -copyIn _ area formals = reverse $ snd $ foldl ci (1, []) formals - where ci (n, ms) v = (n+1, MidAssign (CmmLocal $ kindlessCmm v) - (CmmLoad (CmmStackSlot area n) wordRep) : ms) - -copyOut :: Convention -> Area -> CmmActuals -> [Middle] -copyOut conv area actuals = moveSP conv $ snd $ foldl co (1, []) actuals - where moveSP (ConventionStandard _ Arguments) args = - MidAssign spReg (outgoingSlot area) : reverse args - moveSP _ args = reverse $ MidAssign spReg (outgoingSlot area) : args - co (n, ms) v = (n+1, MidStore (CmmStackSlot area n) (kindlessCmm v) : ms) -mkEntry :: Area -> Convention -> CmmFormalsWithoutKinds -> [Middle] -mkEntry area conv formals = copyIn conv area fs - where fs = map (\f -> CmmKinded f NoHint) formals +cmmResConv = Native + +-- Return the number of bytes used for copying arguments, as well as the +-- instructions to copy the arguments. +copyIn :: Convention -> Bool -> Area -> CmmFormals -> (Int, [Middle]) +copyIn _ isCall area formals = + foldr ci (init_offset, []) $ assignArgumentsPos isCall localRegType formals + where ci (reg, RegisterParam r) (n, ms) = + (n, MidAssign (CmmLocal reg) (CmmReg $ CmmGlobal r) : ms) + ci (reg, StackParam off) (n, ms) = + let ty = localRegType reg + off' = off + init_offset + in (max n off', + MidAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off') ty) : ms) + init_offset = widthInBytes wordWidth + +-- The argument layout function ignores the pointer to the info table, so we slot that +-- in here. When copying-out to a young area, we set the info table for return +-- and adjust the offsets of the other parameters. 
+-- If this is a call instruction, we adjust the offsets of the other parameters. +copyOut :: Convention -> Transfer -> Area -> CmmActuals -> (Int, [Middle]) +copyOut _ transfer area@(CallArea a) actuals = + foldr co (init_offset, []) args' + where args = assignArgumentsPos skip_node cmmExprType actuals + skip_node = transfer /= Ret + (setRA, init_offset) = + case a of Young id -> -- set RA if making a call + if transfer == Call then + ([(CmmLit (CmmLabel (infoTblLbl id)), + StackParam init_offset)], ra_width) + else ([], 0) + Old -> ([], ra_width) + ra_width = widthInBytes wordWidth + args' = foldl adjust setRA args + where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst + adjust rst x@(_, RegisterParam _) = x : rst + co (v, RegisterParam r) (n, ms) = (n, MidAssign (CmmGlobal r) v : ms) + co (v, StackParam off) (n, ms) = + (max n off, MidStore (CmmStackSlot area off) v : ms) +copyOut _ _ (RegSlot _) _ = panic "cannot copy arguments into a register slot" + +mkEntry :: BlockId -> Convention -> CmmFormals -> (Int, CmmAGraph) +mkEntry _ conv formals = + let (off, copies) = copyIn conv False (CallArea Old) formals in + (off, mkMiddles copies) -- I'm not sure how to get the calling conventions right yet, -- and I suspect this should not be resolved until sometime after -- Simon's patch is applied. -- For now, I apply a bogus calling convention: all arguments go on the -- stack, using the same amount of stack space. 
-lastWithArgs' :: BlockId -> Area -> Convention -> CmmActuals -> Maybe CmmFormals -> - (BlockId -> Last) -> CmmAGraph -lastWithArgs' k area conv actuals formals toLast = - (mkMiddles $ copyOut conv area actuals) <*> - -- adjust the sp - mkLast (toLast k) <*> - case formals of - Just formals -> mkLabel k <*> (mkMiddles $ copyIn conv area formals) - Nothing -> emptyAGraph -lastWithArgs :: Convention -> CmmActuals -> Maybe CmmFormals -> (BlockId -> Last) -> CmmAGraph -lastWithArgs c a f l = - withFreshLabel "call successor" $ \k -> lastWithArgs' k (mkCallArea k a f) c a f l - -always :: a -> b -> a -always x _ = x + +lastWithArgs :: Transfer -> Area -> Convention -> CmmActuals -> (Int -> Last) -> CmmAGraph +lastWithArgs transfer area conv actuals last = + let (outArgs, copies) = copyOut conv transfer area actuals in + mkMiddles copies <*> mkLast (last outArgs) -- The area created for the jump and return arguments is the same area as the -- procedure entry. -mkJump area e actuals = - lastWithArgs' (areaId area) area cmmResConv actuals Nothing $ always $ LastJump e -mkReturn area actuals = - lastWithArgs' (areaId area) area cmmResConv actuals Nothing $ always LastReturn +mkJump e actuals = lastWithArgs Jump (CallArea Old) cmmResConv actuals $ LastJump e +mkReturn actuals = lastWithArgs Ret (CallArea Old) cmmResConv actuals $ LastJump e + where e = CmmStackSlot (CallArea Old) (widthInBytes wordWidth) -mkFinalCall f conv actuals = - lastWithArgs (ConventionStandard conv Arguments) actuals Nothing - $ always $ LastCall f Nothing --mkFinalCall f conv actuals = +mkFinalCall f _ actuals = + lastWithArgs Call (CallArea Old) Native actuals $ LastCall f Nothing mkCmmCall f results actuals srt = mkCall f CmmCallConv results actuals srt -- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later. 
-mkCall f conv results actuals _ = - lastWithArgs (ConventionStandard conv Arguments) actuals (Just results) - $ \k -> LastCall f (Just k) +mkCall f _ results actuals _ = + withFreshLabel "call successor" $ \k -> + let area = CallArea $ Young k + (off, copyin) = copyIn Native False area results + copyout = lastWithArgs Call area Native actuals $ LastCall f (Just k) + in copyout <*> mkLabel k (Just off) <*> (mkMiddles copyin) diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs index 3d5b645..d9e8365 100644 --- a/compiler/cmm/OptimizationFuel.hs +++ b/compiler/cmm/OptimizationFuel.hs @@ -128,7 +128,7 @@ fuelDecrementState new_optimizer old new s = optimizer = if diffFuel old new > 0 then new_optimizer else fs_lastpass s -- lGraphOfGraph is here because we need uniques to implement it. -lGraphOfGraph :: Graph m l -> FuelMonad (LGraph m l) -lGraphOfGraph (Graph tail blocks) = +lGraphOfGraph :: Graph m l -> Int -> FuelMonad (LGraph m l) +lGraphOfGraph (Graph tail blocks) args = do entry <- liftM BlockId $ getUniqueM - return $ LGraph entry (insertBlock (Block entry tail) blocks) + return $ LGraph entry args (insertBlock (Block entry Nothing tail) blocks) diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 2a01217..fea2374 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -37,7 +37,6 @@ import BlockId import Cmm import PprCmm () -- Instances only import CLabel -import MachOp import ForeignCall import ClosureInfo @@ -191,18 +190,15 @@ pprStmt stmt = case stmt of CmmAssign dest src -> pprAssign dest src CmmStore dest src - | rep == I64 && wordRep /= I64 - -> ptext (sLit "ASSIGN_Word64") <> - parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi - - | rep == F64 && wordRep /= I64 - -> ptext (sLit "ASSIGN_DBL") <> - parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi + | typeWidth rep == W64 && wordWidth /= W64 + -> (if isFloatType rep then ptext (sLit "ASSIGN_DBL") + else ptext (sLit 
("ASSIGN_Word64"))) <> + parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi | otherwise -> hsep [ pprExpr (CmmLoad dest rep), equals, pprExpr src <> semi ] where - rep = cmmExprRep src + rep = cmmExprType src CmmCall (CmmCallee fn cconv) results args safety ret -> maybe_proto $$ @@ -254,16 +250,16 @@ pprStmt stmt = case stmt of CmmJump lbl _params -> mkJMP_(pprExpr lbl) <> semi CmmSwitch arg ids -> pprSwitch arg ids -pprCFunType :: SDoc -> CCallConv -> CmmFormals -> CmmActuals -> SDoc +pprCFunType :: SDoc -> CCallConv -> HintedCmmFormals -> HintedCmmActuals -> SDoc pprCFunType ppr_fn cconv ress args = res_type ress <+> parens (text (ccallConvAttribute cconv) <> ppr_fn) <> parens (commafy (map arg_type args)) where res_type [] = ptext (sLit "void") - res_type [CmmKinded one hint] = machRepHintCType (localRegRep one) hint + res_type [CmmHinted one hint] = machRepHintCType (localRegType one) hint - arg_type (CmmKinded expr hint) = machRepHintCType (cmmExprRep expr) hint + arg_type (CmmHinted expr hint) = machRepHintCType (cmmExprType expr) hint -- --------------------------------------------------------------------- -- unconditional branches @@ -304,11 +300,11 @@ pprSwitch e maybe_ids caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix where do_fallthrough ix = - hsep [ ptext (sLit "case") , pprHexVal ix wordRep <> colon , + hsep [ ptext (sLit "case") , pprHexVal ix wordWidth <> colon , ptext (sLit "/* fall through */") ] final_branch ix = - hsep [ ptext (sLit "case") , pprHexVal ix wordRep <> colon , + hsep [ ptext (sLit "case") , pprHexVal ix wordWidth <> colon , ptext (sLit "goto") , (pprBlockId ident) <> semi ] -- --------------------------------------------------------------------- @@ -321,7 +317,7 @@ pprSwitch e maybe_ids -- -- has a type in C which is also given by -- --- machRepCType (cmmExprRep e) +-- machRepCType (cmmExprType e) -- -- (similar invariants apply to the rest of the pretty printer). 
@@ -329,30 +325,8 @@ pprExpr :: CmmExpr -> SDoc pprExpr e = case e of CmmLit lit -> pprLit lit - CmmLoad e I64 | wordRep /= I64 - -> ptext (sLit "PK_Word64") <> parens (mkP_ <> pprExpr1 e) - - CmmLoad e F64 | wordRep /= I64 - -> ptext (sLit "PK_DBL") <> parens (mkP_ <> pprExpr1 e) - - CmmLoad (CmmReg r) rep - | isPtrReg r && rep == wordRep - -> char '*' <> pprAsPtrReg r - - CmmLoad (CmmRegOff r 0) rep - | isPtrReg r && rep == wordRep - -> char '*' <> pprAsPtrReg r - - CmmLoad (CmmRegOff r off) rep - | isPtrReg r && rep == wordRep && (off `rem` wORD_SIZE == 0) - -- ToDo: check that the offset is a word multiple? - -- (For tagging to work, I had to avoid unaligned loads. --ARY) - -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) - - CmmLoad expr rep -> - -- the general case: - cLoad expr rep + CmmLoad e ty -> pprLoad e ty CmmReg reg -> pprCastReg reg CmmRegOff reg 0 -> pprCastReg reg @@ -364,6 +338,32 @@ pprExpr e = case e of CmmMachOp mop args -> pprMachOpApp mop args + +pprLoad :: CmmExpr -> CmmType -> SDoc +pprLoad e ty + | width == W64, wordWidth /= W64 + = (if isFloatType ty then ptext (sLit "PK_DBL") + else ptext (sLit "PK_Word64")) + <> parens (mkP_ <> pprExpr1 e) + + | otherwise + = case e of + CmmReg r | isPtrReg r && width == wordWidth && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r 0 | isPtrReg r && width == wordWidth && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r off | isPtrReg r && width == wordWidth + , off `rem` wORD_SIZE == 0 && not (isFloatType ty) + -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. 
--ARY) + -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) + + _other -> cLoad e ty + where + width = typeWidth ty + pprExpr1 :: CmmExpr -> SDoc pprExpr1 (CmmLit lit) = pprLit1 lit pprExpr1 e@(CmmReg _reg) = pprExpr e @@ -406,8 +406,15 @@ pprMachOpApp' mop args _ -> panic "PprC.pprMachOp : machop with wrong number of args" where - pprArg e | signedOp mop = cCast (machRepSignedCType (cmmExprRep e)) e + -- Cast needed for signed integer ops + pprArg e | signedOp mop = cCast (machRep_S_CType (typeWidth (cmmExprType e))) e + | needsFCasts mop = cCast (machRep_F_CType (typeWidth (cmmExprType e))) e | otherwise = pprExpr1 e + needsFCasts (MO_F_Eq _) = False + needsFCasts (MO_F_Ne _) = False + needsFCasts (MO_F_Neg _) = True + needsFCasts (MO_F_Quot _) = True + needsFCasts mop = floatComparison mop -- -------------------------------------------------------------------------- -- Literals @@ -416,7 +423,7 @@ pprLit :: CmmLit -> SDoc pprLit lit = case lit of CmmInt i rep -> pprHexVal i rep - CmmFloat f rep -> parens (machRepCType rep) <> str + CmmFloat f w -> parens (machRep_F_CType w) <> str where d = fromRational f :: Double str | isInfinite d && d < 0 = ptext (sLit "-INFINITY") | isInfinite d = ptext (sLit "INFINITY") @@ -449,29 +456,29 @@ pprLit1 other = pprLit other pprStatics :: [CmmStatic] -> [SDoc] pprStatics [] = [] -pprStatics (CmmStaticLit (CmmFloat f F32) : rest) +pprStatics (CmmStaticLit (CmmFloat f W32) : rest) -- floats are padded to a word, see #1852 - | wORD_SIZE == 8, CmmStaticLit (CmmInt 0 I32) : rest' <- rest + | wORD_SIZE == 8, CmmStaticLit (CmmInt 0 W32) : rest' <- rest = pprLit1 (floatToWord f) : pprStatics rest' | wORD_SIZE == 4 = pprLit1 (floatToWord f) : pprStatics rest | otherwise - = pprPanic "pprStatics: float" (vcat (map (\(CmmStaticLit l) -> ppr (cmmLitRep l)) rest)) -pprStatics (CmmStaticLit (CmmFloat f F64) : rest) + = pprPanic "pprStatics: float" (vcat (map (\(CmmStaticLit l) -> ppr (cmmLitType l)) rest)) +pprStatics (CmmStaticLit 
(CmmFloat f W64) : rest) = map pprLit1 (doubleToWords f) ++ pprStatics rest -pprStatics (CmmStaticLit (CmmInt i I64) : rest) - | machRepByteWidth I32 == wORD_SIZE +pprStatics (CmmStaticLit (CmmInt i W64) : rest) + | wordWidth == W32 #ifdef WORDS_BIGENDIAN - = pprStatics (CmmStaticLit (CmmInt q I32) : - CmmStaticLit (CmmInt r I32) : rest) + = pprStatics (CmmStaticLit (CmmInt q W32) : + CmmStaticLit (CmmInt r W32) : rest) #else - = pprStatics (CmmStaticLit (CmmInt r I32) : - CmmStaticLit (CmmInt q I32) : rest) + = pprStatics (CmmStaticLit (CmmInt r W32) : + CmmStaticLit (CmmInt q W32) : rest) #endif where r = i .&. 0xffffffff q = i `shiftR` 32 -pprStatics (CmmStaticLit (CmmInt i rep) : rest) - | machRepByteWidth rep /= wORD_SIZE +pprStatics (CmmStaticLit (CmmInt i w) : rest) + | w /= wordWidth = panic "pprStatics: cannot emit a non-word-sized static literal" pprStatics (CmmStaticLit lit : rest) = pprLit1 lit : pprStatics rest @@ -518,18 +525,33 @@ pprMachOp_for_C mop = case mop of MO_U_Quot _ -> char '/' MO_U_Rem _ -> char '%' - -- Signed comparisons (floating-point comparisons also use these) - -- & Unsigned comparisons + -- & Floating-point operations + MO_F_Add _ -> char '+' + MO_F_Sub _ -> char '-' + MO_F_Neg _ -> char '-' + MO_F_Mul _ -> char '*' + MO_F_Quot _ -> char '/' + + -- Signed comparisons MO_S_Ge _ -> ptext (sLit ">=") MO_S_Le _ -> ptext (sLit "<=") MO_S_Gt _ -> char '>' MO_S_Lt _ -> char '<' + -- & Unsigned comparisons MO_U_Ge _ -> ptext (sLit ">=") MO_U_Le _ -> ptext (sLit "<=") MO_U_Gt _ -> char '>' MO_U_Lt _ -> char '<' + -- & Floating-point comparisons + MO_F_Eq _ -> ptext (sLit "==") + MO_F_Ne _ -> ptext (sLit "!=") + MO_F_Ge _ -> ptext (sLit ">=") + MO_F_Le _ -> ptext (sLit "<=") + MO_F_Gt _ -> char '>' + MO_F_Lt _ -> char '<' + -- Bitwise operations. Not all of these may be supported at all -- sizes, and only integral MachReps are valid. 
MO_And _ -> char '&' @@ -540,29 +562,31 @@ pprMachOp_for_C mop = case mop of MO_U_Shr _ -> ptext (sLit ">>") -- unsigned shift right MO_S_Shr _ -> ptext (sLit ">>") -- signed shift right --- Conversions. Some of these will be NOPs. +-- Conversions. Some of these will be NOPs, but never those that convert +-- between ints and floats. -- Floating-point conversions use the signed variant. -- We won't know to generate (void*) casts here, but maybe from -- context elsewhere -- noop casts - MO_U_Conv I8 I8 -> empty - MO_U_Conv I16 I16 -> empty - MO_U_Conv I32 I32 -> empty - MO_U_Conv I64 I64 -> empty - MO_U_Conv I128 I128 -> empty - MO_S_Conv I8 I8 -> empty - MO_S_Conv I16 I16 -> empty - MO_S_Conv I32 I32 -> empty - MO_S_Conv I64 I64 -> empty - MO_S_Conv I128 I128 -> empty - - MO_U_Conv _from to -> parens (machRepCType to) - MO_S_Conv _from to -> parens (machRepSignedCType to) - - _ -> panic "PprC.pprMachOp_for_C: unknown machop" - -signedOp :: MachOp -> Bool + MO_UU_Conv from to | from == to -> empty + MO_UU_Conv _from to -> parens (machRep_U_CType to) + + MO_SS_Conv from to | from == to -> empty + MO_SS_Conv _from to -> parens (machRep_S_CType to) + + -- TEMPORARY: the old code didn't check this case, so let's leave it out + -- to facilitate comparisons against the old output code. 
+ --MO_FF_Conv from to | from == to -> empty + MO_FF_Conv _from to -> parens (machRep_F_CType to) + + MO_SF_Conv _from to -> parens (machRep_F_CType to) + MO_FS_Conv _from to -> parens (machRep_S_CType to) + + _ -> pprTrace "offending mop" (ptext $ sLit $ show mop) $ + panic "PprC.pprMachOp_for_C: unknown machop" + +signedOp :: MachOp -> Bool -- Argument type(s) are signed ints signedOp (MO_S_Quot _) = True signedOp (MO_S_Rem _) = True signedOp (MO_S_Neg _) = True @@ -571,9 +595,19 @@ signedOp (MO_S_Le _) = True signedOp (MO_S_Gt _) = True signedOp (MO_S_Lt _) = True signedOp (MO_S_Shr _) = True -signedOp (MO_S_Conv _ _) = True +signedOp (MO_SS_Conv _ _) = True +signedOp (MO_SF_Conv _ _) = True signedOp _ = False +floatComparison :: MachOp -> Bool -- comparison between float args +floatComparison (MO_F_Eq _) = True +floatComparison (MO_F_Ne _) = True +floatComparison (MO_F_Ge _) = True +floatComparison (MO_F_Le _) = True +floatComparison (MO_F_Gt _) = True +floatComparison (MO_F_Lt _) = True +floatComparison _ = False + -- --------------------------------------------------------------------- -- tend to be implemented by foreign calls @@ -692,9 +726,13 @@ isFixedPtrReg (CmmLocal _) = False isFixedPtrReg (CmmGlobal r) = isFixedPtrGlobalReg r -- True if (pprAsPtrReg reg) will give an expression with type StgPtr +-- JD: THIS IS HORRIBLE AND SHOULD BE RENAMED, AT THE VERY LEAST. +-- THE GARBAGE WITH THE VNonGcPtr HELPS MATCH THE OLD CODE GENERATOR'S OUTPUT; +-- I'M NOT SURE IF IT SHOULD REALLY STAY THAT WAY. 
isPtrReg :: CmmReg -> Bool isPtrReg (CmmLocal _) = False -isPtrReg (CmmGlobal (VanillaReg n)) = True -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal (VanillaReg n VGcPtr)) = True -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal (VanillaReg n VNonGcPtr)) = False --if we print via pprAsPtrReg isPtrReg (CmmGlobal reg) = isFixedPtrGlobalReg reg -- True if this global reg has type StgPtr @@ -706,7 +744,7 @@ isFixedPtrGlobalReg SpLim = True isFixedPtrGlobalReg _ = False -- True if in C this register doesn't have the type given by --- (machRepCType (cmmRegRep reg)), so it has to be cast. +-- (machRepCType (cmmRegType reg)), so it has to be cast. isStrangeTypeReg :: CmmReg -> Bool isStrangeTypeReg (CmmLocal _) = False isStrangeTypeReg (CmmGlobal g) = isStrangeTypeGlobal g @@ -731,12 +769,16 @@ pprReg r = case r of CmmGlobal global -> pprGlobalReg global pprAsPtrReg :: CmmReg -> SDoc -pprAsPtrReg (CmmGlobal (VanillaReg n)) = char 'R' <> int n <> ptext (sLit ".p") +pprAsPtrReg (CmmGlobal (VanillaReg n gcp)) + = WARN( gcp /= VGcPtr, ppr n ) char 'R' <> int n <> ptext (sLit ".p") pprAsPtrReg other_reg = pprReg other_reg pprGlobalReg :: GlobalReg -> SDoc pprGlobalReg gr = case gr of - VanillaReg n -> char 'R' <> int n <> ptext (sLit ".w") + VanillaReg n _ -> char 'R' <> int n <> ptext (sLit ".w") + -- pprGlobalReg prints a VanillaReg as a .w regardless + -- Example: R1.w = R1.w & (-0x8UL); + -- JMP_(*R1.p); FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n @@ -753,12 +795,12 @@ pprGlobalReg gr = case gr of GCFun -> ptext (sLit "stg_gc_fun") pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq _ _) = char '_' <> ppr uniq +pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq -- ----------------------------------------------------------------------------- -- Foreign Calls -pprCall :: SDoc -> CCallConv -> CmmFormals -> CmmActuals -> CmmSafety +pprCall :: SDoc -> CCallConv -> HintedCmmFormals -> HintedCmmActuals -> 
CmmSafety -> SDoc pprCall ppr_fn cconv results args _ @@ -781,26 +823,27 @@ pprCall ppr_fn cconv results args _ ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi where ppr_assign [] rhs = rhs - ppr_assign [CmmKinded one hint] rhs + ppr_assign [CmmHinted one hint] rhs = pprLocalReg one <> ptext (sLit " = ") - <> pprUnHint hint (localRegRep one) <> rhs + <> pprUnHint hint (localRegType one) <> rhs ppr_assign _other _rhs = panic "pprCall: multiple results" - pprArg (CmmKinded expr hint) - | hint `elem` [PtrHint,SignedHint] - = cCast (machRepHintCType (cmmExprRep expr) hint) expr + pprArg (CmmHinted expr AddrHint) + = cCast (ptext (sLit "void *")) expr -- see comment by machRepHintCType below - pprArg (CmmKinded expr _other) - = pprExpr expr + pprArg (CmmHinted expr SignedHint) + = cCast (machRep_S_CType $ typeWidth $ cmmExprType expr) expr + pprArg (CmmHinted expr _other) + = pprExpr expr - pprUnHint PtrHint rep = parens (machRepCType rep) + pprUnHint AddrHint rep = parens (machRepCType rep) pprUnHint SignedHint rep = parens (machRepCType rep) pprUnHint _ _ = empty pprGlobalRegName :: GlobalReg -> SDoc pprGlobalRegName gr = case gr of - VanillaReg n -> char 'R' <> int n -- without the .w suffix - _ -> pprGlobalReg gr + VanillaReg n _ -> char 'R' <> int n -- without the .w suffix + _ -> pprGlobalReg gr -- Currently we only have these two calling conventions, but this might -- change in the future... 
@@ -823,7 +866,7 @@ pprDataExterns statics where (_, lbls) = runTE (mapM_ te_Static statics) pprTempDecl :: LocalReg -> SDoc -pprTempDecl l@(LocalReg _ rep _) +pprTempDecl l@(LocalReg _ rep) = hcat [ machRepCType rep, space, pprLocalReg l, semi ] pprExternDecl :: Bool -> CLabel -> SDoc @@ -832,11 +875,11 @@ pprExternDecl in_srt lbl | not (needsCDecl lbl) = empty | Just sz <- foreignLabelStdcallInfo lbl = stdcall_decl sz | otherwise = - hcat [ visibility, label_type (labelType lbl), + hcat [ visibility, label_type lbl, lparen, pprCLabel lbl, text ");" ] where - label_type CodeLabel = ptext (sLit "F_") - label_type DataLabel = ptext (sLit "I_") + label_type lbl | isCFunctionLabel lbl = ptext (sLit "F_") + | otherwise = ptext (sLit "I_") visibility | externallyVisibleCLabel lbl = char 'E' @@ -847,7 +890,7 @@ pprExternDecl in_srt lbl -- add the @n suffix to the label (#2276) stdcall_decl sz = ptext (sLit "extern __attribute__((stdcall)) void ") <> pprCLabel lbl - <> parens (commafy (replicate (sz `quot` wORD_SIZE) (machRepCType wordRep))) + <> parens (commafy (replicate (sz `quot` wORD_SIZE) (machRep_U_CType wordWidth))) <> semi type TEState = (UniqSet LocalReg, FiniteMap CLabel ()) @@ -882,8 +925,8 @@ te_Lit _ = return () te_Stmt :: CmmStmt -> TE () te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r -te_Stmt (CmmCall _ rs es _ _) = mapM_ (te_temp.kindlessCmm) rs >> - mapM_ (te_Expr.kindlessCmm) es +te_Stmt (CmmCall _ rs es _ _) = mapM_ (te_temp.hintlessCmm) rs >> + mapM_ (te_Expr.hintlessCmm) es te_Stmt (CmmCondBranch e _) = te_Expr e te_Stmt (CmmSwitch e _) = te_Expr e te_Stmt (CmmJump e _) = te_Expr e @@ -907,7 +950,7 @@ te_Reg _ = return () cCast :: SDoc -> CmmExpr -> SDoc cCast ty expr = parens ty <> pprExpr1 expr -cLoad :: CmmExpr -> MachRep -> SDoc +cLoad :: CmmExpr -> CmmType -> SDoc #ifdef BEWARE_LOAD_STORE_ALIGNMENT cLoad expr rep = let decl = machRepCType rep <+> ptext (sLit "x") <> semi @@ -919,41 +962,50 @@ 
cLoad expr rep = cLoad expr rep = char '*' <> parens (cCast (machRepPtrCType rep) expr) #endif +isCmmWordType :: CmmType -> Bool +-- True of GcPtrReg/NonGcReg of native word size +isCmmWordType ty = not (isFloatType ty) + && typeWidth ty == wordWidth + -- This is for finding the types of foreign call arguments. For a pointer -- argument, we always cast the argument to (void *), to avoid warnings from -- the C compiler. -machRepHintCType :: MachRep -> MachHint -> SDoc -machRepHintCType rep PtrHint = ptext (sLit "void *") -machRepHintCType rep SignedHint = machRepSignedCType rep +machRepHintCType :: CmmType -> ForeignHint -> SDoc +machRepHintCType rep AddrHint = ptext (sLit "void *") +machRepHintCType rep SignedHint = machRep_S_CType (typeWidth rep) machRepHintCType rep _other = machRepCType rep -machRepPtrCType :: MachRep -> SDoc -machRepPtrCType r | r == wordRep = ptext (sLit "P_") - | otherwise = machRepCType r <> char '*' - -machRepCType :: MachRep -> SDoc -machRepCType r | r == wordRep = ptext (sLit "W_") - | otherwise = sized_type - where sized_type = case r of - I8 -> ptext (sLit "StgWord8") - I16 -> ptext (sLit "StgWord16") - I32 -> ptext (sLit "StgWord32") - I64 -> ptext (sLit "StgWord64") - F32 -> ptext (sLit "StgFloat") -- ToDo: correct? - F64 -> ptext (sLit "StgDouble") - _ -> panic "machRepCType" - -machRepSignedCType :: MachRep -> SDoc -machRepSignedCType r | r == wordRep = ptext (sLit "I_") - | otherwise = sized_type - where sized_type = case r of - I8 -> ptext (sLit "StgInt8") - I16 -> ptext (sLit "StgInt16") - I32 -> ptext (sLit "StgInt32") - I64 -> ptext (sLit "StgInt64") - F32 -> ptext (sLit "StgFloat") -- ToDo: correct? 
- F64 -> ptext (sLit "StgDouble") - _ -> panic "machRepCType" +machRepPtrCType :: CmmType -> SDoc +machRepPtrCType r | isCmmWordType r = ptext (sLit "P_") + | otherwise = machRepCType r <> char '*' + +machRepCType :: CmmType -> SDoc +machRepCType ty | isFloatType ty = machRep_F_CType w + | otherwise = machRep_U_CType w + where + w = typeWidth ty + +machRep_F_CType :: Width -> SDoc +machRep_F_CType W32 = ptext (sLit "StgFloat") -- ToDo: correct? +machRep_F_CType W64 = ptext (sLit "StgDouble") +machRep_F_CType _ = panic "machRep_F_CType" + +machRep_U_CType :: Width -> SDoc +machRep_U_CType w | w == wordWidth = ptext (sLit "W_") +machRep_U_CType W8 = ptext (sLit "StgWord8") +machRep_U_CType W16 = ptext (sLit "StgWord16") +machRep_U_CType W32 = ptext (sLit "StgWord32") +machRep_U_CType W64 = ptext (sLit "StgWord64") +machRep_U_CType _ = panic "machRep_U_CType" + +machRep_S_CType :: Width -> SDoc +machRep_S_CType w | w == wordWidth = ptext (sLit "I_") +machRep_S_CType W8 = ptext (sLit "StgInt8") +machRep_S_CType W16 = ptext (sLit "StgInt16") +machRep_S_CType W32 = ptext (sLit "StgInt32") +machRep_S_CType W64 = ptext (sLit "StgInt64") +machRep_S_CType _ = panic "machRep_S_CType" + -- --------------------------------------------------------------------- -- print strings as valid C strings @@ -982,8 +1034,8 @@ charToC w = -- can safely initialise to static locations. 
big_doubles - | machRepByteWidth F64 == 2 * wORD_SIZE = True - | machRepByteWidth F64 == wORD_SIZE = False + | widthInBytes W64 == 2 * wORD_SIZE = True + | widthInBytes W64 == wORD_SIZE = False | otherwise = panic "big_doubles" castFloatToIntArray :: STUArray s Int Float -> ST s (STUArray s Int Int) @@ -1000,7 +1052,7 @@ floatToWord r writeArray arr 0 (fromRational r) arr' <- castFloatToIntArray arr i <- readArray arr' 0 - return (CmmInt (toInteger i) wordRep) + return (CmmInt (toInteger i) wordWidth) ) doubleToWords :: Rational -> [CmmLit] @@ -1012,8 +1064,8 @@ doubleToWords r arr' <- castDoubleToIntArray arr i1 <- readArray arr' 0 i2 <- readArray arr' 1 - return [ CmmInt (toInteger i1) wordRep - , CmmInt (toInteger i2) wordRep + return [ CmmInt (toInteger i1) wordWidth + , CmmInt (toInteger i2) wordWidth ] ) | otherwise -- doubles are 1 word @@ -1022,20 +1074,20 @@ doubleToWords r writeArray arr 0 (fromRational r) arr' <- castDoubleToIntArray arr i <- readArray arr' 0 - return [ CmmInt (toInteger i) wordRep ] + return [ CmmInt (toInteger i) wordWidth ] ) -- --------------------------------------------------------------------------- -- Utils wordShift :: Int -wordShift = machRepLogWidth wordRep +wordShift = widthInLog wordWidth commafy :: [SDoc] -> SDoc commafy xs = hsep $ punctuate comma xs -- Print in C hex format: 0x13fa -pprHexVal :: Integer -> MachRep -> SDoc +pprHexVal :: Integer -> Width -> SDoc pprHexVal 0 _ = ptext (sLit "0x0") pprHexVal w rep | w < 0 = parens (char '-' <> ptext (sLit "0x") <> go (-w) <> repsuffix rep) @@ -1048,9 +1100,9 @@ pprHexVal w rep -- warnings about integer overflow from gcc. 
-- on 32-bit platforms, add "ULL" to 64-bit literals - repsuffix I64 | wORD_SIZE == 4 = ptext (sLit "ULL") + repsuffix W64 | wORD_SIZE == 4 = ptext (sLit "ULL") -- on 64-bit platforms with 32-bit int, add "L" to 64-bit literals - repsuffix I64 | cINT_SIZE == 4 = ptext (sLit "UL") + repsuffix W64 | cINT_SIZE == 4 = ptext (sLit "UL") repsuffix _ = char 'U' go 0 = empty diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index e801aee..4478dfd 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -33,16 +33,17 @@ -- module PprCmm - ( writeCmms, pprCmms, pprCmm, pprStmt, pprExpr, pprSection, pprStatic, pprLit + ( writeCmms, pprCmms, pprCmm, pprStmt, pprExpr, + pprSection, pprStatic, pprLit ) where import BlockId import Cmm import CmmUtils -import MachOp import CLabel + import ForeignCall import Unique import Outputable @@ -52,6 +53,12 @@ import Data.List import System.IO import Data.Maybe +-- Temp Jan08 +import SMRep +import ClosureInfo +#include "../includes/StgFun.h" + + pprCmms :: (Outputable info, Outputable g) => [GenCmm CmmStatic info g] -> SDoc pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) where @@ -167,11 +174,13 @@ pprTypeInfo (ConstrInfo layout constr descr) = ptext (sLit "nptrs: ") <> integer (toInteger (snd layout)), ptext (sLit "constructor: ") <> integer (toInteger constr), pprLit descr] -pprTypeInfo (FunInfo layout srt fun_type arity _args slow_entry) = +pprTypeInfo (FunInfo layout srt arity _args slow_entry) = vcat [ptext (sLit "ptrs: ") <> integer (toInteger (fst layout)), ptext (sLit "nptrs: ") <> integer (toInteger (snd layout)), ptext (sLit "srt: ") <> ppr srt, - ptext (sLit "fun_type: ") <> integer (toInteger fun_type), +-- Temp Jan08 + ptext (sLit ("fun_type: ")) <> integer (toInteger (argDescrType _args)), + ptext (sLit "arity: ") <> integer (toInteger arity), --ptext (sLit "args: ") <> ppr args, -- TODO: needs to be printed ptext (sLit "slow: ") <> pprLit slow_entry @@ -187,6 +196,20 @@ 
pprTypeInfo (ContInfo stack srt) = vcat [ptext (sLit "stack: ") <> ppr stack, ptext (sLit "srt: ") <> ppr srt] +-- Temp Jan08 +argDescrType :: ArgDescr -> StgHalfWord +-- The "argument type" RTS field type +argDescrType (ArgSpec n) = n +argDescrType (ArgGen liveness) + | isBigLiveness liveness = ARG_GEN_BIG + | otherwise = ARG_GEN + +-- Temp Jan08 +isBigLiveness :: Liveness -> Bool +isBigLiveness (BigLiveness _) = True +isBigLiveness (SmallLiveness _) = False + + pprUpdateFrame :: UpdateFrame -> SDoc pprUpdateFrame (UpdateFrame expr args) = hcat [ ptext (sLit "jump") @@ -225,39 +248,28 @@ pprStmt stmt = case stmt of -- rep[lv] = expr; CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi where - rep = ppr ( cmmExprRep expr ) + rep = ppr ( cmmExprType expr ) -- call "ccall" foo(x, y)[r1, r2]; -- ToDo ppr volatile CmmCall (CmmCallee fn cconv) results args safety ret -> - hcat [ if null results - then empty - else parens (commafy $ map ppr results) <> - ptext (sLit " = "), - ptext (sLit "foreign"), space, - doubleQuotes(ppr cconv), space, - target fn, parens ( commafy $ map ppr args ), - brackets (ppr safety), - case ret of CmmMayReturn -> empty - CmmNeverReturns -> ptext (sLit " never returns"), - semi ] + sep [ pp_lhs <+> pp_conv + , nest 2 (pprExpr9 fn <> + parens (commafy (map ppr_ar args))) + <> brackets (ppr safety) + , case ret of CmmMayReturn -> empty + CmmNeverReturns -> ptext $ sLit (" never returns") + ] <> semi where - ---- With the following three functions, I was going somewhere - ---- useful, but I don't remember where. Probably making - ---- emitted Cmm output look better. 
---NR, 2 May 2008 - _pp_lhs | null results = empty - | otherwise = commafy (map ppr_ar results) <+> equals + pp_lhs | null results = empty + | otherwise = commafy (map ppr_ar results) <+> equals -- Don't print the hints on a native C-- call - ppr_ar arg = case cconv of - CmmCallConv -> ppr (kindlessCmm arg) - _ -> doubleQuotes (ppr $ cmmKind arg) <+> - ppr (kindlessCmm arg) - _pp_conv = case cconv of + ppr_ar (CmmHinted ar k) = case cconv of + CmmCallConv -> ppr ar + _ -> ppr (ar,k) + pp_conv = case cconv of CmmCallConv -> empty - _ -> ptext (sLit "foreign") <+> doubleQuotes (ppr cconv) - - target (CmmLit lit) = pprLit lit - target fn' = parens (ppr fn') + _ -> ptext (sLit("foreign")) <+> doubleQuotes (ppr cconv) CmmCall (CmmPrim op) results args safety ret -> pprStmt (CmmCall (CmmCallee (CmmLit lbl) CCallConv) @@ -271,6 +283,18 @@ pprStmt stmt = case stmt of CmmReturn params -> genReturn params CmmSwitch arg ids -> genSwitch arg ids +instance Outputable ForeignHint where + ppr NoHint = empty + ppr SignedHint = quotes(text "signed") +-- ppr AddrHint = quotes(text "address") +-- Temp Jan08 + ppr AddrHint = (text "PtrHint") + +-- Just look like a tuple, since it was a tuple before +-- ... is that a good idea? --Isaac Dupree +instance (Outputable a) => Outputable (CmmHinted a) where + ppr (CmmHinted a k) = ppr (a, k) + -- -------------------------------------------------------------------------- -- goto local label. 
[1], section 6.6 -- @@ -297,9 +321,8 @@ genCondBranch expr ident = -- -- jump foo(a, b, c); -- -genJump :: CmmExpr -> [CmmKinded CmmExpr] -> SDoc +genJump :: CmmExpr -> [CmmHinted CmmExpr] -> SDoc genJump expr args = - hcat [ ptext (sLit "jump") , space , if isTrivialCmmExpr expr @@ -308,23 +331,17 @@ genJump expr args = CmmLoad (CmmReg _) _ -> pprExpr expr _ -> parens (pprExpr expr) , space - , parens ( commafy $ map pprKinded args ) + , parens ( commafy $ map ppr args ) , semi ] -pprKinded :: Outputable a => (CmmKinded a) -> SDoc -pprKinded (CmmKinded a NoHint) = ppr a -pprKinded (CmmKinded a PtrHint) = quotes(text "address") <+> ppr a -pprKinded (CmmKinded a SignedHint) = quotes(text "signed") <+> ppr a -pprKinded (CmmKinded a FloatHint) = quotes(text "float") <+> ppr a -- -------------------------------------------------------------------------- -- Return from a function. [1], Section 6.8.2 of version 1.128 -- -- return (a, b, c); -- -genReturn :: [CmmKinded CmmExpr] -> SDoc +genReturn :: [CmmHinted CmmExpr] -> SDoc genReturn args = - hcat [ ptext (sLit "return") , space , parens ( commafy $ map ppr args ) @@ -376,7 +393,7 @@ pprExpr e CmmRegOff reg i -> pprExpr (CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) - where rep = cmmRegRep reg + where rep = typeWidth (cmmRegType reg) CmmLit lit -> pprLit lit _other -> pprExpr1 e @@ -488,7 +505,7 @@ pprLit :: CmmLit -> SDoc pprLit lit = case lit of CmmInt i rep -> hcat [ (if i < 0 then parens else id)(integer i) - , (if rep == wordRep + , (if rep == wordWidth then empty else space <> dcolon <+> ppr rep) ] @@ -534,27 +551,37 @@ pprReg r -- We only print the type of the local reg if it isn't wordRep -- pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq rep follow) - = hcat [ char '_', ppr uniq, ty ] where - ty = if rep == wordRep && follow == GCKindNonPtr - then empty - else dcolon <> ptr <> ppr rep - ptr = if follow == GCKindNonPtr - then empty - else doubleQuotes (text "ptr") 
+pprLocalReg (LocalReg uniq rep) +-- = ppr rep <> char '_' <> ppr uniq +-- Temp Jan08 + = char '_' <> ppr uniq <> + (if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh + then dcolon <> ptr <> ppr rep + else dcolon <> ptr <> ppr rep) + where + ptr = empty + --if isGcPtrType rep + -- then doubleQuotes (text "ptr") + -- else empty -- Stack areas pprArea :: Area -> SDoc -pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] -pprArea (CallArea id n n') = - hcat [ text "callslot<", ppr id, char '+', ppr n, char '/', ppr n', text ">" ] +pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] +pprArea (CallArea id) = pprAreaId id + +pprAreaId :: AreaId -> SDoc +pprAreaId Old = text "old" +pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ] -- needs to be kept in syn with Cmm.hs.GlobalReg -- pprGlobalReg :: GlobalReg -> SDoc pprGlobalReg gr = case gr of - VanillaReg n -> char 'R' <> int n + VanillaReg n _ -> char 'R' <> int n +-- Temp Jan08 +-- VanillaReg n VNonGcPtr -> char 'R' <> int n +-- VanillaReg n VGcPtr -> char 'P' <> int n FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n diff --git a/compiler/cmm/PprCmmZ.hs b/compiler/cmm/PprCmmZ.hs index 1e5f52f..c588466 100644 --- a/compiler/cmm/PprCmmZ.hs +++ b/compiler/cmm/PprCmmZ.hs @@ -7,7 +7,6 @@ where import BlockId import Cmm import CmmExpr -import ForeignCall import PprCmm import Outputable import qualified ZipCfgCmmRep as G @@ -28,34 +27,28 @@ pprCmmGraphLikeCmm g = vcat (swallow blocks) where blocks = Z.postorder_dfs g swallow :: [G.CmmBlock] -> [SDoc] swallow [] = [] - swallow (Z.Block id t : rest) = tail id [] Nothing t rest - tail id prev' out (Z.ZTail (G.CopyOut conv args) t) rest = - if isJust out then panic "multiple CopyOut nodes in one basic block" - else - tail id (prev') (Just (conv, args)) t rest + swallow (Z.Block id off t : rest) = tail (id, off) [] Nothing t rest tail id prev' out (Z.ZTail m t) rest = tail id (mid m : 
prev') out t rest - tail id prev' out (Z.ZLast Z.LastExit) rest = exit id prev' out rest tail id prev' out (Z.ZLast (Z.LastOther l)) rest = last id prev' out l rest - mid (G.CopyIn _ [] _) = text "// proc point (no parameters)" - mid m@(G.CopyIn {}) = ppr m <+> text "(proc point)" + tail id prev' _ (Z.ZLast Z.LastExit) rest = exit id prev' rest mid m = ppr m - block' id prev' + block' (id, off) prev' | id == Z.lg_entry g, entry_has_no_pred = - vcat (text "" : reverse prev') - | otherwise = hang (ppr id <> colon) 4 (vcat (reverse prev')) + vcat (text "" <> parens (ppr off) : reverse prev') + | otherwise = hang (ppr id <> parens (ppr off) <> colon) 4 (vcat (reverse prev')) last id prev' out l n = let endblock stmt = block' id (stmt : prev') : swallow n in case l of G.LastBranch tgt -> case n of - Z.Block id' t : bs + Z.Block id' _ t : bs | tgt == id', unique_pred id' -> tail id prev' out t bs -- optimize out redundant labels _ -> endblock (ppr $ CmmBranch tgt) l@(G.LastCondBranch expr tid fid) -> let ft id = text "// fall through to " <> ppr id in case n of - Z.Block id' t : bs + Z.Block id' _ t : bs | id' == fid, isNothing out -> tail id (ft fid : ppr (CmmCondBranch expr tid) : prev') Nothing t bs | id' == tid, Just e' <- maybeInvertCmmExpr expr, isNothing out-> @@ -64,35 +57,10 @@ pprCmmGraphLikeCmm g = vcat (swallow blocks) l@(G.LastJump {}) -> endblock $ with_out out l l@(G.LastReturn {}) -> endblock $ with_out out l l@(G.LastSwitch {}) -> endblock $ with_out out l - l@(G.LastCall _ Nothing) -> endblock $ with_out out l - l@(G.LastCall tgt (Just k)) - | Z.Block id' (Z.ZTail (G.CopyIn _ ress srt) t) : bs <- n, - Just (conv, args) <- out, - id' == k -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - ppcall = ppr call <+> parens (text "ret to" <+> ppr k) - in if unique_pred k then - tail id (ppcall : prev') Nothing t bs - else - endblock (ppcall) - | Z.Block id' t : bs <- n, id' == k, unique_pred k, - Just 
(conv, args) <- out, - Just (ress, srt) <- findCopyIn t -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - delayed = - ptext (sLit "// delayed CopyIn follows previous call") - in tail id (delayed : ppr call : prev') Nothing t bs - | otherwise -> endblock $ with_out out l - findCopyIn (Z.ZTail (G.CopyIn _ ress srt) _) = Just (ress, srt) - findCopyIn (Z.ZTail _ t) = findCopyIn t - findCopyIn (Z.ZLast _) = Nothing - exit id prev' out n = -- highly irregular (assertion violation?) + l@(G.LastCall _ _ _)-> endblock $ with_out out l + exit id prev' n = -- highly irregular (assertion violation?) let endblock stmt = block' id (stmt : prev') : swallow n in - case out of Nothing -> endblock (text "// ") - Just (conv, args) -> endblock (ppr (G.CopyOut conv args) $$ - text "// ") + endblock (text "// ") preds = zipPreds g entry_has_no_pred = case lookupBlockEnv preds (Z.lg_entry g) of Nothing -> True @@ -107,13 +75,11 @@ pprCmmGraphLikeCmm g = vcat (swallow blocks) else single in Z.fold_blocks add emptyBlockSet g unique_pred id = elemBlockSet id single_preds - cconv_of_conv (G.ConventionStandard conv _) = conv - cconv_of_conv (G.ConventionPrivate {}) = CmmCallConv -- XXX totally bogus with_out :: Maybe (G.Convention, CmmActuals) -> G.Last -> SDoc with_out Nothing l = ptext (sLit "??no-arguments??") <+> ppr l with_out (Just (conv, args)) l = last l - where last (G.LastCall e k) = + where last (G.LastCall e k _) = hcat [ptext (sLit "... 
= foreign "), doubleQuotes(ppr conv), space, ppr_target e, parens ( commafy $ map ppr args ), @@ -121,9 +87,13 @@ with_out (Just (conv, args)) l = last l case k of Nothing -> ptext (sLit " never returns") Just _ -> empty, semi ] - last (G.LastReturn) = ppr (CmmReturn args) - last (G.LastJump e) = ppr (CmmJump e args) - last l = ppr (G.CopyOut conv args) $$ ppr l + last (G.LastReturn _) = ppr (CmmReturn $ noHints args) + last (G.LastJump e _) = ppr (CmmJump e $ noHints args) + last l = ppr l ppr_target (CmmLit lit) = pprLit lit ppr_target fn' = parens (ppr fn') commafy xs = hsep $ punctuate comma xs + +-- Anything that uses this is bogus! +noHints :: [a] -> [CmmHinted a] +noHints = map (\v -> CmmHinted v NoHint) diff --git a/compiler/cmm/README b/compiler/cmm/README index c0d1c68..fd87e88 100644 --- a/compiler/cmm/README +++ b/compiler/cmm/README @@ -45,7 +45,6 @@ Sketch of the new arrivals: DFMonad Support for dataflow analysis and dataflow-based transformation. This module needs work. Includes DataflowLattice - for tracking dataflow facts (good) - DFA - monad for iterative dataflow analysis (OK) DFM - monad for iterative dataflow analysis and rewriting (OK) DFTx - monad to track Whalley/Davidson transactions (ugly) type class DataflowAnalysis - operations common to DFA, DFM @@ -89,8 +88,6 @@ Sketch of the new arrivals: CmmContFlowOpt Branch-chain elimination and elimination of unreachable code. - CmmCvt Conversion to and from the new format. - CmmOpt Changed optimization to use 'foldRegsUsed'; eliminated significant duplication of code. 
diff --git a/compiler/cmm/StackColor.hs b/compiler/cmm/StackColor.hs index 4d544bd..f3c1c32 100644 --- a/compiler/cmm/StackColor.hs +++ b/compiler/cmm/StackColor.hs @@ -8,7 +8,6 @@ import CmmExpr import CmmSpillReload import DFMonad import qualified GraphOps -import MachOp import ZipCfg import ZipCfgCmmRep import ZipDataflow @@ -19,8 +18,6 @@ import UniqSet import Data.List -type M = ExtendWithSpills Middle - fold_edge_facts_b :: LastNode l => (DualLive -> a -> a) -> BackwardTransfers m l DualLive -> LGraph m l -> (BlockId -> DualLive) -> a -> a @@ -32,12 +29,12 @@ fold_edge_facts_b f comp graph env z = last_in _ LastExit = fact_bot dualLiveLattice last_in env (LastOther l) = bt_last_in comp env l in head_fold h (last_in env l) z - head_fold (ZHead h m) out z = head_fold h (bt_middle_in comp out m) (f out z) - head_fold (ZFirst id) out z = f (bt_first_in comp out id) (f out z) + head_fold (ZHead h m) out z = head_fold h (bt_middle_in comp out m) (f out z) + head_fold (ZFirst id _) out z = f (bt_first_in comp out id) (f out z) -foldConflicts :: (RegSet -> a -> a) -> a -> LGraph M Last -> FuelMonad a +foldConflicts :: (RegSet -> a -> a) -> a -> LGraph Middle Last -> FuelMonad a foldConflicts f z g = - do env <- dualLiveness emptyBlockSet $ graphOfLGraph g + do env <- dualLiveness emptyBlockSet g let lookup id = lookupBlockEnv env id `orElse` fact_bot dualLiveLattice f' dual z = f (on_stack dual) z return $ fold_edge_facts_b f' (dualLiveTransfers emptyBlockSet) g lookup z @@ -50,7 +47,7 @@ foldConflicts f z g = type IGraph = Color.Graph LocalReg SlotClass StackPlacement type ClassCount = [(SlotClass, Int)] -buildIGraphAndCounts :: LGraph M Last -> FuelMonad (IGraph, ClassCount) +buildIGraphAndCounts :: LGraph Middle Last -> FuelMonad (IGraph, ClassCount) buildIGraphAndCounts g = igraph_and_counts where igraph_and_counts = foldConflicts add (Color.initGraph, zero) g zero = map (\c -> (c, 0)) allSlotClasses @@ -73,16 +70,14 @@ graphAddConflictSet :: RegSet -> IGraph -> 
IGraph graphAddConflictSet set graph = GraphOps.addConflicts set slotClass graph slotClass :: LocalReg -> SlotClass -slotClass (LocalReg _ machRep _) = - case machRep of -- the horror, the horror - I8 -> SlotClass32 - I16 -> SlotClass32 - I32 -> SlotClass32 - I64 -> SlotClass64 - I128 -> SlotClass128 - F32 -> SlotClass32 - F64 -> SlotClass64 - F80 -> SlotClass64 +slotClass (LocalReg _ ty) = + case typeWidth ty of -- the horror, the horror + W8 -> SlotClass32 + W16 -> SlotClass32 + W32 -> SlotClass32 + W64 -> SlotClass64 + W128 -> SlotClass128 + W80 -> SlotClass64 {- colorMe :: (IGraph, ClassCount) -> (IGraph, UniqSet LocalReg) diff --git a/compiler/cmm/ZipCfg.hs b/compiler/cmm/ZipCfg.hs index 78eeaed..634bc8c 100644 --- a/compiler/cmm/ZipCfg.hs +++ b/compiler/cmm/ZipCfg.hs @@ -12,7 +12,7 @@ module ZipCfg , splice_tail, splice_head, splice_head_only', splice_head' , of_block_list, to_block_list , graphOfLGraph - , map_blocks, map_nodes, mapM_blocks + , map_blocks, map_one_block, map_nodes, mapM_blocks , postorder_dfs, postorder_dfs_from, postorder_dfs_from_except , fold_layout , fold_blocks, fold_fwd_block @@ -38,7 +38,8 @@ where import BlockId ( BlockId, BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet) -import CmmExpr ( UserOfLocalRegs(..) ) --for an instance +import CmmExpr ( UserOfLocalRegs(..) 
) +import PprCmm() import Outputable hiding (empty) import Panic @@ -150,18 +151,22 @@ instance UserOfLocalRegs a => UserOfLocalRegs (ZLast a) where foldRegsUsed _f z LastExit = z -data ZHead m = ZFirst BlockId | ZHead (ZHead m) m +data ZHead m = ZFirst BlockId (Maybe Int) + | ZHead (ZHead m) m -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l) -- ZTail is a sequence of middle nodes followed by a last node -- | Blocks and flow graphs; see Note [Kinds of graphs] -data Block m l = Block BlockId (ZTail m l) +-- In addition to its id, the block carries the number of bytes of stack space +-- used for incoming parameters on entry to the block. +data Block m l = Block BlockId (Maybe Int) (ZTail m l) data Graph m l = Graph { g_entry :: (ZTail m l), g_blocks :: (BlockEnv (Block m l)) } -data LGraph m l = LGraph { lg_entry :: BlockId - , lg_blocks :: BlockEnv (Block m l) } +data LGraph m l = LGraph { lg_entry :: BlockId + , lg_argoffset :: Int -- space (bytes) for incoming args + , lg_blocks :: BlockEnv (Block m l)} -- Invariant: lg_entry is in domain( lg_blocks ) -- | And now the zipper. The focus is between the head and tail. @@ -220,13 +225,13 @@ ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l) -- , (???, [, -- N: y:=x; return (y,x)]) -splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m) -splice_head' :: ZHead m -> Graph m l -> (BlockEnv (Block m l), ZHead m) +splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m) +splice_head' :: ZHead m -> Graph m l -> (BlockEnv (Block m l), ZHead m) splice_tail :: Graph m l -> ZTail m l -> Graph m l -- | We can also splice a single-entry, no-exit Graph into a head. 
-splice_head_only :: ZHead m -> LGraph m l -> LGraph m l -splice_head_only' :: ZHead m -> Graph m l -> LGraph m l +splice_head_only :: ZHead m -> LGraph m l -> LGraph m l +splice_head_only' :: ZHead m -> Graph m l -> LGraph m l -- | A safe operation @@ -235,12 +240,12 @@ splice_head_only' :: ZHead m -> Graph m l -> LGraph m l -- layout or dataflow, however, one will want to use 'postorder_dfs' -- in order to get the blocks in an order that relates to the control -- flow in the procedure. -of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N +of_block_list :: BlockId -> Int -> [Block m l] -> LGraph m l -- N log N to_block_list :: LGraph m l -> [Block m l] -- N log N -- | Conversion from LGraph to Graph graphOfLGraph :: LastNode l => LGraph m l -> Graph m l -graphOfLGraph (LGraph eid blocks) = Graph (ZLast $ mkBranchNode eid) blocks +graphOfLGraph (LGraph eid _ blocks) = Graph (ZLast $ mkBranchNode eid) blocks -- | Traversal: 'postorder_dfs' returns a list of blocks reachable @@ -282,6 +287,8 @@ fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a fold_fwd_block :: (BlockId -> a -> a) -> (m -> a -> a) -> (ZLast l -> a -> a) -> Block m l -> a -> a +map_one_block :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> Block m l -> Block m' l' + map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l' -- mapping includes the entry id! @@ -350,14 +357,14 @@ instance LastNode l => HavingSuccessors (ZTail m l) where ----- block manipulations -blockId (Block id _) = id +blockId (Block id _ _) = id -- | Convert block between forms. -- These functions are tail-recursive, so we can go as deep as we like -- without fear of stack overflow. 
ht_to_block head tail = case head of - ZFirst id -> Block id tail + ZFirst id off -> Block id off tail ZHead h m -> ht_to_block h (ZTail m tail) ht_to_last head (ZLast l) = (head, l) @@ -367,11 +374,11 @@ zipht h t = ht_to_block h t zip (ZBlock h t) = ht_to_block h t goto_end (ZBlock h t) = ht_to_last h t -unzip (Block id t) = ZBlock (ZFirst id) t +unzip (Block id off t) = ZBlock (ZFirst id off) t head_id :: ZHead m -> BlockId -head_id (ZFirst id) = id -head_id (ZHead h _) = head_id h +head_id (ZFirst id _) = id +head_id (ZHead h _) = head_id h last (ZBlock _ t) = lastTail t @@ -385,13 +392,13 @@ tailOfLast l = ZLast (LastOther l) -- tedious to write in every client ------------------ simple graph manipulations focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id -focus id (LGraph entry blocks) = +focus id (LGraph entry _ blocks) = case lookupBlockEnv blocks id of Just b -> FGraph entry (unzip b) (delFromUFM blocks id) Nothing -> panic "asked for nonexistent block in flow graph" entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node -entry g@(LGraph eid _) = focus eid g +entry g@(LGraph eid _ _) = focus eid g -- | pull out a block satisfying the predicate, if any splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) -> @@ -452,7 +459,7 @@ single_exitg (Graph tail blocks) = foldUFM add (exit_count (lastTail tail)) bloc -- Better to geot [A,B,C,D] -postorder_dfs g@(LGraph _ blockenv) = +postorder_dfs g@(LGraph _ _ blockenv) = let FGraph id eblock _ = entry g in zip eblock : postorder_dfs_from_except blockenv eblock (unitUniqSet id) @@ -463,7 +470,7 @@ postorder_dfs_from_except blocks b visited = where -- vnode :: -- Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet -> a - vnode block@(Block id _) cont acc visited = + vnode block@(Block id _ _) cont acc visited = if elemBlockSet id visited then cont acc visited else @@ -489,42 +496,42 @@ postorder_dfs_from blocks b = postorder_dfs_from_except 
blocks b emptyBlockSet -- 'b1' what its inline successor is going to be, so that if 'b1' ends with -- 'goto b2', the goto can be omitted. -fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z +fold_layout f z g@(LGraph eid _ _) = fold (postorder_dfs g) z where fold blocks z = case blocks of [] -> z [b] -> f b Nothing z b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z) - nextlabel (Block id _) = + nextlabel (Block id _ _) = if id == eid then panic "entry as successor" else Just id -- | The rest of the traversals are straightforward -map_blocks f (LGraph eid blocks) = LGraph eid (mapUFM f blocks) +map_blocks f (LGraph eid off blocks) = LGraph eid off (mapUFM f blocks) + +map_nodes idm middle last (LGraph eid off blocks) = + LGraph (idm eid) off (mapUFM (map_one_block idm middle last) blocks) -map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks) - where block (Block id t) = Block (idm id) (tail t) - tail (ZTail m t) = ZTail (middle m) (tail t) +map_one_block idm middle last (Block id off t) = Block (idm id) off (tail t) + where tail (ZTail m t) = ZTail (middle m) (tail t) tail (ZLast LastExit) = ZLast LastExit tail (ZLast (LastOther l)) = ZLast (LastOther (last l)) -mapM_blocks f (LGraph eid blocks) = blocks' >>= return . LGraph eid +mapM_blocks f (LGraph eid off blocks) = blocks' >>= return . 
LGraph eid off where blocks' = foldUFM (\b mblocks -> do { blocks <- mblocks ; b <- f b ; return $ insertBlock b blocks }) (return emptyBlockEnv) blocks -fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks -fold_fwd_block first middle last (Block id t) z = tail t (first id z) +fold_blocks f z (LGraph _ _ blocks) = foldUFM f z blocks +fold_fwd_block first middle last (Block id _ t) z = tail t (first id z) where tail (ZTail m t) z = tail t (middle m z) tail (ZLast l) z = last l z -of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks -to_block_list (LGraph _ blocks) = eltsUFM blocks - - +of_block_list e off blocks = LGraph e off $ foldr insertBlock emptyBlockEnv blocks +to_block_list (LGraph _ _ blocks) = eltsUFM blocks -- We want to be able to scrutinize a single-entry, single-exit 'LGraph' for @@ -568,15 +575,15 @@ prepare_for_splicing' (Graph etail gblocks) single multi = is_exit :: Block m l -> Bool is_exit b = case last (unzip b) of { LastExit -> True; _ -> False } -splice_head head g = +splice_head head g@(LGraph _ off _) = ASSERT (single_exit g) prepare_for_splicing g splice_one_block splice_many_blocks where eid = head_id head splice_one_block tail' = case ht_to_last head tail' of - (head, LastExit) -> (LGraph eid emptyBlockEnv, head) + (head, LastExit) -> (LGraph eid off emptyBlockEnv, head) _ -> panic "spliced LGraph without exit" splice_many_blocks entry exit others = - (LGraph eid (insertBlock (zipht head entry) others), exit) + (LGraph eid off (insertBlock (zipht head entry) others), exit) splice_head' head g = ASSERT (single_exitg g) prepare_for_splicing' g splice_one_block splice_many_blocks @@ -614,25 +621,27 @@ splice_tail g tail = splice_head_only head g = let FGraph eid gentry gblocks = entry g in case gentry of - ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks) + ZBlock (ZFirst _ _) tail -> + LGraph eid 0 (insertBlock (zipht head tail) gblocks) _ -> panic "entry not at start of block?!" 
splice_head_only' head (Graph tail gblocks) = let eblock = zipht head tail in - LGraph (blockId eblock) (insertBlock eblock gblocks) + LGraph (blockId eblock) 0 (insertBlock eblock gblocks) + -- the offset probably should never be used, but well, it's correct for this LGraph --- Translation -translate txm txl (LGraph eid blocks) = +translate txm txl (LGraph eid off blocks) = do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks - return $ LGraph eid blocks' + return $ LGraph eid off blocks' where -- txblock :: -- Block m l -> tm (BlockEnv (Block m' l')) -> tm (BlockEnv (Block m' l')) - txblock (Block id t) expanded = + txblock (Block id boff t) expanded = do blocks' <- expanded - txtail (ZFirst id) t blocks' + txtail (ZFirst id boff) t blocks' -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') -> -- tm (BlockEnv (Block m' l')) txtail h (ZTail m t) blocks' = @@ -675,10 +684,11 @@ pprLast LastExit = text "" pprLast (LastOther l) = ppr l pprBlock :: (Outputable m, Outputable l, LastNode l) => Block m l -> SDoc -pprBlock (Block id tail) = ppr id <> colon $$ ppr tail +pprBlock (Block id args tail) = ppr id <> parens (ppr args) <> colon $$ ppr tail pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc -pprLgraph g = text "{" $$ nest 2 (vcat $ map ppr blocks) $$ text "}" +pprLgraph g = text "{" <> text "offset" <> parens (ppr $ lg_argoffset g) $$ + nest 2 (vcat $ map ppr blocks) $$ text "}" where blocks = postorder_dfs g pprGraph :: (Outputable m, Outputable l, LastNode l) => Graph m l -> SDoc diff --git a/compiler/cmm/ZipCfgCmmRep.hs b/compiler/cmm/ZipCfgCmmRep.hs index af62168..e030f4b 100644 --- a/compiler/cmm/ZipCfgCmmRep.hs +++ b/compiler/cmm/ZipCfgCmmRep.hs @@ -1,4 +1,4 @@ - + -- This module is pure representation and should be imported only by -- clients that need to manipulate representation and know what @@ -6,29 +6,32 @@ -- instead import MkZipCfgCmm. 
module ZipCfgCmmRep - ( CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph, Middle(..), Last(..), Convention(..) - , ValueDirection(..), CmmBackwardFixedPoint, CmmForwardFixedPoint - , insertBetween, pprCmmGraphLikeCmm + ( CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph + , Middle(..), Last(..), MidCallTarget(..) + , Convention(..), ForeignConvention(..) + , ValueDirection(..), ForeignHint(..) + , CmmBackwardFixedPoint, CmmForwardFixedPoint, pprHinted + , insertBetween, mapExpMiddle, mapExpLast, mapExpDeepMiddle, mapExpDeepLast + , foldExpMiddle, foldExpLast, foldExpDeepMiddle, foldExpDeepLast + , joinOuts ) where import BlockId import CmmExpr import Cmm ( GenCmm(..), GenCmmTop(..), CmmStatic, CmmInfo - , CmmCallTarget(..), CmmActuals, CmmFormals, CmmKinded(..) + , CallishMachOp(..), ForeignHint(..) + , CmmActuals, CmmFormals, CmmHinted(..) , CmmStmt(..) -- imported in order to call ppr on Switch and to -- implement pprCmmGraphLikeCmm - , CmmSafety(CmmSafe) -- for pprCmmGraphLikeCmm - , CmmReturnInfo(CmmMayReturn) -- for pprCmmGraphLikeCmm ) +import DFMonad import PprCmm() +import CmmTx import CLabel -import CmmZipUtil -import ClosureInfo import FastString import ForeignCall -import MachOp import qualified ZipCfg as Z import qualified ZipDataflow as DF import ZipCfg @@ -39,7 +42,7 @@ import Maybes import Monad import Outputable import Prelude hiding (zip, unzip, last) -import UniqSet +import qualified Data.List as L import UniqSupply ---------------------------------------------------------------------- @@ -59,36 +62,20 @@ data Middle | MidAssign CmmReg CmmExpr -- Assign to register | MidStore CmmExpr CmmExpr -- Assign to memory location. Size is - -- given by cmmExprRep of the rhs. + -- given by cmmExprType of the rhs. 
| MidUnsafeCall -- An "unsafe" foreign call; - CmmCallTarget -- just a fat machine instruction + MidCallTarget -- just a fat machine instructoin CmmFormals -- zero or more results CmmActuals -- zero or more arguments - | MidAddToContext -- push a frame on the stack; - -- I will return to this frame + | MidAddToContext -- Push a frame on the stack; + -- I will return to this frame CmmExpr -- The frame's return address; it must be -- preceded by an info table that describes the -- live variables. [CmmExpr] -- The frame's live variables, to go on the -- stack with the first one at the young end - - | CopyIn -- Move incoming parameters or results from conventional - -- locations to registers. Note [CopyIn invariant] - Convention - CmmFormals -- eventually [CmmKind] will be used only for foreign - -- calls and will migrate into 'Convention' (helping to - -- drain "the swamp"), leaving this as [LocalReg] - C_SRT -- Static things kept alive by this block - - | CopyOut Convention CmmActuals - -- Move outgoing parameters or results from registers to - -- conventional locations. Every 'LastReturn', - -- 'LastJump', or 'LastCall' must be dominated by a - -- matching 'CopyOut' in the same basic block. - -- As above, '[CmmKind]' will migrate into the foreign calling - -- convention, leaving the actuals as '[CmmExpr]'. deriving Eq data Last @@ -98,47 +85,54 @@ data Last cml_pred :: CmmExpr, cml_true, cml_false :: BlockId } - - | LastReturn -- Return from a function; values in a previous CopyOut node - - | LastJump CmmExpr -- Tail call to another procedure; args in a CopyOut node - - | LastCall { -- A call (native or safe foreign); args in CopyOut node - cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! 
- cml_cont :: Maybe BlockId } -- BlockId of continuation, if call returns - | LastSwitch CmmExpr [Maybe BlockId] -- Table branch -- The scrutinee is zero-based; -- zero -> first block -- one -> second block etc -- Undefined outside range, and when there's a Nothing + | LastReturn Int -- Return from a function; values in previous copy middles + | LastJump CmmExpr Int -- Tail call to another procedure; args in a copy middles + | LastCall { -- A call (native or safe foreign); args in copy middles + cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! + cml_cont :: Maybe BlockId,-- BlockId of continuation, if call returns + cml_args :: Int } -- liveness info for outgoing args + -- All the last nodes that pass arguments carry the size of the outgoing CallArea + +data MidCallTarget -- The target of a MidUnsafeCall + = ForeignTarget -- A foreign procedure + CmmExpr -- Its address + ForeignConvention -- Its calling convention + + | PrimTarget -- A possibly-side-effecting machine operation + CallishMachOp -- Which one + deriving Eq data Convention - = ConventionStandard CCallConv ValueDirection - | ConventionPrivate - -- Used for control transfers within a (pre-CPS) procedure All - -- jump sites known, never pushed on the stack (hence no SRT) - -- You can choose whatever calling convention you please - -- (provided you make sure all the call sites agree)! - -- This data type eventually to be extended to record the convention. - - deriving Eq + = Native -- Native C-- call/return + + | Foreign -- Foreign call/return + ForeignConvention + + | Private + -- Used for control transfers within a (pre-CPS) procedure All + -- jump sites known, never pushed on the stack (hence no SRT) + -- You can choose whatever calling convention you please + -- (provided you make sure all the call sites agree)! + -- This data type eventually to be extended to record the convention. 
+ deriving( Eq ) + +data ForeignConvention + = ForeignConvention + CCallConv -- Which foreign-call convention + [ForeignHint] -- Extra info about the args + [ForeignHint] -- Extra info about the result + deriving Eq data ValueDirection = Arguments | Results -- Arguments go with procedure definitions, jumps, and arguments to calls -- Results go with returns and with results of calls. deriving Eq -{- -Note [CopyIn invariant] -~~~~~~~~~~~~~~~~~~~~~~~ -One might wish for CopyIn to be a First node, but in practice, the -possibility raises all sorts of hairy issues with graph splicing, -rewriting, and so on. In the end, NR finds it better to make the -placement of CopyIn a dynamic invariant; it should normally be the first -Middle node in the basic block in which it occurs. --} - ---------------------------------------------------------------------- ----- Splicing between blocks -- Given a middle node, a block, and a successor BlockId, @@ -151,41 +145,35 @@ Middle node in the basic block in which it occurs. -- a fresh basic block, enabling some common blockification. -- o For a conditional branch, switch statement, or call, we must insert -- a new basic block. --- o For a jump, or return, this operation is impossible. +-- o For a jump or return, this operation is impossible. 
insertBetween :: MonadUnique m => CmmBlock -> [Middle] -> BlockId -> m (CmmBlock, [CmmBlock]) insertBetween b ms succId = insert $ goto_end $ unzip b where insert (h, LastOther (LastBranch bid)) = if bid == succId then do (bid', bs) <- newBlocks - return (zipht h $ ZLast $ LastOther (LastBranch bid'), bs) - else panic "tried to insert between non-adjacent blocks" + return (zipht h (ZLast (LastOther (LastBranch bid'))), bs) + else panic "tried invalid block insertBetween" insert (h, LastOther (LastCondBranch c t f)) = do (t', tbs) <- if t == succId then newBlocks else return $ (t, []) (f', fbs) <- if f == succId then newBlocks else return $ (f, []) return (zipht h $ ZLast $ LastOther (LastCondBranch c t' f'), tbs ++ fbs) - insert (h, LastOther (LastCall e (Just k))) = - if k == succId then - do (id', bs) <- newBlocks - return (zipht h $ ZLast $ LastOther (LastCall e (Just id')), bs) - else panic "tried to insert between non-adjacent blocks" - insert (_, LastOther (LastCall _ Nothing)) = - panic "cannot insert after non-returning call" insert (h, LastOther (LastSwitch e ks)) = do (ids, bs) <- mapAndUnzipM mbNewBlocks ks return (zipht h $ ZLast $ LastOther (LastSwitch e ids), join bs) - insert (_, LastOther LastReturn) = panic "cannot insert after return" - insert (_, LastOther (LastJump _)) = panic "cannot insert after jump" + insert (_, LastOther (LastCall _ _ _)) = + panic "unimp: insertBetween after a call -- probably not a good idea" + insert (_, LastOther (LastReturn _)) = panic "cannot insert after return" + insert (_, LastOther (LastJump _ _)) = panic "cannot insert after jump" insert (_, LastExit) = panic "cannot insert after exit" newBlocks = do id <- liftM BlockId $ getUniqueM - return $ (id, [Block id $ + return $ (id, [Block id Nothing $ foldr ZTail (ZLast (LastOther (LastBranch succId))) ms]) mbNewBlocks (Just k) = if k == succId then liftM lift newBlocks else return (Just k, []) mbNewBlocks Nothing = return (Nothing, []) lift (id, bs) = (Just id, bs) - 
---------------------------------------------------------------------- ----- Instance declarations for control flow @@ -201,20 +189,20 @@ instance LastNode Last where branchNodeTarget _ = panic "asked for target of non-branch" cmmSuccs :: Last -> [BlockId] -cmmSuccs (LastReturn {}) = [] -cmmSuccs (LastJump {}) = [] -cmmSuccs (LastBranch id) = [id] -cmmSuccs (LastCall _ (Just id)) = [id] -cmmSuccs (LastCall _ Nothing) = [] -cmmSuccs (LastCondBranch _ t f) = [f, t] -- meets layout constraint -cmmSuccs (LastSwitch _ edges) = catMaybes edges +cmmSuccs (LastReturn _) = [] +cmmSuccs (LastJump {}) = [] +cmmSuccs (LastBranch id) = [id] +cmmSuccs (LastCall _ (Just id) _) = [id] +cmmSuccs (LastCall _ Nothing _) = [] +cmmSuccs (LastCondBranch _ t f) = [f, t] -- meets layout constraint +cmmSuccs (LastSwitch _ edges) = catMaybes edges fold_cmm_succs :: (BlockId -> a -> a) -> Last -> a -> a -fold_cmm_succs _f (LastReturn {}) z = z +fold_cmm_succs _f (LastReturn _) z = z fold_cmm_succs _f (LastJump {}) z = z fold_cmm_succs f (LastBranch id) z = f id z -fold_cmm_succs f (LastCall _ (Just id)) z = f id z -fold_cmm_succs _f (LastCall _ Nothing) z = z +fold_cmm_succs f (LastCall _ (Just id) _) z = f id z +fold_cmm_succs _f (LastCall _ Nothing _) z = z fold_cmm_succs f (LastCondBranch _ te fe) z = f te (f fe z) fold_cmm_succs f (LastSwitch _ edges) z = foldl (flip f) z $ catMaybes edges @@ -223,43 +211,164 @@ fold_cmm_succs f (LastSwitch _ edges) z = foldl (flip f) z $ catMaybes edge instance UserOfLocalRegs Middle where foldRegsUsed f z m = middle m - where middle (MidComment {}) = z - middle (MidAssign _lhs expr) = fold f z expr - middle (MidStore addr rval) = fold f (fold f z addr) rval - middle (MidUnsafeCall tgt _ress args) = fold f (fold f z tgt) args - middle (MidAddToContext ra args) = fold f (fold f z ra) args - middle (CopyIn _ _formals _) = z - middle (CopyOut _ actuals) = fold f z actuals + where middle (MidComment {}) = z + middle (MidAssign _lhs expr) = fold f z expr + 
middle (MidStore addr rval) = fold f (fold f z addr) rval + middle (MidUnsafeCall tgt _ args) = fold f (fold f z tgt) args + middle (MidAddToContext ra args) = fold f (fold f z ra) args fold f z m = foldRegsUsed f z m -- avoid monomorphism restriction +instance UserOfLocalRegs MidCallTarget where + foldRegsUsed _f z (PrimTarget _) = z + foldRegsUsed f z (ForeignTarget e _) = foldRegsUsed f z e + +instance UserOfSlots MidCallTarget where + foldSlotsUsed _f z (PrimTarget _) = z + foldSlotsUsed f z (ForeignTarget e _) = foldSlotsUsed f z e + instance UserOfLocalRegs Last where foldRegsUsed f z l = last l - where last (LastReturn) = z - last (LastJump e) = foldRegsUsed f z e + where last (LastReturn _) = z + last (LastJump e _) = foldRegsUsed f z e last (LastBranch _id) = z - last (LastCall tgt _) = foldRegsUsed f z tgt + last (LastCall tgt _ _) = foldRegsUsed f z tgt last (LastCondBranch e _ _) = foldRegsUsed f z e last (LastSwitch e _tbl) = foldRegsUsed f z e instance DefinerOfLocalRegs Middle where foldRegsDefd f z m = middle m + where middle (MidComment {}) = z + middle (MidAssign _lhs _) = fold f z _lhs + middle (MidStore _ _) = z + middle (MidUnsafeCall _ _ _) = z + middle (MidAddToContext _ _) = z + fold f z m = foldRegsDefd f z m -- avoid monomorphism restriction + +instance DefinerOfLocalRegs Last where + foldRegsDefd _ z _ = z + + +---------------------------------------------------------------------- +----- Instance declarations for stack slot use + +instance UserOfSlots Middle where + foldSlotsUsed f z m = middle m + where middle (MidComment {}) = z + middle (MidAssign _lhs expr) = fold f z expr + middle (MidStore addr rval) = fold f (fold f z addr) rval + middle (MidUnsafeCall tgt _ress args) = fold f (fold f z tgt) args + middle (MidAddToContext ra args) = fold f (fold f z ra) args + fold f z e = foldSlotsUsed f z e -- avoid monomorphism restriction + +instance UserOfSlots Last where + foldSlotsUsed f z l = last l + where last (LastReturn _) = z + last 
(LastJump e _) = foldSlotsUsed f z e + last (LastBranch _id) = z + last (LastCall tgt _ _) = foldSlotsUsed f z tgt + last (LastCondBranch e _ _) = foldSlotsUsed f z e + last (LastSwitch e _tbl) = foldSlotsUsed f z e + +instance UserOfSlots l => UserOfSlots (ZLast l) where + foldSlotsUsed f z (LastOther l) = foldSlotsUsed f z l + foldSlotsUsed _ z LastExit = z + +instance DefinerOfSlots Middle where + foldSlotsDefd f z m = middle m where middle (MidComment {}) = z - middle (MidAssign _lhs _) = fold f z _lhs + middle (MidAssign _ _) = z + middle (MidStore (CmmStackSlot a i) e) = + f z (a, i, widthInBytes $ typeWidth $ cmmExprType e) middle (MidStore _ _) = z middle (MidUnsafeCall _ _ _) = z middle (MidAddToContext _ _) = z - middle (CopyIn _ _formals _) = fold f z _formals - middle (CopyOut _ _) = z - fold f z m = foldRegsDefd f z m -- avoid monomorphism restriction -instance DefinerOfLocalRegs Last where - foldRegsDefd _ z l = last l - where last (LastReturn) = z - last (LastJump _) = z - last (LastBranch _) = z - last (LastCall _ _) = z - last (LastCondBranch _ _ _) = z - last (LastSwitch _ _) = z +instance DefinerOfSlots Last where + foldSlotsDefd _ z _ = z + +instance DefinerOfSlots l => DefinerOfSlots (ZLast l) where + foldSlotsDefd f z (LastOther l) = foldSlotsDefd f z l + foldSlotsDefd _ z LastExit = z + +---------------------------------------------------------------------- +----- Code for manipulating Middle and Last nodes + +mapExpMiddle :: (CmmExpr -> CmmExpr) -> Middle -> Middle +mapExpMiddle _ m@(MidComment _) = m +mapExpMiddle exp (MidAssign r e) = MidAssign r (exp e) +mapExpMiddle exp (MidStore addr e) = MidStore (exp addr) (exp e) +mapExpMiddle exp (MidUnsafeCall tgt fs as) = + MidUnsafeCall (mapExpMidcall exp tgt) fs (map exp as) +mapExpMiddle exp (MidAddToContext e es) = MidAddToContext (exp e) (map exp es) + +foldExpMiddle :: (CmmExpr -> z -> z) -> Middle -> z -> z +foldExpMiddle _ (MidComment _) z = z +foldExpMiddle exp (MidAssign _ e) z = exp e z 
+foldExpMiddle exp (MidStore addr e) z = exp addr $ exp e z +foldExpMiddle exp (MidUnsafeCall tgt _ as) z = foldExpMidcall exp tgt $ foldr exp z as +foldExpMiddle exp (MidAddToContext e es) z = exp e $ foldr exp z es + +mapExpLast :: (CmmExpr -> CmmExpr) -> Last -> Last +mapExpLast _ l@(LastBranch _) = l +mapExpLast exp (LastCondBranch e ti fi) = LastCondBranch (exp e) ti fi +mapExpLast exp (LastSwitch e tbl) = LastSwitch (exp e) tbl +mapExpLast exp (LastCall tgt mb_id s) = LastCall (exp tgt) mb_id s +mapExpLast exp (LastJump e s) = LastJump (exp e) s +mapExpLast _ (LastReturn s) = LastReturn s + +foldExpLast :: (CmmExpr -> z -> z) -> Last -> z -> z +foldExpLast _ (LastBranch _) z = z +foldExpLast exp (LastCondBranch e _ _) z = exp e z +foldExpLast exp (LastSwitch e _) z = exp e z +foldExpLast exp (LastCall tgt _ _) z = exp tgt z +foldExpLast exp (LastJump e _) z = exp e z +foldExpLast _ (LastReturn _) z = z + +mapExpMidcall :: (CmmExpr -> CmmExpr) -> MidCallTarget -> MidCallTarget +mapExpMidcall exp (ForeignTarget e c) = ForeignTarget (exp e) c +mapExpMidcall _ m@(PrimTarget _) = m + +foldExpMidcall :: (CmmExpr -> z -> z) -> MidCallTarget -> z -> z +foldExpMidcall exp (ForeignTarget e _) z = exp e z +foldExpMidcall _ (PrimTarget _) z = z + +-- Take a transformer on expressions and apply it recursively. +wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr +wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map f es) +wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (f addr) ty) +wrapRecExp f e = f e + +mapExpDeepMiddle :: (CmmExpr -> CmmExpr) -> Middle -> Middle +mapExpDeepLast :: (CmmExpr -> CmmExpr) -> Last -> Last +mapExpDeepMiddle f = mapExpMiddle $ wrapRecExp f +mapExpDeepLast f = mapExpLast $ wrapRecExp f + +-- Take a folder on expressions and apply it recursively. 
+wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z +wrapRecExpf f e@(CmmMachOp _ es) z = foldr f (f e z) es +wrapRecExpf f e@(CmmLoad addr _) z = f addr (f e z) +wrapRecExpf f e z = f e z + +foldExpDeepMiddle :: (CmmExpr -> z -> z) -> Middle -> z -> z +foldExpDeepLast :: (CmmExpr -> z -> z) -> Last -> z -> z +foldExpDeepMiddle f = foldExpMiddle $ wrapRecExpf f +foldExpDeepLast f = foldExpLast $ wrapRecExpf f + +---------------------------------------------------------------------- +-- Compute the join of facts live out of a Last node. Useful for most backward +-- analyses. +joinOuts :: DataflowLattice a -> (BlockId -> a) -> Last -> a +joinOuts lattice env l = + let bot = fact_bot lattice + join x y = txVal $ fact_add_to lattice x y + in case l of + (LastReturn _) -> bot + (LastJump _ _) -> bot + (LastBranch id) -> env id + (LastCall _ Nothing _) -> bot + (LastCall _ (Just k) _) -> env k + (LastCondBranch _ t f) -> join (env t) (env f) + (LastSwitch _ tbl) -> foldr join bot (map env $ catMaybes tbl) ---------------------------------------------------------------------- ----- Instance declarations for prettyprinting (avoids recursive imports) @@ -273,6 +382,13 @@ instance Outputable Last where instance Outputable Convention where ppr = pprConvention +instance Outputable ForeignConvention where + ppr = pprForeignConvention + +instance Outputable ValueDirection where + ppr Arguments = ptext $ sLit "args" + ppr Results = ptext $ sLit "results" + instance DF.DebugNodes Middle Last debugPpr :: Bool @@ -280,94 +396,78 @@ debugPpr = debugIsOn pprMiddle :: Middle -> SDoc pprMiddle stmt = pp_stmt <+> pp_debug - where - pp_stmt = case stmt of - - CopyIn conv args _ -> - if null args then ptext (sLit "empty CopyIn") - else commafy (map pprKinded args) <+> equals <+> - ptext (sLit "foreign") <+> doubleQuotes(ppr conv) <+> ptext (sLit "...") - - CopyOut conv args -> - ptext (sLit "PreCopyOut: next, pass") <+> doubleQuotes(ppr conv) <+> - parens (commafy (map pprKinded 
args)) - - -- // text - MidComment s -> text "//" <+> ftext s - - -- reg = expr; - MidAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi - - -- rep[lv] = expr; - MidStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi - where - rep = ppr ( cmmExprRep expr ) - - -- call "ccall" foo(x, y)[r1, r2]; - -- ToDo ppr volatile - MidUnsafeCall (CmmCallee fn cconv) results args -> - hcat [ if null results - then empty - else parens (commafy $ map ppr results) <> - ptext (sLit " = "), - ptext (sLit "call"), space, - doubleQuotes(ppr cconv), space, - ppr_target fn, parens ( commafy $ map ppr args ), - semi ] - - MidUnsafeCall (CmmPrim op) results args -> - pprMiddle (MidUnsafeCall (CmmCallee (CmmLit lbl) CCallConv) results args) - where - lbl = CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False) - - MidAddToContext ra args -> - hcat [ ptext (sLit "return via ") - , ppr_target ra, parens (commafy $ map ppr args), semi ] - - pp_debug = - if not debugPpr then empty - else text " //" <+> - case stmt of - CopyIn {} -> text "CopyIn" - CopyOut {} -> text "CopyOut" - MidComment {} -> text "MidComment" - MidAssign {} -> text "MidAssign" - MidStore {} -> text "MidStore" - MidUnsafeCall {} -> text "MidUnsafeCall" - MidAddToContext {} -> text "MidAddToContext" - + where + pp_stmt = case stmt of + -- // text + MidComment s -> text "//" <+> ftext s + + -- reg = expr; + MidAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + MidStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = ppr ( cmmExprType expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + MidUnsafeCall target results args -> + hsep [ if null results + then empty + else parens (commafy $ map ppr results) <+> equals, + ptext $ sLit "call", + ppr_call_target target <> parens (commafy $ map ppr args) <> semi] + + MidAddToContext ra args -> + hcat [ ptext $ sLit "return via " + , ppr_target ra, parens (commafy $ 
map ppr args), semi ] + + pp_debug = + if not debugPpr then empty + else text " //" <+> + case stmt of + MidComment {} -> text "MidComment" + MidAssign {} -> text "MidAssign" + MidStore {} -> text "MidStore" + MidUnsafeCall {} -> text "MidUnsafeCall" + MidAddToContext {} -> text "MidAddToContext" + +ppr_fc :: ForeignConvention -> SDoc +ppr_fc (ForeignConvention c _ _) = doubleQuotes (ppr c) + +ppr_call_target :: MidCallTarget -> SDoc +ppr_call_target (ForeignTarget fn c) = ppr_fc c <+> ppr_target fn +ppr_call_target (PrimTarget op) = ppr (CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False)) ppr_target :: CmmExpr -> SDoc ppr_target t@(CmmLit _) = ppr t ppr_target fn' = parens (ppr fn') - -pprKinded :: Outputable a => CmmKinded a -> SDoc -pprKinded (CmmKinded a NoHint) = ppr a -pprKinded (CmmKinded a PtrHint) = doubleQuotes (text "address") <+> ppr a -pprKinded (CmmKinded a SignedHint) = doubleQuotes (text "signed") <+> ppr a -pprKinded (CmmKinded a FloatHint) = doubleQuotes (text "float") <+> ppr a +pprHinted :: Outputable a => CmmHinted a -> SDoc +pprHinted (CmmHinted a NoHint) = ppr a +pprHinted (CmmHinted a AddrHint) = doubleQuotes (text "address") <+> ppr a +pprHinted (CmmHinted a SignedHint) = doubleQuotes (text "signed") <+> ppr a pprLast :: Last -> SDoc -pprLast stmt = (case stmt of - LastBranch ident -> ptext (sLit "goto") <+> ppr ident <> semi - LastCondBranch expr t f -> genFullCondBranch expr t f - LastJump expr -> hcat [ ptext (sLit "jump"), space, pprFun expr - , ptext (sLit "(...)"), semi] - LastReturn -> hcat [ ptext (sLit "return"), space - , ptext (sLit "(...)"), semi] - LastSwitch arg ids -> ppr $ CmmSwitch arg ids - LastCall tgt k -> genBareCall tgt k - ) <> - if debugPpr then empty - else text " //" <+> - case stmt of - LastBranch {} -> text "LastBranch" - LastCondBranch {} -> text "LastCondBranch" - LastJump {} -> text "LastJump" - LastReturn {} -> text "LastReturn" - LastSwitch {} -> text "LastSwitch" - LastCall {} -> text 
"LastCall" +pprLast stmt = pp_stmt <+> pp_debug + where + pp_stmt = case stmt of + LastBranch ident -> ptext (sLit "goto") <+> ppr ident <> semi + LastCondBranch expr t f -> genFullCondBranch expr t f + LastJump expr _ -> hcat [ ptext (sLit "jump"), space, pprFun expr + , ptext (sLit "(...)"), semi] + LastReturn _ -> hcat [ ptext (sLit "return"), space + , ptext (sLit "(...)"), semi] + LastSwitch arg ids -> ppr $ CmmSwitch arg ids + LastCall tgt k _ -> genBareCall tgt k + + pp_debug = text " //" <+> case stmt of + LastBranch {} -> text "LastBranch" + LastCondBranch {} -> text "LastCondBranch" + LastJump {} -> text "LastJump" + LastReturn {} -> text "LastReturn" + LastSwitch {} -> text "LastSwitch" + LastCall {} -> text "LastCall" genBareCall :: CmmExpr -> Maybe BlockId -> SDoc genBareCall fn k = @@ -393,119 +493,12 @@ genFullCondBranch expr t f = ] pprConvention :: Convention -> SDoc -pprConvention (ConventionStandard c _) = ppr c -pprConvention (ConventionPrivate {} ) = text "" +pprConvention (Native {}) = empty +pprConvention (Foreign c) = ppr c +pprConvention (Private {}) = text "" + +pprForeignConvention :: ForeignConvention -> SDoc +pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs commafy :: [SDoc] -> SDoc commafy xs = hsep $ punctuate comma xs - - ----------------------------------------------------------------- --- | The purpose of this function is to print a Cmm zipper graph "as if it were" --- a Cmm program. The objective is dodgy, so it's unsurprising parts of the --- code are dodgy as well. 
- -pprCmmGraphLikeCmm :: CmmGraph -> SDoc -pprCmmGraphLikeCmm g = vcat (swallow blocks) - where blocks = Z.postorder_dfs g - swallow :: [CmmBlock] -> [SDoc] - swallow [] = [] - swallow (Z.Block id t : rest) = tail id [] Nothing t rest - tail id prev' out (Z.ZTail (CopyOut conv args) t) rest = - if isJust out then panic "multiple CopyOut nodes in one basic block" - else - tail id (prev') (Just (conv, args)) t rest - tail id prev' out (Z.ZTail m t) rest = tail id (mid m : prev') out t rest - tail id prev' out (Z.ZLast Z.LastExit) rest = exit id prev' out rest - tail id prev' out (Z.ZLast (Z.LastOther l)) rest = last id prev' out l rest - mid (CopyIn _ [] _) = text "// proc point (no parameters)" - mid m@(CopyIn {}) = ppr m <+> text "(proc point)" - mid m = ppr m - block' id prev' - | id == Z.lg_entry g, entry_has_no_pred = - vcat (text "" : reverse prev') - | otherwise = hang (ppr id <> colon) 4 (vcat (reverse prev')) - last id prev' out l n = - let endblock stmt = block' id (stmt : prev') : swallow n in - case l of - LastBranch tgt -> - case n of - Z.Block id' t : bs - | tgt == id', unique_pred id' - -> tail id prev' out t bs -- optimize out redundant labels - _ -> endblock (ppr $ CmmBranch tgt) - l@(LastCondBranch expr tid fid) -> - let ft id = text "// fall through to " <> ppr id in - case n of - Z.Block id' t : bs - | id' == fid, isNothing out -> - tail id (ft fid : ppr (CmmCondBranch expr tid) : prev') Nothing t bs - | id' == tid, Just e' <- maybeInvertCmmExpr expr, isNothing out-> - tail id (ft tid : ppr (CmmCondBranch e' fid) : prev') Nothing t bs - _ -> endblock $ with_out out l - l@(LastJump {}) -> endblock $ with_out out l - l@(LastReturn {}) -> endblock $ with_out out l - l@(LastSwitch {}) -> endblock $ with_out out l - l@(LastCall _ Nothing) -> endblock $ with_out out l - l@(LastCall tgt (Just k)) - | Z.Block id' (Z.ZTail (CopyIn _ ress srt) t) : bs <- n, - Just (conv, args) <- out, - id' == k -> - let call = CmmCall tgt' ress args (CmmSafe srt) 
CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - ppcall = ppr call <+> parens (text "ret to" <+> ppr k) - in if unique_pred k then - tail id (ppcall : prev') Nothing t bs - else - endblock (ppcall) - | Z.Block id' t : bs <- n, id' == k, unique_pred k, - Just (conv, args) <- out, - Just (ress, srt) <- findCopyIn t -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - delayed = - ptext (sLit "// delayed CopyIn follows previous call") - in tail id (delayed : ppr call : prev') Nothing t bs - | otherwise -> endblock $ with_out out l - findCopyIn (Z.ZTail (CopyIn _ ress srt) _) = Just (ress, srt) - findCopyIn (Z.ZTail _ t) = findCopyIn t - findCopyIn (Z.ZLast _) = Nothing - exit id prev' out n = -- highly irregular (assertion violation?) - let endblock stmt = block' id (stmt : prev') : swallow n in - case out of Nothing -> endblock (text "// ") - Just (conv, args) -> endblock (ppr (CopyOut conv args) $$ - text "// ") - preds = zipPreds g - entry_has_no_pred = case lookupBlockEnv preds (Z.lg_entry g) of - Nothing -> True - Just s -> isEmptyUniqSet s - single_preds = - let add b single = - let id = Z.blockId b - in case lookupBlockEnv preds id of - Nothing -> single - Just s -> if sizeUniqSet s == 1 then - extendBlockSet single id - else single - in Z.fold_blocks add emptyBlockSet g - unique_pred id = elemBlockSet id single_preds - cconv_of_conv (ConventionStandard conv _) = conv - cconv_of_conv (ConventionPrivate {}) = CmmCallConv -- XXX totally bogus - -with_out :: Maybe (Convention, CmmActuals) -> Last -> SDoc -with_out Nothing l = ptext (sLit "??no-arguments??") <+> ppr l -with_out (Just (conv, args)) l = last l - where last (LastCall e k) = - hcat [ptext (sLit "... 
= foreign "), - doubleQuotes(ppr conv), space, - ppr_target e, parens ( commafy $ map ppr args ), - ptext (sLit " \"safe\""), - case k of Nothing -> ptext (sLit " never returns") - Just _ -> empty, - semi ] - last (LastReturn) = ppr (CmmReturn args) - last (LastJump e) = ppr (CmmJump e args) - last l = ppr (CopyOut conv args) $$ ppr l - ppr_target (CmmLit lit) = ppr lit - ppr_target fn' = parens (ppr fn') - commafy xs = hsep $ punctuate comma xs diff --git a/compiler/cmm/ZipCfgExtras.hs b/compiler/cmm/ZipCfgExtras.hs index bd26aca..acddbae 100644 --- a/compiler/cmm/ZipCfgExtras.hs +++ b/compiler/cmm/ZipCfgExtras.hs @@ -24,7 +24,7 @@ exit :: LGraph m l -> FGraph m l -- focus on edge into default exit n -- (fails if there isn't one) focusp :: (Block m l -> Bool) -> LGraph m l -> Maybe (FGraph m l) -- focus on start of block satisfying predicate -unfocus :: FGraph m l -> LGraph m l -- lose focus +-- unfocus :: FGraph m l -> LGraph m l -- lose focus -- | We can insert a single-entry, single-exit subgraph at -- the current focus. 
@@ -37,16 +37,16 @@ splice_focus_exit :: FGraph m l -> LGraph m l -> FGraph m l _unused :: () _unused = all `seq` () - where all = ( exit, focusp, unfocus {- , splice_focus_entry, splice_focus_exit -} + where all = ( exit, focusp --, unfocus {- , splice_focus_entry, splice_focus_exit -} , foldM_fwd_block (\_ a -> Just a) ) -unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs) +--unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs) -focusp p (LGraph entry blocks) = +focusp p (LGraph entry _ blocks) = fmap (\(b, bs) -> FGraph entry (unzip b) bs) (splitp_blocks p blocks) -exit g@(LGraph eid _) = FGraph eid (ZBlock h (ZLast l)) others +exit g@(LGraph eid _ _) = FGraph eid (ZBlock h (ZLast l)) others where FGraph _ b others = focusp is_exit g `orElse` panic "no exit in flow graph" (h, l) = goto_end b @@ -65,7 +65,7 @@ splice_focus_exit (FGraph eid (ZBlock head tail) blocks) g = foldM_fwd_block :: Monad m => (BlockId -> a -> m a) -> (mid -> a -> m a) -> (ZLast l -> a -> m a) -> Block mid l -> a -> m a -foldM_fwd_block first middle last (Block id t) z = do { z <- first id z; tail t z } +foldM_fwd_block first middle last (Block id _ t) z = do { z <- first id z; tail t z } where tail (ZTail m t) z = do { z <- middle m z; tail t z } tail (ZLast l) z = last l z diff --git a/compiler/cmm/ZipDataflow.hs b/compiler/cmm/ZipDataflow.hs index 97b146c..de2f53d 100644 --- a/compiler/cmm/ZipDataflow.hs +++ b/compiler/cmm/ZipDataflow.hs @@ -5,6 +5,7 @@ module ZipDataflow ( DebugNodes(), RewritingDepth(..), LastOutFacts(..) , zdfSolveFrom, zdfRewriteFrom + , zdfSolveFromL , ForwardTransfers(..), BackwardTransfers(..) , ForwardRewrites(..), BackwardRewrites(..) 
, ForwardFixedPoint, BackwardFixedPoint @@ -14,12 +15,14 @@ module ZipDataflow , zdfDecoratedGraph -- not yet implemented , zdfFpContents , zdfFpLastOuts + , zdfBRewriteFromL, zdfFRewriteFromL ) where import BlockId import CmmTx import DFMonad +import OptimizationFuel as F import MkZipCfg import ZipCfg import qualified ZipCfg as G @@ -263,6 +266,15 @@ class DataflowSolverDirection transfers fixedpt where -> a -- ^ Fact flowing in (at entry or exit) -> Graph m l -- ^ Graph to be analyzed -> FuelMonad (fixedpt m l a ()) -- ^ Answers + zdfSolveFromL :: (DebugNodes m l, Outputable a) + => BlockEnv a -- Initial facts (unbound == bottom) + -> PassName + -> DataflowLattice a -- Lattice + -> transfers m l a -- Dataflow transfer functions + -> a -- Fact flowing in (at entry or exit) + -> LGraph m l -- Graph to be analyzed + -> FuelMonad (fixedpt m l a ()) -- Answers + zdfSolveFromL b p l t a g = zdfSolveFrom b p l t a $ quickGraph g -- There are exactly two instances: forward and backward instance DataflowSolverDirection ForwardTransfers ForwardFixedPoint @@ -307,6 +319,59 @@ class DataflowSolverDirection transfers fixedpt => -> Graph m l -> FuelMonad (fixedpt m l a (Graph m l)) +-- Temporarily lifting from Graph to LGraph -- an experiment to see how we +-- can eliminate some hysteresis between Graph and LGraph. +-- Perhaps Graph should be confined to dataflow code. 
+-- Trading space for time +quickGraph :: LastNode l => LGraph m l -> Graph m l +quickGraph g = Graph (ZLast $ mkBranchNode $ lg_entry g) $ lg_blocks g + +quickLGraph :: LastNode l => Int -> Graph m l -> FuelMonad (LGraph m l) +quickLGraph args (Graph (ZLast (LastOther l)) blockenv) + | isBranchNode l = return $ LGraph (branchNodeTarget l) args blockenv +quickLGraph args g = F.lGraphOfGraph g args + +fixptWithLGraph :: LastNode l => Int -> CommonFixedPoint m l fact (Graph m l) -> + FuelMonad (CommonFixedPoint m l fact (LGraph m l)) +fixptWithLGraph args cfp = + do fp_c <- quickLGraph args $ fp_contents cfp + return $ cfp {fp_contents = fp_c} + +ffixptWithLGraph :: LastNode l => Int -> ForwardFixedPoint m l fact (Graph m l) -> + FuelMonad (ForwardFixedPoint m l fact (LGraph m l)) +ffixptWithLGraph args fp = + do common <- fixptWithLGraph args $ ffp_common fp + return $ fp {ffp_common = common} + +zdfFRewriteFromL :: (DebugNodes m l, Outputable a) + => RewritingDepth -- whether to rewrite a rewritten graph + -> BlockEnv a -- initial facts (unbound == botton) + -> PassName + -> DataflowLattice a + -> ForwardTransfers m l a + -> ForwardRewrites m l a + -> a -- fact flowing in (at entry or exit) + -> LGraph m l + -> FuelMonad (ForwardFixedPoint m l a (LGraph m l)) +zdfFRewriteFromL d b p l t r a g@(LGraph _ args _) = + do fp <- zdfRewriteFrom d b p l t r a $ quickGraph g + ffixptWithLGraph args fp + +zdfBRewriteFromL :: (DebugNodes m l, Outputable a) + => RewritingDepth -- whether to rewrite a rewritten graph + -> BlockEnv a -- initial facts (unbound == botton) + -> PassName + -> DataflowLattice a + -> BackwardTransfers m l a + -> BackwardRewrites m l a + -> a -- fact flowing in (at entry or exit) + -> LGraph m l + -> FuelMonad (BackwardFixedPoint m l a (LGraph m l)) +zdfBRewriteFromL d b p l t r a g@(LGraph _ args _) = + do fp <- zdfRewriteFrom d b p l t r a $ quickGraph g + fixptWithLGraph args fp + + data RewritingDepth = RewriteShallow | RewriteDeep -- When a 
transformation proposes to rewrite a node, -- you can either ask the system to @@ -363,25 +428,15 @@ rewrite_f_agraph depth start_facts name lattice transfers rewrites in_fact g = areturn :: AGraph m l -> DFM a (Graph m l) areturn g = liftToDFM $ liftUniq $ graphOfAGraph g - -{- -graphToLGraph :: LastNode l => Graph m l -> DFM a (LGraph m l) -graphToLGraph (Graph (ZLast (LastOther l)) blockenv) - | isBranchNode l = return $ LGraph (branchNodeTarget l) blockenv -graphToLGraph (Graph tail blockenv) = - do id <- freshBlockId "temporary entry label" - return $ LGraph id $ insertBlock (Block id tail) blockenv --} - -- | Here we prefer not simply to slap on 'goto eid' because this -- introduces an unnecessary basic block at each rewrite, and we don't -- want to stress out the finite map more than necessary lgraphToGraph :: LastNode l => LGraph m l -> Graph m l -lgraphToGraph (LGraph eid blocks) = +lgraphToGraph (LGraph eid _ blocks) = if flip any (eltsUFM blocks) $ \block -> any (== eid) (succs block) then Graph (ZLast (mkBranchNode eid)) blocks else -- common case: entry is not a branch target - let Block _ entry = lookupBlockEnv blocks eid `orElse` panic "missing entry!" + let Block _ _ entry = lookupBlockEnv blocks eid `orElse` panic "missing entry!" in Graph entry (delFromUFM blocks eid) @@ -473,7 +528,7 @@ forward_sol check_maybe = forw solve finish in_fact (Graph entry blockenv) fuel = let blocks = G.postorder_dfs_from blockenv entry set_or_save = mk_set_or_save (isJust . 
lookupBlockEnv blockenv) - set_successor_facts (Block id tail) fuel = + set_successor_facts (Block id _ tail) fuel = do { idfact <- getFact id ; (last_outs, fuel) <- case check_maybe fuel $ fr_first rewrites idfact id of @@ -588,10 +643,10 @@ forward_rew check_maybe = forw in do { solve depth name start transfers rewrites in_fact g fuel ; eid <- freshBlockId "temporary entry id" ; (rewritten, fuel) <- - rew_tail (ZFirst eid) in_fact entry emptyBlockEnv fuel + rew_tail (ZFirst eid Nothing) in_fact entry emptyBlockEnv fuel ; (rewritten, fuel) <- rewrite_blocks blocks rewritten fuel ; a <- finish - ; return (a, lgraphToGraph (LGraph eid rewritten), fuel) + ; return (a, lgraphToGraph (LGraph eid 0 rewritten), fuel) } don't_rewrite facts finish in_fact g fuel = do { solve depth name facts transfers rewrites in_fact g fuel @@ -614,8 +669,8 @@ forward_rew check_maybe = forw rewrite_blocks :: [Block m l] -> (BlockEnv (Block m l)) -> Fuel -> DFM a (BlockEnv (Block m l), Fuel) rewrite_blocks [] rewritten fuel = return (rewritten, fuel) - rewrite_blocks (G.Block id t : bs) rewritten fuel = - do let h = ZFirst id + rewrite_blocks (G.Block id off t : bs) rewritten fuel = + do let h = ZFirst id off a <- getFact id case check_maybe fuel $ fr_first rewrites a id of Nothing -> do { (rewritten, fuel) <- @@ -625,7 +680,7 @@ forward_rew check_maybe = forw Just g -> do { markGraphRewritten ; g <- areturn g ; (outfact, g, fuel) <- inner_rew getExitFact a g fuel - ; let (blocks, h) = splice_head' (ZFirst id) g + ; let (blocks, h) = splice_head' h g ; (rewritten, fuel) <- rew_tail h outfact t (blocks `plusUFM` rewritten) fuel ; rewrite_blocks bs rewritten fuel } @@ -756,15 +811,16 @@ backward_sol check_maybe = back in do { fuel <- run "backward" name set_block_fact blocks fuel ; eid <- freshBlockId "temporary entry id" - ; fuel <- set_block_fact (Block eid entry) fuel + ; fuel <- set_block_fact (Block eid Nothing entry) fuel ; a <- getFact eid ; forgetFact eid ; return (a, fuel) } - 
set_head_fact (G.ZFirst id) a fuel = + set_head_fact (G.ZFirst id _) a fuel = case check_maybe fuel $ br_first rewrites a id of - Nothing -> do { my_trace "set_head_fact" (ppr id) $ + Nothing -> do { my_trace "set_head_fact" (ppr id <+> text "=" <+> + ppr (bt_first_in transfers a id)) $ setFact id $ bt_first_in transfers a id ; return fuel } Just g -> do { (a, fuel) <- subsolve g a fuel @@ -839,16 +895,19 @@ backward_rew check_maybe = back rewrite start g exit_fact fuel = let Graph entry blockenv = g blocks = reverse $ G.postorder_dfs_from blockenv entry - in do { solve depth name start transfers rewrites g exit_fact fuel - ; env <- getAllFacts + in do { (FP env in_fact _ _ _, _) <- -- don't drop the entry fact! + solve depth name start transfers rewrites g exit_fact fuel + --; env <- getAllFacts ; my_trace "facts after solving" (ppr env) $ return () ; eid <- freshBlockId "temporary entry id" ; (rewritten, fuel) <- rewrite_blocks True blocks emptyBlockEnv fuel -- We can't have the fact check fail on the bogus entry, which _may_ change - ; (rewritten, fuel) <- rewrite_blocks False [Block eid entry] rewritten fuel - ; a <- getFact eid - ; return (a, lgraphToGraph (LGraph eid rewritten), fuel) - } + ; (rewritten, fuel) <- rewrite_blocks False [Block eid Nothing entry] rewritten fuel + ; my_trace "eid" (ppr eid) $ return () + ; my_trace "exit_fact" (ppr exit_fact) $ return () + ; my_trace "in_fact" (ppr in_fact) $ return () + ; return (in_fact, lgraphToGraph (LGraph eid 0 rewritten), fuel) + } -- Remember: the entry fact computed by @solve@ accounts for rewriting don't_rewrite facts g exit_fact fuel = do { (fp, _) <- solve depth name facts transfers rewrites g exit_fact fuel @@ -901,12 +960,13 @@ backward_rew check_maybe = back return () ; (a, g, fuel) <- inner_rew g a fuel ; let Graph t newblocks = G.splice_tail g tail - ; propagate check fuel h a t (newblocks `plusUFM` rewritten) } - propagate check fuel (ZFirst id) a tail rewritten = + ; my_trace "propagating facts" 
(ppr a) $ + propagate check fuel h a t (newblocks `plusUFM` rewritten) } + propagate check fuel (ZFirst id off) a tail rewritten = case maybeRewriteWithFuel fuel $ br_first rewrites a id of Nothing -> do { if check then checkFactMatch id $ bt_first_in transfers a id else return () - ; return (insertBlock (Block id tail) rewritten, fuel) } + ; return (insertBlock (Block id off tail) rewritten, fuel) } Just g -> do { markGraphRewritten ; g <- areturn g @@ -915,7 +975,7 @@ backward_rew check_maybe = back ; (a, g, fuel) <- inner_rew g a fuel ; if check then checkFactMatch id a else return () ; let Graph t newblocks = G.splice_tail g tail - ; let r = insertBlock (Block id t) (newblocks `plusUFM` rewritten) + ; let r = insertBlock (Block id off t) (newblocks `plusUFM` rewritten) ; return (r, fuel) } in fixed_pt_and_fuel @@ -978,13 +1038,14 @@ run dir name do_block blocks b = unchanged depth = my_nest depth (text "facts for" <+> graphId <+> text "are unchanged") - graphId = case blocks of { Block id _ : _ -> ppr id ; [] -> text "" } + graphId = case blocks of { Block id _ _ : _ -> ppr id ; [] -> text "" } show_blocks = my_trace "Blocks:" (vcat (map pprBlock blocks)) - pprBlock (Block id t) = nest 2 (pprFact (id, t)) + pprBlock (Block id off t) = nest 2 (pprFact' (id, off, t)) pprFacts depth n env = my_nest depth (text "facts for iteration" <+> pp_i n <+> text "are:" $$ (nest 2 $ vcat $ map pprFact $ ufmToList env)) - pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + pprFact' (id, off, a) = hang (ppr id <> parens (ppr off) <> colon) 4 (ppr a) f4sep :: [SDoc] -> SDoc diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index 6677693..1928308 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -286,7 +286,7 @@ getCgIdInfo id name = idName id in if isExternalName name then do - let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name)) + let ext_lbl = CmmLit (CmmLabel 
(mkClosureLabel name $ idCafInfo id)) return (stableIdInfo id ext_lbl (mkLFImported id)) else if isVoidArg (idCgRep id) then @@ -447,10 +447,7 @@ bindNewToTemp id return temp_reg where uniq = getUnique id - temp_reg = LocalReg uniq (argMachRep (idCgRep id)) kind - kind = if isFollowableArg (idCgRep id) - then GCKindPtr - else GCKindNonPtr + temp_reg = LocalReg uniq (argMachRep (idCgRep id)) lf_info = mkLFArgument id -- Always used of things we -- know nothing about diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index 752769f..87c69b6 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -39,7 +39,6 @@ import CgUtils import CgMonad import SMRep -import MachOp import Cmm import CLabel @@ -149,7 +148,7 @@ mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness mkLiveness name size bits | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word = do { let lbl = mkBitmapLabel (getUnique name) - ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + ; emitRODataLits "mkLiveness" lbl ( mkWordCLit (fromIntegral size) : map mkWordCLit bits) ; return (BigLiveness lbl) } @@ -196,7 +195,7 @@ mkRegLiveness regs ptrs nptrs all_non_ptrs = 0xff reg_bits [] = 0 - reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + reg_bits ((id, VanillaReg i _) : regs) | isFollowableArg (idCgRep id) = (1 `shiftL` (i - 1)) .|. 
reg_bits regs reg_bits (_ : regs) = reg_bits regs @@ -264,8 +263,8 @@ slowCallPattern _ = panic "CgStackery.slowCallPattern" ------------------------------------------------------------------------- dataReturnConvPrim :: CgRep -> CmmReg -dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) -dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1 VGcPtr) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1 VNonGcPtr) dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) @@ -288,7 +287,7 @@ getSequelAmode = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo ; case sequel of OnStack -> do { sp_rel <- getSpRelOffset virt_sp - ; returnFC (CmmLoad sp_rel wordRep) } + ; returnFC (CmmLoad sp_rel bWord) } UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel)) CaseAlts lbl _ _ -> returnFC (CmmLit (CmmLabel lbl)) @@ -361,7 +360,7 @@ assign_regs args supply where go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter) go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and - = go args acc supply -- there's nothign to bind them to + = go args acc supply -- there's nothing to bind them to go ((rep,arg) : args) acc supply = case assign_reg rep supply of Just (reg, supply') -> go args ((arg,reg):acc) supply' @@ -370,9 +369,9 @@ assign_regs args supply assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs) assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls)) assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls)) -assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls)) -assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) -assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) +assign_reg LongArg (vs, fs, ds, l:ls) = pprTrace "longArg" (ppr l) $ 
Just (LongReg l, (vs, fs, ds, ls)) +assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v VGcPtr, (vs, fs, ds, ls)) +assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v VNonGcPtr, (vs, fs, ds, ls)) -- PtrArg and NonPtrArg both go in a vanilla register assign_reg other not_enough_regs = Nothing @@ -430,11 +429,11 @@ mkRegTbl_allRegs regs_in_use mkRegTbl' regs_in_use vanillas floats doubles longs = (ok_vanilla, ok_float, ok_double, ok_long) where - ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_vanilla = mapCatMaybes (select (\i -> VanillaReg i VNonGcPtr)) vanillas + -- ptrhood isn't looked at, hence we can use any old rep. ok_float = mapCatMaybes (select FloatReg) floats ok_double = mapCatMaybes (select DoubleReg) doubles ok_long = mapCatMaybes (select LongReg) longs - -- rep isn't looked at, hence we can use any old rep. select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int -- one we've unboxed the Int, we make a GlobalReg diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index 49c782e..859b220 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -36,7 +36,6 @@ import ClosureInfo import SMRep import CmmUtils import Cmm -import MachOp import StgSyn import StaticFlags @@ -164,8 +163,8 @@ cgCase (StgOpApp op@(StgFCallOp fcall _) args _) do -- *must* be an unboxed tuple alt. -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. 
{ res_tmps <- mapFCs bindNewToTemp non_void_res_ids - ; let res_hints = map (typeHint.idType) non_void_res_ids - ; cgForeignCall (zipWith CmmKinded res_tmps res_hints) fcall args live_in_alts + ; let res_hints = map (typeForeignHint.idType) non_void_res_ids + ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts ; cgExpr rhs } where (_, res_ids, _, rhs) = head alts @@ -340,7 +339,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts (_,e) <- getArgAmode arg return e do_enum_primop primop - = do tmp <- newNonPtrTemp wordRep + = do tmp <- newTemp bWord cgPrimOp [tmp] primop args live_in_alts returnFC (CmmReg (CmmLocal tmp)) @@ -612,6 +611,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC restoreCurrentCostCentre (Just slot) freeit = do { sp_rel <- getSpRelOffset slot ; whenC freeit (freeStackSlots [slot]) - ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) } \end{code} diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 80949e7..b7f9f3b 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -38,7 +38,6 @@ import CgCallConv import CgUtils import ClosureInfo import SMRep -import MachOp import Cmm import CmmUtils import CLabel @@ -85,7 +84,7 @@ cgTopRhsClosure id ccs binder_info upd_flag args body = do ; mod_name <- getModuleName ; let descr = closureDescription mod_name name closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr - closure_label = mkLocalClosureLabel name + closure_label = mkLocalClosureLabel name $ idCafInfo id cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info closure_rep = mkStaticClosureFields closure_info ccs True [] @@ -259,6 +258,7 @@ closureCodeBody binder_info cl_info cc [{- No args i.e. 
thunk -}] body = do -- in update frame CAF/DICT functions will be -- subsumed by this enclosing cc { enterCostCentre cl_info cc body + ; stmtsC [CmmComment $ mkFastString $ showSDoc $ ppr body] ; cgExpr body } } @@ -282,7 +282,7 @@ closureCodeBody binder_info cl_info cc args body (sp_top, stk_args) = mkVirtStkOffsets vSp other_args -- Allocate the global ticky counter - ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) (clHasCafRefs cl_info) ; emitTickyCounter cl_info args sp_top -- ...and establish the ticky-counter @@ -355,7 +355,8 @@ mkSlowEntryCode cl_info reg_args | otherwise = return noStmts where name = closureName cl_info - slow_lbl = mkSlowEntryLabel name + has_caf_refs = clHasCafRefs cl_info + slow_lbl = mkSlowEntryLabel name has_caf_refs load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts @@ -372,13 +373,13 @@ mkSlowEntryCode cl_info reg_args (argMachRep rep)) save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets - mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg ) + mk_save (rep,reg) offset = ASSERT( argMachRep rep `cmmEqType` globalRegType reg ) CmmStore (cmmRegOffW spReg offset) (CmmReg (CmmGlobal reg)) stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) - jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) [] + jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name has_caf_refs)) [] \end{code} @@ -565,7 +566,7 @@ link_caf cl_info is_upd = do -- so that the garbage collector can find them -- This must be done *before* the info table pointer is overwritten, -- because the old info table ptr is needed for reversion - ; emitRtsCallWithVols (sLit "newCAF") [CmmKinded (CmmReg nodeReg) PtrHint] [node] False + ; emitRtsCallWithVols (sLit "newCAF") [CmmHinted 
(CmmReg nodeReg) AddrHint] [node] False -- node is live, so save it. -- Overwrite the closure with a (static) indirection diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index ff012ef..b22e56f 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -47,6 +47,7 @@ import Constants import TyCon import DataCon import Id +import IdInfo import Type import PrelInfo import Outputable @@ -82,7 +83,7 @@ cgTopRhsCon id con args ; let name = idName id lf_info = mkConLFInfo con - closure_label = mkClosureLabel name + closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args (closure_info, amodes_w_offsets) = layOutStaticConstr con amodes closure_rep = mkStaticClosureFields @@ -142,7 +143,8 @@ at all. \begin{code} buildDynCon binder cc con [] = returnFC (taggedStableIdInfo binder - (mkLblExpr (mkClosureLabel (dataConName con))) + (mkLblExpr (mkClosureLabel (dataConName con) + (idCafInfo binder))) (mkConLFInfo con) con) \end{code} @@ -174,7 +176,7 @@ buildDynCon binder cc con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE - = do { let intlike_lbl = mkRtsDataLabel (sLit "stg_INTLIKE_closure") + = do { let intlike_lbl = mkRtsGcPtrLabel (sLit "stg_INTLIKE_closure") offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) @@ -185,7 +187,7 @@ buildDynCon binder cc con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE - = do { let charlike_lbl = mkRtsDataLabel (sLit "stg_CHARLIKE_closure") + = do { let charlike_lbl = mkRtsGcPtrLabel (sLit "stg_CHARLIKE_closure") offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit 
(cmmLabelOffW charlike_lbl offsetW) @@ -401,9 +403,8 @@ cgTyCon tycon -- code appears to put it before --- NR 16 Aug 2007 ; extra <- if isEnumerationTyCon tycon then do - tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel - (tyConName tycon)) - [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con) + tbl <- getCmm (emitRODataLits "cgTyCon" (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) (tagForCon con) | con <- tyConDataCons tycon]) return [tbl] else diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index f22071e..3b75267 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -37,7 +37,7 @@ import CgHpc import CgUtils import ClosureInfo import Cmm -import MachOp +import CmmUtils import VarSet import Literal import PrimOp @@ -48,6 +48,7 @@ import Maybes import ListSetOps import BasicTypes import Util +import FastString import Outputable \end{code} @@ -128,18 +129,15 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, nonVoidArg rep] - arg_tmps <- sequence [ - if isFollowableArg (typeCgRep (stgArgType stg_arg)) - then assignPtrTemp arg - else assignNonPtrTemp arg - | (arg, stg_arg) <- arg_exprs] - let arg_hints = zipWith CmmKinded arg_tmps (map (typeHint.stgArgType) stg_args) + arg_tmps <- sequence [ assignTemp arg + | (arg, stg_arg) <- arg_exprs] + let arg_hints = zipWith CmmHinted arg_tmps (map (typeForeignHint.stgArgType) stg_args) {- Now, allocate some result regs. -} (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty ccallReturnUnboxedTuple (zip res_reps (map (CmmReg . 
CmmLocal) res_regs)) $ - emitForeignCall (zipWith CmmKinded res_regs res_hints) fcall + emitForeignCall (zipWith CmmHinted res_regs res_hints) fcall arg_hints emptyVarSet{-no live vars-} -- tagToEnum# is special: we need to pull the constructor out of the table, @@ -148,10 +146,7 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) = ASSERT(isEnumerationTyCon tycon) do { (rep,amode) <- getArgAmode arg - ; amode' <- if isFollowableArg rep - then assignPtrTemp amode - else assignNonPtrTemp amode - -- We're going to use it twice, + ; amode' <- assignTemp amode -- We're going to use it twice, -- so save in a temp if non-trivial ; stmtC (CmmAssign nodeReg (tagToClosure tycon amode')) ; performReturn emitReturnInstr } @@ -173,9 +168,7 @@ cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) performReturn emitReturnInstr | ReturnsPrim rep <- result_info - = do res <- if isFollowableArg (typeCgRep res_ty) - then newPtrTemp (argMachRep (typeCgRep res_ty)) - else newNonPtrTemp (argMachRep (typeCgRep res_ty)) + = do res <- newTemp (typeCmmType res_ty) cgPrimOp [res] primop args emptyVarSet performPrimReturn (primRepToCgRep rep) (CmmReg (CmmLocal res)) @@ -186,9 +179,7 @@ cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon -- c.f. cgExpr (...TagToEnumOp...) - = do tag_reg <- if isFollowableArg (typeCgRep res_ty) - then newPtrTemp wordRep - else newNonPtrTemp wordRep + = do tag_reg <- newTemp bWord -- The tag is a word cgPrimOp [tag_reg] primop args emptyVarSet stmtC (CmmAssign nodeReg (tagToClosure tycon @@ -455,16 +446,14 @@ cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder Little helper for primitives that return unboxed tuples. 
\begin{code} -newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [MachHint]) +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [ForeignHint]) newUnboxedTupleRegs res_ty = let ty_args = tyConAppArgs (repType res_ty) - (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + (reps,hints) = unzip [ (rep, typeForeignHint ty) | ty <- ty_args, let rep = typeCgRep ty, nonVoidArg rep ] - make_new_temp rep = if isFollowableArg rep - then newPtrTemp (argMachRep rep) - else newNonPtrTemp (argMachRep rep) + make_new_temp rep = newTemp (argMachRep rep) in do regs <- mapM make_new_temp reps return (reps,regs,hints) diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index b3d779e..6e33806 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -34,7 +34,6 @@ import TysPrim import CLabel import Cmm import CmmUtils -import MachOp import SMRep import ForeignCall import ClosureInfo @@ -49,7 +48,7 @@ import Control.Monad -- Code generation for Foreign Calls cgForeignCall - :: CmmFormals -- where to put the results + :: HintedCmmFormals -- where to put the results -> ForeignCall -- the op -> [StgArg] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -63,16 +62,16 @@ cgForeignCall results fcall stg_args live | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, nonVoidArg rep] - arg_hints = zipWith CmmKinded - arg_exprs (map (typeHint.stgArgType) stg_args) + arg_hints = zipWith CmmHinted + arg_exprs (map (typeForeignHint.stgArgType) stg_args) -- in emitForeignCall results fcall arg_hints live emitForeignCall - :: CmmFormals -- where to put the results + :: HintedCmmFormals -- where to put the results -> ForeignCall -- the op - -> [CmmKinded CmmExpr] -- arguments + -> [CmmHinted CmmExpr] -- arguments -> StgLiveVars -- live vars, in case we need to save them -> Code @@ -86,18 +85,18 @@ emitForeignCall results (CCall (CCallSpec target cconv safety)) args live = case target of 
StaticTarget lbl -> (args, CmmLit (CmmLabel (mkForeignLabel lbl call_size False))) - DynamicTarget -> case args of (CmmKinded fn _):rest -> (rest, fn) + DynamicTarget -> case args of (CmmHinted fn _):rest -> (rest, fn) -- in the stdcall calling convention, the symbol needs @size appended -- to it, where size is the total number of bytes of arguments. We -- attach this info to the CLabel here, and the CLabel pretty printer -- will generate the suffix when the label is printed. call_size - | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.kindlessCmm) args)) + | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprType.hintlessCmm) args)) | otherwise = Nothing -- ToDo: this might not be correct for 64-bit API - arg_size rep = max (machRepByteWidth rep) wORD_SIZE + arg_size rep = max (widthInBytes (typeWidth rep)) wORD_SIZE emitForeignCall _ (DNCall _) _ _ = panic "emitForeignCall: DNCall" @@ -106,9 +105,9 @@ emitForeignCall _ (DNCall _) _ _ -- alternative entry point, used by CmmParse emitForeignCall' :: Safety - -> CmmFormals -- where to put the results + -> HintedCmmFormals -- where to put the results -> CmmCallTarget -- the op - -> [CmmKinded CmmExpr] -- arguments + -> [CmmHinted CmmExpr] -- arguments -> Maybe [GlobalReg] -- live vars, in case we need to save them -> C_SRT -- the SRT of the calls continuation -> CmmReturnInfo @@ -124,8 +123,8 @@ emitForeignCall' safety results target args vols srt ret | otherwise = do -- Both 'id' and 'new_base' are GCKindNonPtr because they're -- RTS only objects and are not subject to garbage collection - id <- newNonPtrTemp wordRep - new_base <- newNonPtrTemp (cmmRegRep (CmmGlobal BaseReg)) + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) temp_args <- load_args_into_temps args temp_target <- load_target_into_temp target let (caller_save, caller_load) = callerSaveVolatileRegs vols @@ -134,16 +133,16 @@ emitForeignCall' safety results target args vols srt ret -- The CmmUnsafe arguments are 
only correct because this part -- of the code hasn't been moved into the CPS pass yet. -- Once that happens, this function will just emit a (CmmSafe srt) call, - -- and the CPS will will be the one to convert that + -- and the CPS will be the one to convert that -- to this sequence of three CmmUnsafe calls. stmtC (CmmCall (CmmCallee suspendThread CCallConv) - [ CmmKinded id PtrHint ] - [ CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint ] + [ CmmHinted id AddrHint ] + [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint ] CmmUnsafe ret) stmtC (CmmCall temp_target results temp_args CmmUnsafe ret) stmtC (CmmCall (CmmCallee resumeThread CCallConv) - [ CmmKinded new_base PtrHint ] - [ CmmKinded (CmmReg (CmmLocal id)) PtrHint ] + [ CmmHinted new_base AddrHint ] + [ CmmHinted (CmmReg (CmmLocal id)) AddrHint ] CmmUnsafe ret) -- Assign the result to BaseReg: we -- might now have a different Capability! @@ -163,9 +162,9 @@ resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "resumeThread"))) -- This is a HACK; really it should be done in the back end, but -- it's easier to generate the temporaries here. load_args_into_temps = mapM arg_assign_temp - where arg_assign_temp (CmmKinded e hint) = do + where arg_assign_temp (CmmHinted e hint) = do tmp <- maybe_assign_temp e - return (CmmKinded tmp hint) + return (CmmHinted tmp hint) load_target_into_temp (CmmCallee expr conv) = do tmp <- maybe_assign_temp expr @@ -179,7 +178,7 @@ maybe_assign_temp e -- don't use assignTemp, it uses its own notion of "trivial" -- expressions, which are wrong here. 
-- this is a NonPtr because it only duplicates an existing - reg <- newNonPtrTemp (cmmExprRep e) --TODO FIXME NOW + reg <- newTemp (cmmExprType e) --TODO FIXME NOW stmtC (CmmAssign (CmmLocal reg) e) return (CmmReg (CmmLocal reg)) @@ -201,13 +200,13 @@ emitSaveThreadState = do emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) emitLoadThreadState = do - tso <- newNonPtrTemp wordRep -- TODO FIXME NOW + tso <- newTemp bWord -- TODO FIXME NOW stmtsC [ -- tso = CurrentTSO; CmmAssign (CmmLocal tso) stgCurrentTSO, -- Sp = tso->sp; CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) - wordRep), + bWord), -- SpLim = tso->stack + RESERVED_STACK_WORDS; CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) rESERVED_STACK_WORDS) @@ -216,21 +215,21 @@ emitLoadThreadState = do -- and load the current cost centre stack from the TSO when profiling: when opt_SccProfilingOn $ stmtC (CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) wordRep)) + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) emitOpenNursery = stmtsC [ -- Hp = CurrentNursery->free - 1; - CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free gcWord) (-1)), -- HpLim = CurrentNursery->start + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; CmmAssign hpLim (cmmOffsetExpr - (CmmLoad nursery_bdescr_start wordRep) + (CmmLoad nursery_bdescr_start bWord) (cmmOffset (CmmMachOp mo_wordMul [ - CmmMachOp (MO_S_Conv I32 wordRep) - [CmmLoad nursery_bdescr_blocks I32], + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], CmmLit (mkIntCLit bLOCK_SIZE) ]) (-1) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 66d41d3..2529891 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -42,9 +42,9 @@ import ClosureInfo import SMRep import Cmm -import MachOp import CmmUtils import Id +import 
IdInfo import DataCon import TyCon import CostCentre @@ -191,7 +191,7 @@ mkStaticClosureFields cl_info ccs caf_refs payload = mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info -- CAFs must have consistent layout, regardless of whether they -- are actually updatable or not. The layout of a CAF is: @@ -226,7 +226,6 @@ mkStaticClosureFields cl_info ccs caf_refs payload | caf_refs = mkIntCLit 0 | otherwise = mkIntCLit 1 - mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field @@ -245,14 +244,14 @@ mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_fi padLitToWord :: CmmLit -> [CmmLit] padLitToWord lit = lit : padding pad_length - where rep = cmmLitRep lit - pad_length = wORD_SIZE - machRepByteWidth rep :: Int + where width = typeWidth (cmmLitType lit) + pad_length = wORD_SIZE - widthInBytes width :: Int padding n | n <= 0 = [] - | n `rem` 2 /= 0 = CmmInt 0 I8 : padding (n-1) - | n `rem` 4 /= 0 = CmmInt 0 I16 : padding (n-2) - | n `rem` 8 /= 0 = CmmInt 0 I32 : padding (n-4) - | otherwise = CmmInt 0 I64 : padding (n-8) + | n `rem` 2 /= 0 = CmmInt 0 W8 : padding (n-1) + | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2) + | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4) + | otherwise = CmmInt 0 W64 : padding (n-8) \end{code} %************************************************************************ @@ -309,7 +308,7 @@ hpStkCheck cl_info is_fun reg_save_code code -- Strictly speaking, we should tag node here. But if -- node doesn't point to the closure, the code for the closure -- cannot depend on the value of R1 anyway, so we're safe. 
- closure_lbl = closureLabelFromCI cl_info + closure_lbl = closureLabelFromCI cl_info (clHasCafRefs cl_info) full_save_code = node_asst `plusStmts` reg_save_code @@ -410,7 +409,7 @@ unbxTupleHeapCheck regs ptrs nptrs fail_code code ; code } where full_fail_code = fail_code `plusStmts` oneStmt assign_liveness - assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9 VNonGcPtr)) -- Ho ho ho! (CmmLit (mkWordCLit liveness)) liveness = mkRegLiveness regs ptrs nptrs rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut"))) @@ -495,10 +494,8 @@ hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code hpChkGen bytes liveness reentry = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen where - assigns = mkStmts [ - CmmAssign (CmmGlobal (VanillaReg 9)) liveness, - CmmAssign (CmmGlobal (VanillaReg 10)) reentry - ] + assigns = mkStmts [ mk_vanilla_assignment 9 liveness, + mk_vanilla_assignment 10 reentry ] -- a heap check where R1 points to the closure to enter on return, and -- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). @@ -511,10 +508,12 @@ stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code stkChkGen bytes liveness reentry = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen where - assigns = mkStmts [ - CmmAssign (CmmGlobal (VanillaReg 9)) liveness, - CmmAssign (CmmGlobal (VanillaReg 10)) reentry - ] + assigns = mkStmts [ mk_vanilla_assignment 9 liveness, + mk_vanilla_assignment 10 reentry ] + +mk_vanilla_assignment :: Int -> CmmExpr -> CmmStmt +mk_vanilla_assignment n e + = CmmAssign (CmmGlobal (VanillaReg n (vgcFlag (cmmExprType e)))) e stkChkNodePoints :: CmmExpr -> Code stkChkNodePoints bytes @@ -554,7 +553,8 @@ allocDynClosure cl_info use_cc blame_cc amodes_with_offsets -- Remember, virtHp points to last allocated word, -- ie 1 *before* the info-ptr word of new object. 
- info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info + (clHasCafRefs cl_info))) hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] -- SAY WHAT WE ARE ABOUT TO DO diff --git a/compiler/codeGen/CgHpc.hs b/compiler/codeGen/CgHpc.hs index 0d0fdb1..768a307 100644 --- a/compiler/codeGen/CgHpc.hs +++ b/compiler/codeGen/CgHpc.hs @@ -18,7 +18,6 @@ module CgHpc (cgTickBox, initHpc, hpcTable) where import Cmm import CLabel import Module -import MachOp import CmmUtils import CgUtils import CgMonad @@ -35,14 +34,14 @@ import Data.Word cgTickBox :: Module -> Int -> Code cgTickBox mod n = do - let tick_box = (cmmIndex I64 + let tick_box = (cmmIndex W64 (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) (fromIntegral n) ) stmtsC [ CmmStore tick_box - (CmmMachOp (MO_Add I64) - [ CmmLoad tick_box I64 - , CmmLit (CmmInt 1 I64) + (CmmMachOp (MO_Add W64) + [ CmmLoad tick_box b64 + , CmmLit (CmmInt 1 W64) ]) ] @@ -56,7 +55,7 @@ hpcTable this_mod (HpcInfo hpc_tickCount _) = do ] emitData Data $ [ CmmDataLabel (mkHpcTicksLabel this_mod) ] ++ - [ CmmStaticLit (CmmInt 0 I64) + [ CmmStaticLit (CmmInt 0 W64) | _ <- take hpc_tickCount [0::Int ..] 
] where @@ -70,24 +69,24 @@ hpcTable this_mod (NoHpcInfo {}) = error "TODO: impossible" initHpc :: Module -> HpcInfo -> Code initHpc this_mod (HpcInfo tickCount hashNo) - = do { id <- newNonPtrTemp wordRep -- TODO FIXME NOW + = do { id <- newTemp bWord ; emitForeignCall' PlayRisky - [CmmKinded id NoHint] + [CmmHinted id NoHint] (CmmCallee (CmmLit $ CmmLabel $ mkForeignLabel mod_alloc Nothing False) CCallConv ) - [ CmmKinded (mkLblExpr mkHpcModuleNameLabel) PtrHint - , CmmKinded (word32 tickCount) NoHint - , CmmKinded (word32 hashNo) NoHint - , CmmKinded (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod) PtrHint + [ CmmHinted (mkLblExpr mkHpcModuleNameLabel) AddrHint + , CmmHinted (word32 tickCount) NoHint + , CmmHinted (word32 hashNo) NoHint + , CmmHinted (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod) AddrHint ] (Just []) NoC_SRT -- No SRT b/c we PlayRisky CmmMayReturn } where - word32 i = CmmLit (CmmInt (fromIntegral (fromIntegral i :: Word32)) I32) + word32 i = CmmLit (CmmInt (fromIntegral (fromIntegral i :: Word32)) W32) mod_alloc = mkFastString "hs_hpc_module" diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 14004ce..9fbe4fb 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -40,7 +40,6 @@ import CgMonad import CmmUtils import Cmm -import MachOp import CLabel import StgSyn import Name @@ -64,13 +63,13 @@ import Outputable -- representation as a list of 'CmmAddr' is handled later -- in the pipeline by 'cmmToRawCmm'. 
-emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormalsWithoutKinds -> CgStmts -> Code +emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormals -> CgStmts -> Code emitClosureCodeAndInfoTable cl_info args body = do { blks <- cgStmtsToBlocks body ; info <- mkCmmInfo cl_info ; emitInfoTableAndCode (infoLblToEntryLbl info_lbl) info args blks } where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info -- We keep the *zero-indexed* tag in the srt_len field of the info -- table of a data constructor. @@ -107,17 +106,17 @@ mkCmmInfo cl_info = do LFReEntrant _ arity _ arg_descr -> FunInfo (ptrs, nptrs) srt - (argDescrType arg_descr) (fromIntegral arity) arg_descr - (CmmLabel (mkSlowEntryLabel name)) + (CmmLabel (mkSlowEntryLabel name has_caf_refs)) LFThunk _ _ _ (SelectorThunk offset) _ -> ThunkSelectorInfo (fromIntegral offset) srt LFThunk _ _ _ _ _ -> ThunkInfo (ptrs, nptrs) srt _ -> panic "unexpected lambda form in mkCmmInfo" where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info has_caf_refs + has_caf_refs = clHasCafRefs cl_info cl_type = smRepClosureTypeInt (closureSMRep cl_info) @@ -235,12 +234,9 @@ stack_layout ((off, bind):binds) sizeW | off == sizeW - 1 = (Just stack_bind) : (stack_layout binds (sizeW - rep_size)) where rep_size = cgRepSizeW (cgIdInfoArgRep bind) - stack_bind = LocalReg unique machRep kind + stack_bind = LocalReg unique machRep unique = getUnique (cgIdInfoId bind) machRep = argMachRep (cgIdInfoArgRep bind) - kind = if isFollowableArg (cgIdInfoArgRep bind) - then GCKindPtr - else GCKindNonPtr stack_layout binds@((off, _):_) sizeW | otherwise = Nothing : (stack_layout binds (sizeW - 1)) @@ -344,13 +340,13 @@ stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE closureInfoPtr :: CmmExpr -> CmmExpr -- Takes a closure pointer and returns the info table pointer -closureInfoPtr e = CmmLoad e wordRep +closureInfoPtr e = CmmLoad e bWord entryCode :: 
CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) -- and returns its entry code entryCode e | tablesNextToCode = e - | otherwise = CmmLoad e wordRep + | otherwise = CmmLoad e bWord getConstrTag :: CmmExpr -> CmmExpr -- Takes a closure pointer, and return the *zero-indexed* @@ -358,7 +354,7 @@ getConstrTag :: CmmExpr -> CmmExpr -- This lives in the SRT field of the info table -- (constructors don't need SRTs). getConstrTag closure_ptr - = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableConstrTag info_table] where info_table = infoTable (closureInfoPtr closure_ptr) @@ -366,7 +362,7 @@ cmmGetClosureType :: CmmExpr -> CmmExpr -- Takes a closure pointer, and return the closure type -- obtained from the info table cmmGetClosureType closure_ptr - = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table] + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableClosureType info_table] where info_table = infoTable (closureInfoPtr closure_ptr) @@ -387,21 +383,21 @@ infoTableSrtBitmap :: CmmExpr -> CmmExpr -- Takes an info table pointer (from infoTable) and returns the srt_bitmap -- field of the info table infoTableSrtBitmap info_tbl - = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) bHalfWord infoTableClosureType :: CmmExpr -> CmmExpr -- Takes an info table pointer (from infoTable) and returns the closure type -- field of the info table. 
infoTableClosureType info_tbl - = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) bHalfWord infoTablePtrs :: CmmExpr -> CmmExpr infoTablePtrs info_tbl - = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) bHalfWord infoTableNonPtrs :: CmmExpr -> CmmExpr infoTableNonPtrs info_tbl - = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) bHalfWord funInfoTable :: CmmExpr -> CmmExpr -- Takes the info pointer of a function, @@ -427,7 +423,7 @@ funInfoTable info_ptr emitInfoTableAndCode :: CLabel -- Label of entry or ret -> CmmInfo -- ...the info table - -> CmmFormalsWithoutKinds -- ...args + -> CmmFormals -- ...args -> [CmmBasicBlock] -- ...and body -> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index 51c07b2..e624f4b 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -74,6 +74,7 @@ import BlockId import Cmm import CmmUtils import CLabel +import PprCmm import StgSyn (SRT) import SMRep import Module @@ -746,7 +747,7 @@ emitData sect lits where data_block = CmmData sect lits -emitProc :: CmmInfo -> CLabel -> CmmFormalsWithoutKinds -> [CmmBasicBlock] -> Code +emitProc :: CmmInfo -> CLabel -> CmmFormals -> [CmmBasicBlock] -> Code emitProc info lbl args blocks = do { let proc_block = CmmProc info lbl args (ListGraph blocks) ; state <- getState @@ -767,7 +768,8 @@ getCmm code = do { state1 <- getState ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) ; setState $ state2 { cgs_tops = cgs_tops state1 } - ; return (Cmm (fromOL (cgs_tops state2))) } + ; return (Cmm (fromOL (cgs_tops state2))) + } -- ---------------------------------------------------------------------------- -- CgStmts diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 85a4151..05e45b5 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ 
b/compiler/codeGen/CgPrimOp.hs @@ -28,7 +28,6 @@ import CgUtils import Cmm import CLabel import CmmUtils -import MachOp import PrimOp import SMRep import Constants @@ -38,7 +37,7 @@ import FastString -- --------------------------------------------------------------------------- -- Code generation for PrimOps -cgPrimOp :: CmmFormalsWithoutKinds -- where to put the results +cgPrimOp :: CmmFormals -- where to put the results -> PrimOp -- the op -> [StgArg] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -50,7 +49,7 @@ cgPrimOp results op args live emitPrimOp results op non_void_args live -emitPrimOp :: CmmFormalsWithoutKinds -- where to put the results +emitPrimOp :: CmmFormals -- where to put the results -> PrimOp -- the op -> [CmmExpr] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -122,10 +121,10 @@ emitPrimOp [res] ParOp [arg] live -- later, we might want to inline it. vols <- getVolatileRegs live emitForeignCall' PlayRisky - [CmmKinded res NoHint] + [CmmHinted res NoHint] (CmmCallee newspark CCallConv) - [ (CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint) - , (CmmKinded arg PtrHint) ] + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted arg AddrHint) ] (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -133,7 +132,7 @@ emitPrimOp [res] ParOp [arg] live newspark = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "newSpark"))) emitPrimOp [res] ReadMutVarOp [mutv] live - = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize)) + = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) emitPrimOp [] WriteMutVarOp [mutv,var] live = do @@ -143,8 +142,8 @@ emitPrimOp [] WriteMutVarOp [mutv,var] live [{-no results-}] (CmmCallee (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) CCallConv) - [ (CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint) - , (CmmKinded mutv PtrHint) ] + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted mutv AddrHint) ] (Just vols) NoC_SRT -- 
No SRT b/c we do PlayRisky CmmMayReturn @@ -154,7 +153,7 @@ emitPrimOp [] WriteMutVarOp [mutv,var] live emitPrimOp [res] SizeofByteArrayOp [arg] live = stmtC $ CmmAssign (CmmLocal res) (CmmMachOp mo_wordMul [ - cmmLoadIndexW arg fixedHdrSize, + cmmLoadIndexW arg fixedHdrSize bWord, CmmLit (mkIntCLit wORD_SIZE) ]) @@ -174,14 +173,14 @@ emitPrimOp [res] ByteArrayContents_Char [arg] live -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) emitPrimOp [res] StableNameToIntOp [arg] live - = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize)) + = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) -- #define eqStableNamezh(r,sn1,sn2) \ -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) emitPrimOp [res] EqStableNameOp [arg1,arg2] live = stmtC (CmmAssign (CmmLocal res) (CmmMachOp mo_wordEq [ - cmmLoadIndexW arg1 fixedHdrSize, - cmmLoadIndexW arg2 fixedHdrSize + cmmLoadIndexW arg1 fixedHdrSize bWord, + cmmLoadIndexW arg2 fixedHdrSize bWord ])) @@ -223,117 +222,117 @@ emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v -- IndexXXXoffAddr -emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args -emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args -emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_Int16 
args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args -emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live 
= doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing b64 res args -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. -emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args -emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args -emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args -emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing bWord res args 
+emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing b64 res args -- IndexXXXArray -emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args -emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args -emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args 
-emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args -emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res 
IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing b64 res args -- ReadXXXArray, identical to IndexXXXArray. -emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args -emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args -emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args -emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args 
+emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing b64 res args -- WriteXXXoffAddr -emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Addr args live = 
doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args -emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args -emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args -emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing f32 res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing f64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) b16 res args 
+emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing b64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing b64 res args -- WriteXXXArray -emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args -emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args -emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args -emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteByteArrayOp_Word32 args live = 
doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing f32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing f64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing b64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing b64 res args -- The rest just translate straightforwardly @@ -342,16 +341,16 @@ emitPrimOp [res] op [arg] live = stmtC (CmmAssign (CmmLocal res) arg) | Just (mop,rep) <- narrowOp op - = stmtC (CmmAssign (CmmLocal res) (CmmMachOp (mop rep wordRep) [ - CmmMachOp (mop wordRep rep) [arg]])) + = stmtC (CmmAssign (CmmLocal 
res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) emitPrimOp [res] op args live | Just prim <- callishOp op = do vols <- getVolatileRegs live emitForeignCall' PlayRisky - [CmmKinded res NoHint] + [CmmHinted res NoHint] (CmmPrim prim) - [CmmKinded a NoHint | a<-args] -- ToDo: hints? + [CmmHinted a NoHint | a<-args] -- ToDo: hints? (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -376,12 +375,13 @@ nopOp _ = False -- These PrimOps turn into double casts -narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) -narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) -narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) -narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) -narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) -narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) +narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) +narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) +narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) +narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) +narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) +narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) narrowOp _ = Nothing -- Native word signless ops @@ -412,7 +412,7 @@ translateOp AddrRemOp = Just mo_wordURem -- Native word signed ops translateOp IntMulOp = Just mo_wordMul -translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth) translateOp IntQuotOp = Just mo_wordSQuot translateOp IntRemOp = Just mo_wordSRem translateOp IntNegOp = Just mo_wordSNeg @@ -445,53 +445,53 @@ translateOp AddrLtOp = Just mo_wordULt -- Char# ops -translateOp CharEqOp = Just (MO_Eq wordRep) -translateOp CharNeOp = Just (MO_Ne wordRep) -translateOp CharGeOp = Just (MO_U_Ge wordRep) -translateOp CharLeOp = Just (MO_U_Le wordRep) -translateOp CharGtOp = Just (MO_U_Gt wordRep) -translateOp CharLtOp = Just (MO_U_Lt wordRep) +translateOp CharEqOp = Just (MO_Eq wordWidth) +translateOp CharNeOp = Just 
(MO_Ne wordWidth) +translateOp CharGeOp = Just (MO_U_Ge wordWidth) +translateOp CharLeOp = Just (MO_U_Le wordWidth) +translateOp CharGtOp = Just (MO_U_Gt wordWidth) +translateOp CharLtOp = Just (MO_U_Lt wordWidth) -- Double ops -translateOp DoubleEqOp = Just (MO_Eq F64) -translateOp DoubleNeOp = Just (MO_Ne F64) -translateOp DoubleGeOp = Just (MO_S_Ge F64) -translateOp DoubleLeOp = Just (MO_S_Le F64) -translateOp DoubleGtOp = Just (MO_S_Gt F64) -translateOp DoubleLtOp = Just (MO_S_Lt F64) +translateOp DoubleEqOp = Just (MO_F_Eq W64) +translateOp DoubleNeOp = Just (MO_F_Ne W64) +translateOp DoubleGeOp = Just (MO_F_Ge W64) +translateOp DoubleLeOp = Just (MO_F_Le W64) +translateOp DoubleGtOp = Just (MO_F_Gt W64) +translateOp DoubleLtOp = Just (MO_F_Lt W64) -translateOp DoubleAddOp = Just (MO_Add F64) -translateOp DoubleSubOp = Just (MO_Sub F64) -translateOp DoubleMulOp = Just (MO_Mul F64) -translateOp DoubleDivOp = Just (MO_S_Quot F64) -translateOp DoubleNegOp = Just (MO_S_Neg F64) +translateOp DoubleAddOp = Just (MO_F_Add W64) +translateOp DoubleSubOp = Just (MO_F_Sub W64) +translateOp DoubleMulOp = Just (MO_F_Mul W64) +translateOp DoubleDivOp = Just (MO_F_Quot W64) +translateOp DoubleNegOp = Just (MO_F_Neg W64) -- Float ops -translateOp FloatEqOp = Just (MO_Eq F32) -translateOp FloatNeOp = Just (MO_Ne F32) -translateOp FloatGeOp = Just (MO_S_Ge F32) -translateOp FloatLeOp = Just (MO_S_Le F32) -translateOp FloatGtOp = Just (MO_S_Gt F32) -translateOp FloatLtOp = Just (MO_S_Lt F32) +translateOp FloatEqOp = Just (MO_F_Eq W32) +translateOp FloatNeOp = Just (MO_F_Ne W32) +translateOp FloatGeOp = Just (MO_F_Ge W32) +translateOp FloatLeOp = Just (MO_F_Le W32) +translateOp FloatGtOp = Just (MO_F_Gt W32) +translateOp FloatLtOp = Just (MO_F_Lt W32) -translateOp FloatAddOp = Just (MO_Add F32) -translateOp FloatSubOp = Just (MO_Sub F32) -translateOp FloatMulOp = Just (MO_Mul F32) -translateOp FloatDivOp = Just (MO_S_Quot F32) -translateOp FloatNegOp = Just (MO_S_Neg F32) 
+translateOp FloatAddOp = Just (MO_F_Add W32) +translateOp FloatSubOp = Just (MO_F_Sub W32) +translateOp FloatMulOp = Just (MO_F_Mul W32) +translateOp FloatDivOp = Just (MO_F_Quot W32) +translateOp FloatNegOp = Just (MO_F_Neg W32) -- Conversions -translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) -translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) +translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64) +translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth) -translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) -translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) +translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32) +translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth) -translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) -translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) +translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64) +translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32) -- Word comparisons masquerading as more exotic things. @@ -540,6 +540,10 @@ callishOp _ = Nothing ------------------------------------------------------------------------------ -- Helpers for translating various minor variants of array indexing. 
+-- Bytearrays outside the heap; hence non-pointers +doIndexOffAddrOp, doIndexByteArrayOp + :: Maybe MachOp -> CmmType + -> [LocalReg] -> [CmmExpr] -> Code doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx doIndexOffAddrOp _ _ _ _ @@ -550,10 +554,14 @@ doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] doIndexByteArrayOp _ _ _ _ = panic "CgPrimOp: doIndexByteArrayOp" +doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> Code doReadPtrArrayOp res addr idx - = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx +doWriteOffAddrOp, doWriteByteArrayOp + :: Maybe MachOp -> CmmType + -> [LocalReg] -> [CmmExpr] -> Code doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val doWriteOffAddrOp _ _ _ _ @@ -564,17 +572,22 @@ doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] doWriteByteArrayOp _ _ _ _ = panic "CgPrimOp: doWriteByteArrayOp" +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> Code doWritePtrArrayOp addr idx val = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) - mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + mkBasicIndexedWrite arrPtrsHdrSize Nothing bWord addr idx val +mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType + -> LocalReg -> CmmExpr -> CmmExpr -> Code mkBasicIndexedRead off Nothing read_rep res base idx = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) mkBasicIndexedRead off (Just cast) read_rep res base idx = stmtC (CmmAssign (CmmLocal res) (CmmMachOp cast [ cmmLoadIndexOffExpr off read_rep base idx])) +mkBasicIndexedWrite :: ByteOff -> Maybe MachOp -> CmmType + -> CmmExpr -> CmmExpr -> CmmExpr -> Code mkBasicIndexedWrite off Nothing write_rep base idx val = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) mkBasicIndexedWrite off 
(Just cast) write_rep base idx val @@ -583,11 +596,11 @@ mkBasicIndexedWrite off (Just cast) write_rep base idx val -- ---------------------------------------------------------------------------- -- Misc utils -cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr cmmIndexOffExpr off rep base idx - = cmmIndexExpr rep (cmmOffsetB base off) idx + = cmmIndexExpr (typeWidth rep) (cmmOffsetB base off) idx -cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr cmmLoadIndexOffExpr off rep base idx = CmmLoad (cmmIndexOffExpr off rep base idx) rep diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index c2a8a1b..c85beb5 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -45,7 +45,6 @@ import CgMonad import SMRep import Cmm -import MachOp import CmmUtils import CLabel @@ -70,7 +69,7 @@ import Control.Monad -- Expression representing the current cost centre stack curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr wordRep +curCCS = CmmLoad curCCSAddr bWord -- Address of current CCS variable, for storing into curCCSAddr :: CmmExpr @@ -84,7 +83,7 @@ mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) costCentreFrom :: CmmExpr -- A closure pointer -> CmmExpr -- The cost centre from that closure -costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) bWord staticProfHdr :: CostCentreStack -> [CmmLit] -- The profiling header words in a static closure @@ -122,13 +121,13 @@ profAlloc words ccs = ifProfiling $ stmtC (addToMemE alloc_rep (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) - (CmmMachOp (MO_U_Conv wordRep alloc_rep) $ + (CmmMachOp (MO_UU_Conv wordWidth alloc_rep) $ [CmmMachOp mo_wordSub [words, CmmLit (mkIntCLit profHdrSize)]])) -- subtract the "profiling overhead", 
which is the -- profiling header in a closure. where - alloc_rep = REP_CostCentreStack_mem_alloc + alloc_rep = typeWidth REP_CostCentreStack_mem_alloc -- ---------------------------------------------------------------------- -- Setting the cost centre in a new closure @@ -162,7 +161,7 @@ emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) push_em ccs [] = return ccs push_em ccs (cc:rest) = do - tmp <- newNonPtrTemp wordRep -- TODO FIXME NOW + tmp <- newTemp bWord -- TODO FIXME NOW pushCostCentre tmp ccs cc push_em (CmmReg (CmmLocal tmp)) rest @@ -267,7 +266,7 @@ enterCostCentreThunk closure = ifProfiling $ do stmtC $ CmmStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [CmmKinded stack PtrHint] False +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [CmmHinted stack AddrHint] False -- ToDo: vols enter_ccs_fsub = enteringPAP 0 @@ -280,7 +279,7 @@ enter_ccs_fsub = enteringPAP 0 enteringPAP :: Integer -> Code enteringPAP n = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) - (CmmLit (CmmInt n cIntRep))) + (CmmLit (CmmInt n cIntWidth))) ifProfiling :: Code -> Code ifProfiling code @@ -340,7 +339,7 @@ emitCostCentreStackDecl ccs | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) zero = mkIntCLit 0 -zero64 = CmmInt 0 I64 +zero64 = CmmInt 0 W64 sizeof_ccs_words :: Int sizeof_ccs_words @@ -359,12 +358,12 @@ sizeof_ccs_words emitRegisterCC :: CostCentre -> Code emitRegisterCC cc = do - { tmp <- newNonPtrTemp cIntRep + { tmp <- newTemp cInt ; stmtsC [ CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) - (CmmLoad cC_LIST wordRep), + (CmmLoad cC_LIST bWord), CmmStore cC_LIST cc_lit, - CmmAssign (CmmLocal tmp) (CmmLoad cC_ID cIntRep), + CmmAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), CmmStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) ] @@ -378,12 +377,12 @@ emitRegisterCC cc = do emitRegisterCCS :: CostCentreStack -> Code 
emitRegisterCCS ccs = do - { tmp <- newNonPtrTemp cIntRep + { tmp <- newTemp cInt ; stmtsC [ CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) - (CmmLoad cCS_LIST wordRep), + (CmmLoad cCS_LIST bWord), CmmStore cCS_LIST ccs_lit, - CmmAssign (CmmLocal tmp) (CmmLoad cCS_ID cIntRep), + CmmAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), CmmStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) ] @@ -405,7 +404,7 @@ emitSetCCC :: CostCentre -> Code emitSetCCC cc | not opt_SccProfilingOn = nopC | otherwise = do - tmp <- newNonPtrTemp wordRep -- TODO FIXME NOW + tmp <- newTemp bWord -- TODO FIXME NOW ASSERT( sccAbleCostCentre cc ) pushCostCentre tmp curCCS cc stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) @@ -414,14 +413,14 @@ emitSetCCC cc pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc - = emitRtsCallWithResult result PtrHint - (sLit "PushCostCentre") [CmmKinded ccs PtrHint, - CmmKinded (CmmLit (mkCCostCentre cc)) PtrHint] + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [CmmHinted ccs AddrHint, + CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] False bumpSccCount :: CmmExpr -> CmmStmt bumpSccCount ccs - = addToMem REP_CostCentreStack_scc_count + = addToMem (typeWidth REP_CostCentreStack_scc_count) (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 ----------------------------------------------------------------------------- @@ -475,13 +474,13 @@ ldvEnter cl_ptr where -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr - new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) (CmmLit (mkWordCLit lDV_CREATE_MASK))) (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) loadEra :: CmmExpr -loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep) - [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cIntRep] +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + 
[CmmLoad (mkLblExpr (mkRtsDataLabel $ sLit("era"))) cInt] ldvWord :: CmmExpr -> CmmExpr -- Takes the address of a closure, and returns diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs index b8db38d..d6d9e5c 100644 --- a/compiler/codeGen/CgStackery.lhs +++ b/compiler/codeGen/CgStackery.lhs @@ -274,7 +274,6 @@ to reflect the frame pushed. \begin{code} pushUpdateFrame :: CmmExpr -> Code -> Code - pushUpdateFrame updatee code = do { when debugIsOn $ do diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 475196a..4f89099 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -41,6 +41,7 @@ import Type import Id import StgSyn import PrimOp +import FastString import Outputable import Control.Monad @@ -116,7 +117,7 @@ performTailCall fun_info arg_amodes pending_assts ; EndOfBlockInfo sp _ <- getEndOfBlockInfo ; this_pkg <- getThisPackage - ; case (getCallMethod fun_name lf_info (length arg_amodes)) of + ; case (getCallMethod fun_name fun_has_cafs lf_info (length arg_amodes)) of -- Node must always point to things we enter EnterIt -> do @@ -183,8 +184,10 @@ performTailCall fun_info arg_amodes pending_assts } } where - fun_name = idName (cgIdInfoId fun_info) + fun_id = cgIdInfoId fun_info + fun_name = idName fun_id lf_info = cgIdInfoLF fun_info + fun_has_cafs = idCafInfo fun_id untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) -- Test if closure is a constructor maybeSwitchOnCons enterClosure eob diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs index 2494740..b23b34c 100644 --- a/compiler/codeGen/CgTicky.hs +++ b/compiler/codeGen/CgTicky.hs @@ -52,12 +52,12 @@ import CgMonad import SMRep import Cmm -import MachOp import CmmUtils import CLabel import Name import Id +import IdInfo import StaticFlags import BasicTypes import FastString @@ -106,7 +106,7 @@ emitTickyCounter cl_info args on_stk ] } where name = closureName cl_info - ticky_ctr_label = 
mkRednCountsLabel name + ticky_ctr_label = mkRednCountsLabel name NoCafRefs arg_descr = map (showTypeCategory . idType) args fun_descr mod_name = ppr_for_ticky_name mod_name name @@ -172,13 +172,13 @@ registerTickyCtr ctr_lbl = emitIf test (stmtsC register_stmts) where -- krc: code generator doesn't handle Not, so we test for Eq 0 instead - test = CmmMachOp (MO_Eq wordRep) + test = CmmMachOp (MO_Eq wordWidth) [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl - oFFSET_StgEntCounter_registeredp)) wordRep, + oFFSET_StgEntCounter_registeredp)) bWord, CmmLit (mkIntCLit 0)] register_stmts = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) - (CmmLoad ticky_entry_ctrs wordRep) + (CmmLoad ticky_entry_ctrs bWord) , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl) , CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_registeredp)) @@ -288,13 +288,13 @@ tickyAllocHeap hp if hp == 0 then [] -- Inside the stmtC to avoid control else [ -- dependency on the argument -- Bump the allcoation count in the StgEntCounter - addToMem REP_StgEntCounter_allocs + addToMem (typeWidth REP_StgEntCounter_allocs) (CmmLit (cmmLabelOffB ticky_ctr oFFSET_StgEntCounter_allocs)) hp, -- Bump ALLOC_HEAP_ctr - addToMemLbl cLongRep (mkRtsDataLabel (sLit "ALLOC_HEAP_ctr")) 1, - -- Bump ALLOC_HEAP_tot - addToMemLbl cLongRep (mkRtsDataLabel (sLit "ALLOC_HEAP_tot")) hp] } + addToMemLbl cLongWidth (mkRtsDataLabel $ sLit "ALLOC_HEAP_ctr") 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLongWidth (mkRtsDataLabel $ sLit "ALLOC_HEAP_tot") hp] } -- ----------------------------------------------------------------------------- -- Ticky utils @@ -304,7 +304,7 @@ ifTicky code | opt_DoTickyProfiling = code | otherwise = nopC -addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt +addToMemLbl :: Width -> CLabel -> Int -> CmmStmt addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n -- All the ticky-ticky counters are declared "unsigned long" in C @@ -313,27 +313,28 @@ bumpTickyCounter lbl = 
bumpTickyCounter' (cmmLabelOffB (mkRtsDataLabel lbl) 0) bumpTickyCounter' :: CmmLit -> Code -- krc: note that we're incrementing the _entry_count_ field of the ticky counter -bumpTickyCounter' lhs = stmtC (addToMem cLongRep (CmmLit lhs) 1) - -addToMemLong = addToMem cLongRep +bumpTickyCounter' lhs = stmtC (addToMemLong (CmmLit lhs) 1) bumpHistogram :: LitString -> Int -> Code bumpHistogram lbl n --- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep)) +-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLong)) = return () -- TEMP SPJ Apr 07 bumpHistogramE :: LitString -> CmmExpr -> Code bumpHistogramE lbl n - = do t <- newNonPtrTemp cLongRep + = do t <- newTemp cLong stmtC (CmmAssign (CmmLocal t) n) - emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg (CmmLocal t), eight]) $ + emitIf (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) $ stmtC (CmmAssign (CmmLocal t) eight) - stmtC (addToMemLong (cmmIndexExpr cLongRep + stmtC (addToMemLong (cmmIndexExpr cLongWidth (CmmLit (CmmLabel (mkRtsDataLabel lbl))) (CmmReg (CmmLocal t))) 1) where - eight = CmmLit (CmmInt 8 cLongRep) + eight = CmmLit (CmmInt 8 cLongWidth) + +------------------------------------------------------------------ +addToMemLong = addToMem cLongWidth ------------------------------------------------------------------ -- Showing the "type category" for ticky-ticky profiling diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 4de3537..fd49cb7 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -20,8 +20,7 @@ module CgUtils ( emitRODataLits, mkRODataLits, emitIf, emitIfThenElse, emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, - assignNonPtrTemp, newNonPtrTemp, - assignPtrTemp, newPtrTemp, + assignTemp, newTemp, emitSimultaneously, emitSwitch, emitLitSwitch, tagToClosure, @@ -47,7 +46,7 @@ module CgUtils ( packHalfWordsCLit, blankWord, - getSRTInfo + getSRTInfo, clHasCafRefs ) where #include "HsVersions.h" @@ -58,13 +57,13 @@ 
import CgMonad import TyCon import DataCon import Id +import IdInfo import Constants import SMRep import PprCmm ( {- instances -} ) import Cmm import CLabel import CmmUtils -import MachOp import ForeignCall import ClosureInfo import StgSyn (SRT(..)) @@ -103,24 +102,24 @@ cgLit (MachStr s) = mkByteStringCLit (bytesFS s) cgLit other_lit = return (mkSimpleLit other_lit) mkSimpleLit :: Literal -> CmmLit -mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth mkSimpleLit MachNullAddr = zeroCLit -mkSimpleLit (MachInt i) = CmmInt i wordRep -mkSimpleLit (MachInt64 i) = CmmInt i I64 -mkSimpleLit (MachWord i) = CmmInt i wordRep -mkSimpleLit (MachWord64 i) = CmmInt i I64 -mkSimpleLit (MachFloat r) = CmmFloat r F32 -mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachInt i) = CmmInt i wordWidth +mkSimpleLit (MachInt64 i) = CmmInt i W64 +mkSimpleLit (MachWord i) = CmmInt i wordWidth +mkSimpleLit (MachWord64 i) = CmmInt i W64 +mkSimpleLit (MachFloat r) = CmmFloat r W32 +mkSimpleLit (MachDouble r) = CmmFloat r W64 mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) where is_dyn = False -- ToDo: fix me mkLtOp :: Literal -> MachOp -- On signed literals we must do a signed comparison -mkLtOp (MachInt _) = MO_S_Lt wordRep -mkLtOp (MachFloat _) = MO_S_Lt F32 -mkLtOp (MachDouble _) = MO_S_Lt F64 -mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) +mkLtOp (MachInt _) = MO_S_Lt wordWidth +mkLtOp (MachFloat _) = MO_F_Lt W32 +mkLtOp (MachDouble _) = MO_F_Lt W64 +mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit))) --------------------------------------------------- @@ -151,7 +150,7 @@ cmmOffsetLitB = cmmOffsetLit cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr -- The second arg is a *word* offset; need to change it to bytes cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) -cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off +cmmOffsetExprW e wd_off 
= cmmIndexExpr wordWidth e wd_off cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) @@ -165,9 +164,8 @@ cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) cmmLabelOffW :: CLabel -> WordOff -> CmmLit cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) -cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr -cmmLoadIndexW base off - = CmmLoad (cmmOffsetW base off) wordRep +cmmLoadIndexW :: CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW base off ty = CmmLoad (cmmOffsetW base off) ty ----------------------- cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr @@ -184,7 +182,7 @@ cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] cmmNegate :: CmmExpr -> CmmExpr cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) -cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprWidth e)) [e] blankWord :: CmmStatic blankWord = CmmUninitialised wORD_SIZE @@ -244,7 +242,7 @@ dataConTagZ con = dataConTag con - fIRST_TAG -- Making literals mkWordCLit :: StgWord -> CmmLit -mkWordCLit wd = CmmInt (fromIntegral wd) wordRep +mkWordCLit wd = CmmInt (fromIntegral wd) wordWidth packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit -- Make a single word literal in which the lower_half_word is @@ -267,18 +265,18 @@ packHalfWordsCLit lower_half_word upper_half_word -- -------------------------------------------------------------------------- -addToMem :: MachRep -- rep of the counter +addToMem :: Width -- rep of the counter -> CmmExpr -- Address -> Int -- What to add (a word) -> CmmStmt -addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) +addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width)) -addToMemE :: MachRep -- rep of the counter +addToMemE :: Width -- rep of the counter -> CmmExpr -- Address -> CmmExpr -- What to add (a word-typed expression) -> CmmStmt -addToMemE rep ptr n - = CmmStore ptr 
(CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) +addToMemE width ptr n + = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n]) ------------------------------------------------------------------------- -- @@ -289,9 +287,9 @@ addToMemE rep ptr n tagToClosure :: TyCon -> CmmExpr -> CmmExpr tagToClosure tycon tag - = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord where closure_tbl = CmmLit (CmmLabel lbl) - lbl = mkClosureTableLabel (tyConName tycon) + lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs ------------------------------------------------------------------------- -- @@ -334,24 +332,24 @@ emitIfThenElse cond then_part else_part ; labelC join_id } -emitRtsCall :: LitString -> [CmmKinded CmmExpr] -> Bool -> Code +emitRtsCall :: LitString -> [CmmHinted CmmExpr] -> Bool -> Code emitRtsCall fun args safe = emitRtsCall' [] fun args Nothing safe -- The 'Nothing' says "save all global registers" -emitRtsCallWithVols :: LitString -> [CmmKinded CmmExpr] -> [GlobalReg] -> Bool -> Code +emitRtsCallWithVols :: LitString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code emitRtsCallWithVols fun args vols safe = emitRtsCall' [] fun args (Just vols) safe -emitRtsCallWithResult :: LocalReg -> MachHint -> LitString - -> [CmmKinded CmmExpr] -> Bool -> Code +emitRtsCallWithResult :: LocalReg -> ForeignHint -> LitString + -> [CmmHinted CmmExpr] -> Bool -> Code emitRtsCallWithResult res hint fun args safe - = emitRtsCall' [CmmKinded res hint] fun args Nothing safe + = emitRtsCall' [CmmHinted res hint] fun args Nothing safe -- Make a call to an RTS C procedure emitRtsCall' - :: CmmFormals + :: [CmmHinted LocalReg] -> LitString - -> [CmmKinded CmmExpr] + -> [CmmHinted CmmExpr] -> Maybe [GlobalReg] -> Bool -- True <=> CmmSafe call -> Code @@ -393,7 +391,8 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) vol_list = case vols of Nothing -> all_of_em; Just regs -> regs - all_of_em = [ VanillaReg n 
| n <- [0..mAX_Vanilla_REG] ] + all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ] + -- The VNonGcPtr is a lie, but I don't think it matters ++ [ FloatReg n | n <- [0..mAX_Float_REG] ] ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ] ++ [ LongReg n | n <- [0..mAX_Long_REG] ] @@ -407,7 +406,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) callerRestoreGlobalReg reg next | callerSaves reg = CmmAssign (CmmGlobal reg) - (CmmLoad (get_GlobalReg_addr reg) (globalRegRep reg)) + (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg)) : next | otherwise = next @@ -423,14 +422,14 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) get_GlobalReg_addr :: GlobalReg -> CmmExpr get_GlobalReg_addr BaseReg = regTableOffset 0 get_GlobalReg_addr mid = get_Regtable_addr_from_offset - (globalRegRep mid) (baseRegOffset mid) + (globalRegType mid) (baseRegOffset mid) -- Calculate a literal representing an offset into the register table. -- Used when we don't have an actual BaseReg to offset from. 
regTableOffset n = CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) -get_Regtable_addr_from_offset :: MachRep -> Int -> CmmExpr +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr get_Regtable_addr_from_offset rep offset = #ifdef REG_Base CmmRegOff (CmmGlobal BaseReg) offset @@ -448,28 +447,28 @@ callerSaves :: GlobalReg -> Bool callerSaves BaseReg = True #endif #ifdef CALLER_SAVES_R1 -callerSaves (VanillaReg 1) = True +callerSaves (VanillaReg 1 _) = True #endif #ifdef CALLER_SAVES_R2 -callerSaves (VanillaReg 2) = True +callerSaves (VanillaReg 2 _) = True #endif #ifdef CALLER_SAVES_R3 -callerSaves (VanillaReg 3) = True +callerSaves (VanillaReg 3 _) = True #endif #ifdef CALLER_SAVES_R4 -callerSaves (VanillaReg 4) = True +callerSaves (VanillaReg 4 _) = True #endif #ifdef CALLER_SAVES_R5 -callerSaves (VanillaReg 5) = True +callerSaves (VanillaReg 5 _) = True #endif #ifdef CALLER_SAVES_R6 -callerSaves (VanillaReg 6) = True +callerSaves (VanillaReg 6 _) = True #endif #ifdef CALLER_SAVES_R7 -callerSaves (VanillaReg 7) = True +callerSaves (VanillaReg 7 _) = True #endif #ifdef CALLER_SAVES_R8 -callerSaves (VanillaReg 8) = True +callerSaves (VanillaReg 8 _) = True #endif #ifdef CALLER_SAVES_F1 callerSaves (FloatReg 1) = True @@ -518,16 +517,16 @@ callerSaves _ = False baseRegOffset :: GlobalReg -> Int -baseRegOffset (VanillaReg 1) = oFFSET_StgRegTable_rR1 -baseRegOffset (VanillaReg 2) = oFFSET_StgRegTable_rR2 -baseRegOffset (VanillaReg 3) = oFFSET_StgRegTable_rR3 -baseRegOffset (VanillaReg 4) = oFFSET_StgRegTable_rR4 -baseRegOffset (VanillaReg 5) = oFFSET_StgRegTable_rR5 -baseRegOffset (VanillaReg 6) = oFFSET_StgRegTable_rR6 -baseRegOffset (VanillaReg 7) = oFFSET_StgRegTable_rR7 -baseRegOffset (VanillaReg 8) = oFFSET_StgRegTable_rR8 -baseRegOffset (VanillaReg 9) = oFFSET_StgRegTable_rR9 -baseRegOffset (VanillaReg 10) = oFFSET_StgRegTable_rR10 +baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1 +baseRegOffset (VanillaReg 2 _) = 
oFFSET_StgRegTable_rR2 +baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3 +baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4 +baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5 +baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6 +baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7 +baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8 +baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9 +baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3 @@ -565,15 +564,15 @@ mkDataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info graph mkDataLits lbl lits = CmmData Data (CmmDataLabel lbl : map CmmStaticLit lits) -emitRODataLits :: CLabel -> [CmmLit] -> Code +emitRODataLits :: String -> CLabel -> [CmmLit] -> Code -- Emit a read-only data block -emitRODataLits lbl lits +emitRODataLits caller lbl lits = emitData section (CmmDataLabel lbl : map CmmStaticLit lits) - where section | any needsRelocation lits = RelocatableReadOnlyData - | otherwise = ReadOnlyData - needsRelocation (CmmLabel _) = True - needsRelocation (CmmLabelOff _ _) = True - needsRelocation _ = False + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False mkRODataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info graph mkRODataLits lbl lits @@ -602,30 +601,17 @@ mkByteStringCLit bytes -- ------------------------------------------------------------------------- -assignNonPtrTemp :: CmmExpr -> FCode CmmExpr --- For a non-trivial expression, e, create a local --- variable and assign the expression to it -assignNonPtrTemp e - | isTrivialCmmExpr e = return e - | otherwise = do { reg <- newNonPtrTemp (cmmExprRep e) - ; stmtC (CmmAssign (CmmLocal reg) e) - ; return (CmmReg 
(CmmLocal reg)) } - -assignPtrTemp :: CmmExpr -> FCode CmmExpr +assignTemp :: CmmExpr -> FCode CmmExpr -- For a non-trivial expression, e, create a local -- variable and assign the expression to it -assignPtrTemp e +assignTemp e | isTrivialCmmExpr e = return e - | otherwise = do { reg <- newPtrTemp (cmmExprRep e) + | otherwise = do { reg <- newTemp (cmmExprType e) ; stmtC (CmmAssign (CmmLocal reg) e) ; return (CmmReg (CmmLocal reg)) } -newNonPtrTemp :: MachRep -> FCode LocalReg -newNonPtrTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep GCKindNonPtr) } - -newPtrTemp :: MachRep -> FCode LocalReg -newPtrTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep GCKindPtr) } - +newTemp :: CmmType -> FCode LocalReg +newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) } ------------------------------------------------------------------------- -- @@ -727,7 +713,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C -- if we can knock off a bunch of default cases with one if, then do so | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch)) branch = CmmCondBranch cond deflt ; stmts <- mk_switch tag_expr' branches mb_deflt @@ -736,7 +722,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C } | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch)) branch = CmmCondBranch cond deflt ; stmts <- mk_switch tag_expr' branches mb_deflt @@ -745,7 +731,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C } | otherwise -- Use an if-tree - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- 
assignTemp' tag_expr -- To avoid duplication ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt lo_tag (mid_tag-1) via_C @@ -810,9 +796,9 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C is_lo (t,_) = t < mid_tag -assignNonPtrTemp' e +assignTemp' e | isTrivialCmmExpr e = return (CmmNop, e) - | otherwise = do { reg <- newNonPtrTemp (cmmExprRep e) + | otherwise = do { reg <- newTemp (cmmExprType e) ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) } emitLitSwitch :: CmmExpr -- Tag to switch on @@ -828,7 +814,7 @@ emitLitSwitch :: CmmExpr -- Tag to switch on emitLitSwitch scrut [] deflt = emitCgStmts deflt emitLitSwitch scrut branches deflt_blk - = do { scrut' <- assignNonPtrTemp scrut + = do { scrut' <- assignTemp scrut ; deflt_blk_id <- forkCgStmts deflt_blk ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches) ; emitCgStmts blk } @@ -842,8 +828,9 @@ mk_lit_switch scrut deflt_blk_id [(lit,blk)] = return (consCgStmt if_stmt blk) where cmm_lit = mkSimpleLit lit - rep = cmmLitRep cmm_lit - cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit] + rep = cmmLitType cmm_lit + ne = if isFloatType rep then MO_F_Ne else MO_Ne + cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit] if_stmt = CmmCondBranch cond deflt_blk_id mk_lit_switch scrut deflt_blk_id branches @@ -920,11 +907,11 @@ doSimultaneously1 vertices ; stmtC from_temp } go_via_temp (CmmAssign dest src) - = do { tmp <- newNonPtrTemp (cmmRegRep dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong + = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong ; stmtC (CmmAssign (CmmLocal tmp) src) ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) } go_via_temp (CmmStore dest src) - = do { tmp <- newNonPtrTemp (cmmExprRep src) -- TODO FIXME NOW if the pair of assignemnts move across a call this will be wrong + = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of 
assignments move across a call this will be wrong ; stmtC (CmmAssign (CmmLocal tmp) src) ; return (CmmStore dest (CmmReg (CmmLocal tmp))) } in @@ -932,7 +919,7 @@ doSimultaneously1 vertices mustFollow :: CmmStmt -> CmmStmt -> Bool CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt -CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt CmmNop `mustFollow` stmt = False CmmComment _ `mustFollow` stmt = False @@ -952,7 +939,7 @@ reg `regUsedIn` CmmReg reg' = reg == reg' reg `regUsedIn` CmmRegOff reg' _ = reg == reg' reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es -locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of -- 'e'. Returns True if it's not sure. locUsedIn loc rep (CmmLit _) = False @@ -961,7 +948,7 @@ locUsedIn loc rep (CmmReg reg') = False locUsedIn loc rep (CmmRegOff reg' _) = False locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es -possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool -- Assumes that distinct registers (eg Hp, Sp) do not -- point to the same location, nor any offset thereof. 
possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2 @@ -970,8 +957,8 @@ possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2 = r1==r2 && end1 > start2 && end2 > start1 where - end1 = start1 + machRepByteWidth rep1 - end2 = start2 + machRepByteWidth rep2 + end1 = start1 + widthInBytes (typeWidth rep1) + end2 = start2 + widthInBytes (typeWidth rep2) possiblySameLoc l1 rep1 (CmmLit _) rep2 = False possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative @@ -999,7 +986,7 @@ getSRTInfo = do | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] -> do id <- newUnique let srt_desc_lbl = mkLargeSRTLabel id - emitRODataLits srt_desc_lbl + emitRODataLits "getSRTInfo" srt_desc_lbl ( cmmLabelOffW srt_lbl off : mkWordCLit (fromIntegral len) : map mkWordCLit bmp) @@ -1011,3 +998,9 @@ getSRTInfo = do -- The fromIntegral converts to StgHalfWord srt_escape = (-1) :: StgHalfWord + +clHasCafRefs :: ClosureInfo -> CafInfo +clHasCafRefs (ClosureInfo {closureSRT = srt}) = + case srt of NoC_SRT -> NoCafRefs + _ -> MayHaveCafRefs +clHasCafRefs (ConInfo {}) = NoCafRefs diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index dcb41b4..df32299 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -37,7 +37,7 @@ module ClosureInfo ( slopSize, closureName, infoTableLabelFromCI, - closureLabelFromCI, closureSRT, + closureLabelFromCI, closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd, closureNeedsUpdSpace, closureIsThunk, closureSingleEntry, closureReEntrant, isConstrClosure_maybe, @@ -76,6 +76,7 @@ import Packages import PackageConfig import StaticFlags import Id +import IdInfo import DataCon import Name import OccName @@ -576,28 +577,29 @@ data CallMethod Int -- Its arity getCallMethod :: Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? 
-> LambdaFormInfo -- Its info -> Int -- Number of available arguments -> CallMethod -getCallMethod name lf_info n_args +getCallMethod name _ lf_info n_args | nodeMustPointToIt lf_info && opt_Parallel = -- If we're parallel, then we must always enter via node. -- The reason is that the closure may have been -- fetched since we allocated it. EnterIt -getCallMethod name (LFReEntrant _ arity _ _) n_args +getCallMethod name caf (LFReEntrant _ arity _ _) n_args | n_args == 0 = ASSERT( arity /= 0 ) ReturnIt -- No args at all | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name) arity + | otherwise = DirectEntry (enterIdLabel name caf) arity -getCallMethod name (LFCon con) n_args +getCallMethod name _ (LFCon con) n_args = ASSERT( n_args == 0 ) ReturnCon con -getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args +getCallMethod name caf (LFThunk _ _ updatable std_form_info is_fun) n_args | is_fun -- it *might* be a function, so we must "call" it (which is -- always safe) = SlowCall -- We cannot just enter it [in eval/apply, the entry code @@ -620,12 +622,12 @@ getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args | otherwise -- Jump direct to code for single-entry thunks = ASSERT( n_args == 0 ) - JumpToIt (thunkEntryLabel name std_form_info updatable) + JumpToIt (thunkEntryLabel name caf std_form_info updatable) -getCallMethod name (LFUnknown True) n_args +getCallMethod name _ (LFUnknown True) n_args = SlowCall -- Might be a function -getCallMethod name (LFUnknown False) n_args +getCallMethod name _ (LFUnknown False) n_args | n_args > 0 = WARN( True, ppr name <+> ppr n_args ) SlowCall -- Note [Unsafe coerce complications] @@ -633,15 +635,15 @@ getCallMethod name (LFUnknown False) n_args | otherwise = EnterIt -- Not a function -getCallMethod name (LFBlackHole _) n_args +getCallMethod name _ (LFBlackHole _) n_args = SlowCall -- Presumably the black hole has by now -- been updated, but we don't know with 
-- what, so we slow call it -getCallMethod name (LFLetNoEscape 0) n_args +getCallMethod name _ (LFLetNoEscape 0) n_args = JumpToIt (enterReturnPtLabel (nameUnique name)) -getCallMethod name (LFLetNoEscape arity) n_args +getCallMethod name _ (LFLetNoEscape arity) n_args | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) @@ -882,10 +884,10 @@ isToplevClosure _ = False Label generation. \begin{code} -infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI :: ClosureInfo -> CafInfo -> CLabel infoTableLabelFromCI (ClosureInfo { closureName = name, closureLFInfo = lf_info, - closureSMRep = rep }) + closureSMRep = rep }) caf = case lf_info of LFBlackHole info -> info @@ -895,32 +897,32 @@ infoTableLabelFromCI (ClosureInfo { closureName = name, LFThunk _ _ upd_flag (ApThunk arity) _ -> mkApInfoTableLabel upd_flag arity - LFThunk{} -> mkLocalInfoTableLabel name + LFThunk{} -> mkLocalInfoTableLabel name caf - LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name caf other -> panic "infoTableLabelFromCI" infoTableLabelFromCI (ConInfo { closureCon = con, - closureSMRep = rep }) - | isStaticRep rep = mkStaticInfoTableLabel name - | otherwise = mkConInfoTableLabel name + closureSMRep = rep }) caf + | isStaticRep rep = mkStaticInfoTableLabel name caf + | otherwise = mkConInfoTableLabel name caf where name = dataConName con -- ClosureInfo for a closure (as opposed to a constructor) is always local -closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm -closureLabelFromCI _ = panic "closureLabelFromCI" +closureLabelFromCI (ClosureInfo { closureName = nm }) caf = mkLocalClosureLabel nm caf +closureLabelFromCI _ _ = panic "closureLabelFromCI" -- thunkEntryLabel is a local help function, not exported. It's used from both -- entryLabelFromCI and getCallMethod. 
-thunkEntryLabel thunk_id (ApThunk arity) is_updatable +thunkEntryLabel thunk_id _ (ApThunk arity) is_updatable = enterApLabel is_updatable arity -thunkEntryLabel thunk_id (SelectorThunk offset) upd_flag +thunkEntryLabel thunk_id _ (SelectorThunk offset) upd_flag = enterSelectorLabel upd_flag offset -thunkEntryLabel thunk_id _ is_updatable - = enterIdLabel thunk_id +thunkEntryLabel thunk_id caf _ is_updatable + = enterIdLabel thunk_id caf enterApLabel is_updatable arity | tablesNextToCode = mkApInfoTableLabel is_updatable arity diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs index 4221342..14d7457 100644 --- a/compiler/codeGen/CodeGen.lhs +++ b/compiler/codeGen/CodeGen.lhs @@ -38,7 +38,6 @@ import CLabel import Cmm import CmmUtils import PprCmm -import MachOp import StgSyn import PrelNames @@ -51,6 +50,7 @@ import CostCentre import Id import Name import OccName +import Outputable import TyCon import Module import ErrUtils @@ -198,7 +198,7 @@ mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) []) - mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) bWord -- Main refers to GHC.TopHandler.runIO, so make sure we call the -- init function for GHC.TopHandler. @@ -224,7 +224,7 @@ mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info -- The return-code pops the work stack by -- incrementing Sp, and then jumpd to the popped item ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1) - , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ] + , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) bWord) [] ] rec_descent_init = if opt_SccProfilingOn || isHpcUsed hpc_info @@ -303,8 +303,8 @@ mkSRT these (id,[]) = nopC mkSRT these (id,ids) = do { ids <- mapFCs remap ids ; id <- remap id - ; emitRODataLits (mkSRTLabel (idName id)) - (map (CmmLabel . mkClosureLabel . 
idName) ids) + ; emitRODataLits "CodeGen.mkSRT" (mkSRTLabel (idName id) (idCafInfo id)) + (map (\id -> CmmLabel $ mkClosureLabel (idName id) (idCafInfo id)) ids) } where -- Sigh, better map all the ids against the environment in @@ -326,7 +326,7 @@ cgTopRhs bndr (StgRhsCon cc con args) cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) = ASSERT(null fvs) -- There should be no free variables - setSRTLabel (mkSRTLabel (idName bndr)) $ + setSRTLabel (mkSRTLabel (idName bndr) (idCafInfo bndr)) $ setSRT srt $ forkStatics (cgTopRhsClosure bndr cc bi upd_flag args body) \end{code} diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs index 28d1707..987562c 100644 --- a/compiler/codeGen/SMRep.lhs +++ b/compiler/codeGen/SMRep.lhs @@ -24,14 +24,15 @@ module SMRep ( -- Argument/return representations CgRep(..), nonVoidArg, - argMachRep, primRepToCgRep, primRepHint, + argMachRep, primRepToCgRep, +-- Temp primRepHint, typeHint, isFollowableArg, isVoidArg, isFloatingArg, is64BitArg, separateByPtrFollowness, cgRepSizeW, cgRepSizeB, retAddrSizeW, - typeCgRep, idCgRep, tyConCgRep, typeHint, + typeCgRep, idCgRep, tyConCgRep, -- Closure repesentation SMRep(..), ClosureType(..), @@ -45,10 +46,10 @@ module SMRep ( #include "../includes/MachDeps.h" +import CmmExpr -- CmmType and friends import Id import Type import TyCon -import MachOp import StaticFlags import Constants import Outputable @@ -136,12 +137,12 @@ instance Outputable CgRep where ppr FloatArg = ptext (sLit "F_") ppr DoubleArg = ptext (sLit "D_") -argMachRep :: CgRep -> MachRep -argMachRep PtrArg = wordRep -argMachRep NonPtrArg = wordRep -argMachRep LongArg = I64 -argMachRep FloatArg = F32 -argMachRep DoubleArg = F64 +argMachRep :: CgRep -> CmmType +argMachRep PtrArg = gcWord +argMachRep NonPtrArg = bWord +argMachRep LongArg = b64 +argMachRep FloatArg = f32 +argMachRep DoubleArg = f64 argMachRep VoidArg = panic "argMachRep:VoidRep" primRepToCgRep :: PrimRep -> CgRep @@ -155,17 +156,6 @@ 
primRepToCgRep AddrRep = NonPtrArg primRepToCgRep FloatRep = FloatArg primRepToCgRep DoubleRep = DoubleArg -primRepHint :: PrimRep -> MachHint -primRepHint VoidRep = panic "primRepHint:VoidRep" -primRepHint PtrRep = PtrHint -primRepHint IntRep = SignedHint -primRepHint WordRep = NoHint -primRepHint Int64Rep = SignedHint -primRepHint Word64Rep = NoHint -primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg -primRepHint FloatRep = FloatHint -primRepHint DoubleRep = FloatHint - idCgRep :: Id -> CgRep idCgRep x = typeCgRep . idType $ x @@ -174,9 +164,6 @@ tyConCgRep = primRepToCgRep . tyConPrimRep typeCgRep :: Type -> CgRep typeCgRep = primRepToCgRep . typePrimRep - -typeHint :: Type -> MachHint -typeHint = primRepHint . typePrimRep \end{code} Whether or not the thing is a pointer that the garbage-collector diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs new file mode 100644 index 0000000..56cd1d5 --- /dev/null +++ b/compiler/codeGen/StgCmm.hs @@ -0,0 +1,400 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmm ( codeGen ) where + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import StgCmmProf +import StgCmmMonad +import StgCmmEnv +import StgCmmBind +import StgCmmCon +import StgCmmLayout +import StgCmmHeap +import StgCmmUtils +import StgCmmClosure +import StgCmmHpc +import StgCmmTicky + +import MkZipCfgCmm +import Cmm +import CmmUtils +import CLabel +import PprCmm + +import StgSyn +import PrelNames +import DynFlags +import StaticFlags + +import HscTypes +import CostCentre +import Id +import IdInfo +import Type +import DataCon +import Name +import OccName +import TyCon +import Module +import ErrUtils +import Outputable + +codeGen :: DynFlags + -> Module + -> [TyCon] + -> [Module] -- Directly-imported modules + -> 
CollectedCCs -- (Local/global) cost-centres needing declaring/registering. + -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs + -> HpcInfo + -> IO [CmmZ] -- Output + +codeGen dflags this_mod data_tycons imported_mods + cost_centre_info stg_binds hpc_info + = do { showPass dflags "New CodeGen" + ; let way = buildTag dflags + main_mod = mainModIs dflags + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC dflags this_mod $ do + { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds + ; cmm_tycons <- mapM cgTyCon data_tycons + ; cmm_init <- getCmm (mkModuleInit way cost_centre_info + this_mod main_mod + imported_mods hpc_info) + ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init]) + } + -- Put datatype_stuff after code_stuff, because the + -- datatype closure table (for enumeration types) to + -- (say) PrelBase_True_closure, which is defined in + -- code_stuff + + -- N.B. returning '[Cmm]' and not 'Cmm' here makes it + -- possible for object splitting to split up the + -- pieces later. + + ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "New Cmm" (pprCmms code_stuff) + + ; return code_stuff } + + +--------------------------------------------------------------- +-- Top-level bindings +--------------------------------------------------------------- + +{- 'cgTopBinding' is only used for top-level bindings, since they need +to be allocated statically (not in the heap) and need to be labelled. +No unboxed bindings can happen at top level. + +In the code below, the static bindings are accumulated in the +@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@. +This is so that we can write the top level processing in a compositional +style, with the increasing static environment being plumbed as a state +variable. 
-} + +cgTopBinding :: DynFlags -> (StgBinding,[(Id,[Id])]) -> FCode () +cgTopBinding dflags (StgNonRec id rhs, _srts) + = do { id' <- maybeExternaliseId dflags id + --; mapM_ (mkSRT [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } + +cgTopBinding dflags (StgRec pairs, _srts) + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs + ; let pairs' = zip bndrs' rhss + --; mapM_ (mkSRT bndrs') srts + ; fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; return () } + +--mkSRT :: [Id] -> (Id,[Id]) -> FCode () +--mkSRT these (id,ids) +-- | null ids = nopC +-- | otherwise +-- = do { ids <- mapFCs remap ids +-- ; id <- remap id +-- ; emitRODataLits (mkSRTLabel (idName id) (idCafInfo id)) +-- (map (\id -> CmmLabel $ mkClosureLabel (idName id) (idCafInfo id)) ids) +-- } +-- where +-- -- Sigh, better map all the ids against the environment in +-- -- case they've been externalised (see maybeExternaliseId below). +-- remap id = case filter (==id) these of +-- (id':_) -> returnFC id' +-- [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } + +-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs +-- to enclose the listFCs in cgTopBinding, but that tickled the +-- statics "error" call in initC. I DON'T UNDERSTAND WHY! + +cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along for setting up a binding... 
+ -- It's already been externalised if necessary + +cgTopRhs bndr (StgRhsCon _cc con args) + = forkStatics (cgTopRhsCon bndr con args) + +cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) + = ASSERT(null fvs) -- There should be no free variables + setSRTLabel (mkSRTLabel (idName bndr) (idCafInfo bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi upd_flag srt args body) + + + +--------------------------------------------------------------- +-- Module initialisation code +--------------------------------------------------------------- + +{- The module initialisation code looks like this, roughly: + + FN(__stginit_Foo) { + JMP_(__stginit_Foo_1_p) + } + + FN(__stginit_Foo_1_p) { + ... + } + + We have one version of the init code with a module version and the + 'way' attached to it. The version number helps to catch cases + where modules are not compiled in dependency order before being + linked: if a module has been compiled since any modules which depend on + it, then the latter modules will refer to a different version in their + init blocks and a link error will ensue. + + The 'way' suffix helps to catch cases where modules compiled in different + ways are linked together (eg. profiled and non-profiled). + + We provide a plain, unadorned, version of the module init code + which just jumps to the version with the label and way attached. The + reason for this is that when using foreign exports, the caller of + startupHaskell() must supply the name of the init function for the "top" + module in the program, and we don't want to require that this name + has the version and way info appended to it. 
+ +We initialise the module tree by keeping a work-stack, + * pointed to by Sp + * that grows downward + * Sp points to the last occupied slot +-} + +mkModuleInit + :: String -- the "way" + -> CollectedCCs -- cost centre info + -> Module + -> Module -- name of the Main module + -> [Module] + -> HpcInfo + -> FCode () +mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info + = do { -- Allocate the static boolean that records if this + -- module has been registered already + emitData Data [CmmDataLabel moduleRegdLabel, + CmmStaticLit zeroCLit] + + ; init_hpc <- initHpc this_mod hpc_info + ; init_prof <- initCostCentres cost_centre_info + + -- We emit a recursive descent module search for all modules + -- and *choose* to chase it in :Main, below. + -- In this way, Hpc enabled modules can interact seamlessly with + -- not Hpc enabled modules, provided Main is compiled with Hpc. + + ; emitSimpleProc real_init_lbl $ withFreshLabel "ret_block" $ \retId -> catAGraphs + [ check_already_done retId + , init_prof + , init_hpc + , catAGraphs $ map (registerImport way) all_imported_mods + , mkBranch retId ] + -- Make the "plain" procedure jump to the "real" init procedure + ; emitSimpleProc plain_init_lbl jump_to_init + + -- When compiling the module in which the 'main' function lives, + -- (that is, this_mod == main_mod) + -- we inject an extra stg_init procedure for stg_init_ZCMain, for the + -- RTS to invoke. We must consult the -main-is flag in case the + -- user specified a different function to Main.main + + -- Notice that the recursive descent is optional, depending on what options + -- are enabled. 
+ + + ; whenC (this_mod == main_mod) + (emitSimpleProc plain_main_init_lbl rec_descent_init) + } + where + plain_init_lbl = mkPlainModuleInitLabel this_mod + real_init_lbl = mkModuleInitLabel this_mod way + plain_main_init_lbl = mkPlainModuleInitLabel rOOT_MAIN + + jump_to_init = mkJump (mkLblExpr real_init_lbl) [] + + + -- Main refers to GHC.TopHandler.runIO, so make sure we call the + -- init function for GHC.TopHandler. + extra_imported_mods + | this_mod == main_mod = [gHC_TOP_HANDLER] + | otherwise = [] + all_imported_mods = imported_mods ++ extra_imported_mods + + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) bWord + check_already_done retId + = mkCmmIfThenElse (cmmNeWord (CmmLit zeroCLit) mod_reg_val) + (mkLabel retId Nothing <*> mkReturn []) mkNop + <*> -- Set mod_reg to 1 to record that we've been here + mkStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)) + + -- The return-code pops the work stack by + -- incrementing Sp, and then jumps to the popped item + ret_code = mkAssign spReg (cmmRegOffW spReg 1) + <*> mkJump (CmmLoad (cmmRegOffW spReg (-1)) bWord) [] + + rec_descent_init = if opt_SccProfilingOn || isHpcUsed hpc_info + then jump_to_init + else ret_code + +----------------------- +registerImport :: String -> Module -> CmmAGraph +registerImport way mod + | mod == gHC_PRIM + = mkNop + | otherwise -- Push the init procedure onto the work stack + = mkCmmCall init_lbl [] [] NoC_SRT + where + init_lbl = mkLblExpr $ mkModuleInitLabel mod way + + + +--------------------------------------------------------------- +-- Generating static stuff for algebraic data types +--------------------------------------------------------------- + +{- [These comments are rather out of date] + +Macro Kind of constructor +CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure) +CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array) +INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls +SPEC_INFO_TABLE SPECish, and bigger 
than or equal to MIN_UPD_SIZE +GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@) + +Possible info tables for constructor con: + +* _con_info: + Used for dynamically let(rec)-bound occurrences of + the constructor, and for updates. For constructors + which are int-like, char-like or nullary, when GC occurs, + the closure tries to get rid of itself. + +* _static_info: + Static occurrences of the constructor macro: STATIC_INFO_TABLE. + +For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; +it's place is taken by the top level defn of the constructor. + +For charlike and intlike closures there is a fixed array of static +closures predeclared. +-} + +cgTyCon :: TyCon -> FCode [CmmZ] -- All constructors merged together +cgTyCon tycon + = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) + + -- Generate a table of static closures for an enumeration type + -- Put the table after the data constructor decls, because the + -- datatype closure table (for enumeration types) + -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + -- Note that the closure pointers are tagged. + + -- N.B. comment says to put table after constructor decls, but + -- code puts it before --- NR 16 Aug 2007 + ; extra <- cgEnumerationTyCon tycon + + ; return (extra ++ constrs) + } + +cgEnumerationTyCon :: TyCon -> FCode [CmmZ] +cgEnumerationTyCon tycon + | isEnumerationTyCon tycon + = do { tbl <- getCmm $ + emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) + (tagForCon con) + | con <- tyConDataCons tycon] + ; return [tbl] } + | otherwise + = return [] + +cgDataCon :: DataCon -> FCode () +-- Generate the entry code, info tables, and (for niladic constructor) +-- the static closure, for a constructor. 
+cgDataCon data_con + = do { let + -- To allow the debuggers, interpreters, etc to cope with + -- static data structures (ie those built at compile + -- time), we take care that info-table contains the + -- information we need. + (static_cl_info, _) = layOutStaticConstr data_con arg_reps + (dyn_cl_info, arg_things) = layOutDynConstr data_con arg_reps + + emit_info cl_info ticky_code + = do { code_blks <- getCode (mk_code ticky_code) + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + + mk_code ticky_code + = -- NB: We don't set CC when entering data (WDP 94/06) + do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; tickyReturnOldCon (length arg_things) + ; emitReturn [cmmOffsetB (CmmReg nodeReg) + (tagForCon data_con)] } + -- The case continuation code expects a tagged pointer + + arg_reps :: [(PrimRep, Type)] + arg_reps = [(typePrimRep ty, ty) | ty <- dataConRepArgTys data_con] + + -- Dynamic closure code for non-nullary constructors only + ; whenC (not (isNullaryRepDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + +--------------------------------------------------------------- +-- Stuff to support splitting +--------------------------------------------------------------- + +-- If we're splitting the object, we need to externalise all the +-- top-level names (and then make sure we only use the externalised +-- one in any C label we use which refers to this name). 
+ +maybeExternaliseId :: DynFlags -> Id -> FCode Id +maybeExternaliseId dflags id + | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + isInternalName name = do { mod <- getModuleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id + where + externalise mod = mkExternalName uniq mod new_occ loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcSpan name + -- We want to conjure up a name that can't clash with any + -- existing name. So we generate + -- Mod_$L243foo + -- where 243 is the unique. diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs new file mode 100644 index 0000000..0e8d853 --- /dev/null +++ b/compiler/codeGen/StgCmmBind.hs @@ -0,0 +1,615 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: bindings +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmBind ( + cgTopRhsClosure, + cgBind, + emitBlackHoleCode + ) where + +#include "HsVersions.h" + +import StgCmmMonad +import StgCmmExpr +import StgCmmEnv +import StgCmmCon +import StgCmmHeap +import StgCmmProf +import StgCmmTicky +import StgCmmGran +import StgCmmLayout +import StgCmmUtils +import StgCmmClosure + +import MkZipCfgCmm +import CoreSyn ( AltCon(..) ) +import SMRep +import Cmm +import CmmUtils +import CLabel +import StgSyn +import CostCentre +import Id +import Name +import Module +import ListSetOps +import Util +import BasicTypes +import Constants +import Outputable +import FastString +import Maybes + +import Data.List + +------------------------------------------------------------------------ +-- Top-level bindings +------------------------------------------------------------------------ + +-- For closures bound at top level, allocate in static space. +-- They should have no free variables. 
+ +cgTopRhsClosure :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo + -> UpdateFlag + -> SRT + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +cgTopRhsClosure id ccs binder_info upd_flag srt args body = do + { -- LAY OUT THE OBJECT + let name = idName id + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; srt_info <- getSRTInfo srt + ; mod_name <- getModuleName + ; let descr = closureDescription mod_name name + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr + closure_label = mkLocalClosureLabel name (idCafInfo id) + cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) + closure_rep = mkStaticClosureFields closure_info ccs True [] + + -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) + ; emitDataLits closure_label closure_rep + ; forkClosureBody $ do + { node <- bindToReg id lf_info + ; closureCodeBody binder_info closure_info + ccs srt_info node args body } + + ; returnFC (id, cg_id_info) } + +------------------------------------------------------------------------ +-- Non-top-level bindings +------------------------------------------------------------------------ + +cgBind :: StgBinding -> FCode () +cgBind (StgNonRec name rhs) + = do { (name, info) <- cgRhs name rhs + ; addBindC name info } + +cgBind (StgRec pairs) + = do { new_binds <- fixC (\ new_binds -> + do { addBindsC new_binds + ; listFCs [ cgRhs b e | (b,e) <- pairs ] }) + ; addBindsC new_binds } + +-------------------- +cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along so a binding can be set up + +cgRhs name (StgRhsCon maybe_cc con args) + = do { idinfo <- buildDynCon name maybe_cc con args + ; return (name, idinfo) } + +cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) + = mkRhsClosure name cc bi fvs upd_flag srt args body + +------------------------------------------------------------------------ +-- Non-constructor right hand sides 
+------------------------------------------------------------------------ + +mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo + -> [Id] -- Free vars + -> UpdateFlag -> SRT + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +{- mkRhsClosure looks for two special forms of the right-hand side: + a) selector thunks + b) AP thunks + +If neither happens, it just calls mkClosureLFInfo. You might think +that mkClosureLFInfo should do all this, but it seems wrong for the +latter to look at the structure of an expression + +Note [Selectors] +~~~~~~~~~~~~~~~~ +We look at the body of the closure to see if it's a selector---turgid, +but nothing deep. We are looking for a closure of {\em exactly} the +form: + +... = [the_fv] \ u [] -> + case the_fv of + con a_1 ... a_n -> a_i + +Note [Ap thunks] +~~~~~~~~~~~~~~~~ +A more generic AP thunk of the form + + x = [ x_1...x_n ] \.. [] -> x_1 ... x_n + +A set of these is compiled statically into the RTS, so we just use +those. We could extend the idea to thunks where some of the x_i are +global ids (and hence not free variables), but this would entail +generating a larger thunk. It might be an option for non-optimising +compilation, though. + +We only generate an Ap thunk if all the free variables are pointers, +for semi-obvious reasons. + +-} + +---------- Note [Selectors] ------------------ +mkRhsClosure bndr cc bi + [the_fv] -- Just one free var + upd_flag -- Updatable thunk + _srt + [] -- A thunk + body@(StgCase (StgApp scrutinee [{-no args-}]) + _ _ _ _ -- ignore uniq, etc. 
+ (AlgAlt _) + [(DataAlt con, params, _use_mask, + (StgApp selectee [{-no args-}]))]) + | the_fv == scrutinee -- Scrutinee is the only free variable + && maybeToBool maybe_offset -- Selectee is a component of the tuple + && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + = -- NOT TRUE: ASSERT(is_single_constructor) + -- The simplifier may have statically determined that the single alternative + -- is the only possible case and eliminated the others, even if there are + -- other constructors in the datatype. It's still ok to make a selector + -- thunk in this case, because we *know* which constructor the scrutinee + -- will evaluate to. + -- + -- srt is discarded; it must be empty + cgStdThunk bndr cc bi body lf_info [StgVarArg the_fv] + where + lf_info = mkSelectorLFInfo bndr offset_into_int + (isUpdatable upd_flag) + (_, params_w_offsets) = layOutDynConstr con (addIdReps params) + -- Just want the layout + maybe_offset = assocMaybe params_w_offsets selectee + Just the_offset = maybe_offset + offset_into_int = the_offset - fixedHdrSize + +---------- Note [Ap thunks] ------------------ +mkRhsClosure bndr cc bi + fvs + upd_flag + _srt + [] -- No args; a thunk + body@(StgApp fun_id args) + + | args `lengthIs` (arity-1) + && all isFollowableArg (map idCgRep fvs) + && isUpdatable upd_flag + && arity <= mAX_SPEC_AP_SIZE + + -- Ha! an Ap thunk + = cgStdThunk bndr cc bi body lf_info payload + where + lf_info = mkApLFInfo bndr upd_flag arity + -- the payload has to be in the correct order, hence we can't + -- just use the fvs. + payload = StgVarArg fun_id : args + arity = length fvs + +---------- Default case ------------------ +mkRhsClosure bndr cc bi fvs upd_flag srt args body + = do { -- LAY OUT THE OBJECT + -- If the binder is itself a free variable, then don't store + -- it in the closure. Instead, just bind it to Node on entry. 
+ -- NB we can be sure that Node will point to it, because we + -- havn't told mkClosureLFInfo about this; so if the binder + -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is* + -- stored in the closure itself, so it will make sure that + -- Node points to it... + ; let + is_elem = isIn "cgRhsClosure" + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr] + | otherwise = fvs + + + -- MAKE CLOSURE INFO FOR THIS CLOSURE + ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args + ; mod_name <- getModuleName + ; c_srt <- getSRTInfo srt + ; let name = idName bndr + descr = closureDescription mod_name name + fv_details :: [(Id, VirtualHpOffset)] + (tot_wds, ptr_wds, fv_details) + = mkVirtHeapOffsets (isLFThunk lf_info) + (addIdReps reduced_fvs) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + c_srt descr + + -- BUILD ITS INFO TABLE AND CODE + ; forkClosureBody $ do + { -- Bind the binder itself + -- It does no harm to have it in the envt even if + -- it's not a free variable; and we need a reg for it + node <- bindToReg bndr lf_info + + -- Bind the free variables + ; mapCs (bind_fv node) fv_details + + -- And compile the body + ; closureCodeBody bi closure_info cc c_srt node args body } + + -- BUILD THE OBJECT + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; emit (mkComment $ mkFastString "calling allocDynClosure") + ; tmp <- allocDynClosure closure_info use_cc blame_cc + (mapFst StgVarArg fv_details) + + -- RETURN + ; return (bndr, regIdInfo bndr lf_info tmp) } + where + -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + tag = tagForArity (length args) + + bind_fv node (id, off) + = do { reg <- rebindToReg id + ; emit $ mkTaggedObjectLoad reg node off tag } + +------------------------- +cgStdThunk + :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo -- XXX: not used?? 
+ -> StgExpr + -> LambdaFormInfo + -> [StgArg] -- payload + -> FCode (Id, CgIdInfo) + +cgStdThunk bndr cc _bndr_info body lf_info payload + = do -- AHA! A STANDARD-FORM THUNK + { -- LAY OUT THE OBJECT + mod_name <- getModuleName + ; let (tot_wds, ptr_wds, payload_w_offsets) + = mkVirtHeapOffsets (isLFThunk lf_info) (addArgReps payload) + + descr = closureDescription mod_name (idName bndr) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + NoC_SRT -- No SRT for a std-form closure + descr + + ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body + + -- BUILD THE OBJECT + ; tmp <- allocDynClosure closure_info use_cc blame_cc payload_w_offsets + + -- RETURN + ; returnFC (bndr, regIdInfo bndr lf_info tmp) } + +mkClosureLFInfo :: Id -- The binder + -> TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> UpdateFlag -- Update flag + -> [Id] -- Args + -> FCode LambdaFormInfo +mkClosureLFInfo bndr top fvs upd_flag args + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) + | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args + ; return (mkLFReEntrant top fvs args arg_descr) } + + +------------------------------------------------------------------------ +-- The code for closures} +------------------------------------------------------------------------ + +closureCodeBody :: StgBinderInfo -- XXX: unused? + -> ClosureInfo -- Lots of information about this closure + -> CostCentreStack -- Optional cost centre attached to closure + -> C_SRT + -> LocalReg -- The closure itself; first argument + -- The Id is in scope already, bound to this reg + -> [Id] + -> StgExpr + -> FCode () + +{- There are two main cases for the code for closures. + +* If there are *no arguments*, then the closure is a thunk, and not in + normal form. So it should set up an update frame (if it is + shared). NB: Thunks cannot have a primitive type! 
+ +* If there is *at least one* argument, then this closure is in + normal form, so there is no need to set up an update frame. + + The Macros for GrAnSim are produced at the beginning of the + argSatisfactionCheck (by calling fetchAndReschedule). + There info if Node points to closure is available. -- HWL -} + +closureCodeBody _binder_info cl_info cc srt node args body + | null args -- No args i.e. thunk + = do { code <- getCode $ thunkCode cl_info cc srt node body + ; emitClosureCodeAndInfoTable cl_info [node] code } + +closureCodeBody _binder_info cl_info cc srt node args body + = ASSERT( length args > 0 ) + do { -- Allocate the global ticky counter, + -- and establish the ticky-counter + -- label for this block + let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) $ clHasCafRefs cl_info + ; emitTickyCounter cl_info args + ; setTickyCtrLabel ticky_ctr_lbl $ do + +-- -- XXX: no slow-entry code for now +-- -- Emit the slow-entry code +-- { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; let node_points = nodeMustPointToIt (closureLFInfo cl_info) + ; arg_regs <- bindArgsToRegs args + ; blks <- forkProc $ getCode $ do + { enterCostCentre cl_info cc body + ; tickyEnterFun cl_info + ; whenC node_points (ldvEnterClosure cl_info) + ; granYield arg_regs node_points + + -- Main payload + ; entryHeapCheck node arg_regs srt $ + cgExpr body } + + ; emitClosureCodeAndInfoTable cl_info (node:arg_regs) blks + } + +{- +----------------------------------------- +-- The "slow entry" code for a function. This entry point takes its +-- arguments on the stack. It loads the arguments into registers +-- according to the calling convention, and jumps to the function's +-- normal entry point. The function's closure is assumed to be in +-- R1/node. +-- +-- The slow entry point is used in two places: +-- +-- (a) unknown calls: eg. 
stg_PAP_entry +-- (b) returning from a heap-check failure + +mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts +-- If this function doesn't have a specialised ArgDescr, we need +-- to generate the function's arg bitmap, slow-entry code, and +-- register-save code for the heap-check failure +-- Here, we emit the slow-entry code, and +-- return the register-save assignments +mkSlowEntryCode cl_info reg_args + | Just (_, ArgGen _) <- closureFunInfo cl_info + = do { emitSimpleProc slow_lbl (emitStmts load_stmts) + ; return save_stmts } + | otherwise = return noStmts + where + name = closureName cl_info + slow_lbl = mkSlowEntryLabel name + + load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] + save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts + + reps_w_regs :: [(CgRep,GlobalReg)] + reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args] + (final_stk_offset, stk_offsets) + = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off)) + 0 reps_w_regs + + load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets + mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg) + (CmmLoad (cmmRegOffW spReg offset) + (argMachRep rep)) + + save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets + mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegType reg ) + CmmStore (cmmRegOffW spReg offset) + (CmmReg (CmmGlobal reg)) + + stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) + stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) + jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) [] +-} + +----------------------------------------- +thunkCode :: ClosureInfo -> CostCentreStack -> C_SRT -> LocalReg -> StgExpr -> FCode () +thunkCode cl_info cc srt node body + = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) + + ; tickyEnterThunk cl_info + ; ldvEnterClosure cl_info -- NB: Node always points when profiling + ; granThunk node_points 
+ + -- Heap overflow check + ; entryHeapCheck node [] srt $ do + { -- Overwrite with black hole if necessary + -- but *after* the heap-overflow check + whenC (blackHoleOnEntry cl_info && node_points) + (blackHoleIt cl_info) + + -- Push update frame + ; setupUpdate cl_info node + + -- We only enter cc after setting up update so + -- that cc of enclosing scope will be recorded + -- in update frame CAF/DICT functions will be + -- subsumed by this enclosing cc + ; enterCostCentre cl_info cc body + + ; cgExpr body } } + + +------------------------------------------------------------------------ +-- Update and black-hole wrappers +------------------------------------------------------------------------ + +blackHoleIt :: ClosureInfo -> FCode () +-- Only called for closures with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> FCode () +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + emit (mkStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = + nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel (sLit "stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel (sLit "stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- Previously, eager blackholing was enabled when ticky-ticky + -- was on. But it didn't work, and it wasn't strictly necessary + -- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING + -- is unconditionally disabled. 
-- krc 1/2007 + + eager_blackholing = False + +setupUpdate :: ClosureInfo -> LocalReg -> FCode () + -- Nota Bene: this function does not change Node (even if it's a CAF), + -- so that the cost centre in the original closure can still be + -- extracted by a subsequent enterCostCentre +setupUpdate closure_info node + | closureReEntrant closure_info + = return () + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame node } + else tickyUpdateFrameOmitted + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure } + else tickyUpdateFrameOmitted + } + +pushUpdateFrame :: LocalReg -> FCode () +pushUpdateFrame cl_reg + = emit (mkAddToContext (mkLblExpr mkUpdInfoLabel) + [CmmReg (CmmLocal cl_reg)]) + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? +-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. 
+ +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) +-- + +-- ToDo [Feb 04] This entire link_caf nonsense could all be moved +-- into the "newCAF" RTS procedure, which we call anyway, including +-- the allocation of the black-hole indirection closure. +-- That way, code size would fall, the CAF-handling code would +-- be closer together, and the compiler wouldn't need to know +-- about off_indirectee etc. + +link_caf :: ClosureInfo + -> Bool -- True <=> updatable, False <=> single-entry + -> FCode LocalReg -- Returns amode for closure to be updated +-- To update a CAF we must allocate a black hole, link the CAF onto the +-- CAF list, then update the CAF to point to the fresh black hole. +-- This function returns the address of the black hole, so it can be +-- updated with the new value when available. The reason for all of this +-- is that we only want to update dynamic heap objects, not static ones, +-- so that generational GC is easier. +link_caf cl_info is_upd = do + { -- Alloc black hole specifying CC_HDR(Node) as the cost centre + ; let use_cc = costCentreFrom (CmmReg nodeReg) + blame_cc = use_cc + ; hp_rel <- allocDynClosure bh_cl_info use_cc blame_cc [] + + -- Call the RTS function newCAF to add the CAF to the CafList + -- so that the garbage collector can find them + -- This must be done *before* the info table pointer is overwritten, + -- because the old info table ptr is needed for reversion + ; emitRtsCallWithVols (sLit "newCAF") [(CmmReg nodeReg,AddrHint)] [node] False + -- node is live, so save it. 
+ + -- Overwrite the closure with a (static) indirection + -- to the newly-allocated black hole + ; emit (mkStore (cmmRegOffW nodeReg off_indirectee) (CmmReg (CmmLocal hp_rel)) <*> + mkStore (CmmReg nodeReg) ind_static_info) + + ; return hp_rel } + where + bh_cl_info :: ClosureInfo + bh_cl_info | is_upd = cafBlackHoleClosureInfo cl_info + | otherwise = seCafBlackHoleClosureInfo cl_info + + ind_static_info :: CmmExpr + ind_static_info = mkLblExpr mkIndStaticInfoLabel + + off_indirectee :: WordOff + off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE + + +------------------------------------------------------------------------ +-- Profiling +------------------------------------------------------------------------ + +-- For "global" data constructors the description is simply occurrence +-- name of the data constructor itself. Otherwise it is determined by +-- @closureDescription@ from the let binding information. + +closureDescription :: Module -- Module + -> Name -- Id of closure binding + -> String + -- Not called for StgRhsCon which have global info tables built in + -- CgConTbls.lhs with a description generated from the data constructor +closureDescription mod_name name + = showSDocDump (char '<' <> + (if isExternalName name + then ppr name -- ppr will include the module name prefix + else pprModule mod_name <> char '.' <> ppr name) <> + char '>') + -- showSDocDump, because we want to see the unique on the Name. 
+ diff --git a/compiler/codeGen/StgCmmBind.hs-boot b/compiler/codeGen/StgCmmBind.hs-boot new file mode 100644 index 0000000..5840e99 --- /dev/null +++ b/compiler/codeGen/StgCmmBind.hs-boot @@ -0,0 +1,6 @@ +module StgCmmBind where + +import StgCmmMonad( FCode ) +import StgSyn( StgBinding ) + +cgBind :: StgBinding -> FCode () diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs new file mode 100644 index 0000000..c32d7cd --- /dev/null +++ b/compiler/codeGen/StgCmmClosure.hs @@ -0,0 +1,1100 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: +-- +-- The types LambdaFormInfo +-- ClosureInfo +-- +-- Nothing monadic in here! +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + + +module StgCmmClosure ( + SMRep, + DynTag, tagForCon, isSmallFamily, + ConTagZ, dataConTagZ, + + ArgDescr(..), Liveness(..), + C_SRT(..), needsSRT, + + isVoidRep, isGcPtrRep, addIdReps, addArgReps, + argPrimRep, + + LambdaFormInfo, -- Abstract + StandardFormInfo, -- ...ditto... 
+ mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, + mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, + lfDynTag, + + ClosureInfo, + mkClosureInfo, mkConInfo, maybeIsLFCon, + + closureSize, closureNonHdrSize, + closureGoodStuffSize, closurePtrsSize, + slopSize, + + closureName, infoTableLabelFromCI, + closureLabelFromCI, + closureTypeInfo, + closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd, + closureNeedsUpdSpace, closureIsThunk, + closureSingleEntry, closureReEntrant, isConstrClosure_maybe, + closureFunInfo, isStandardFormThunk, isKnownFun, + funTag, tagForArity, + + enterIdLabel, enterLocalIdLabel, + + nodeMustPointToIt, + CallMethod(..), getCallMethod, + + blackHoleOnEntry, + + getClosureType, + + isToplevClosure, + closureValDescr, closureTypeDescr, -- profiling + + isStaticClosure, + cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo, + + staticClosureNeedsLink, clHasCafRefs + ) where + +#include "../includes/MachDeps.h" + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import ClosureInfo (ArgDescr(..), C_SRT(..), Liveness(..)) + -- XXX temporary becuase FunInfo needs this one + +import StgSyn +import SMRep +import Cmm ( ClosureTypeInfo(..) 
) +import CmmExpr + +import CLabel +import StaticFlags +import Id +import IdInfo +import DataCon +import Name +import OccName +import Type +import TypeRep +import TcType +import TyCon +import BasicTypes +import Outputable +import Constants + + +----------------------------------------------------------------------------- +-- Representations +----------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(PrimRep, Id)] +addIdReps ids = [(idPrimRep id, id) | id <- ids] + +addArgReps :: [StgArg] -> [(PrimRep, StgArg)] +addArgReps args = [(argPrimRep arg, arg) | arg <- args] + +argPrimRep :: StgArg -> PrimRep +argPrimRep arg = typePrimRep (stgArgType arg) + +isVoidRep :: PrimRep -> Bool +isVoidRep VoidRep = True +isVoidRep _other = False + +isGcPtrRep :: PrimRep -> Bool +isGcPtrRep PtrRep = True +isGcPtrRep _ = False + + +----------------------------------------------------------------------------- +-- LambdaFormInfo +----------------------------------------------------------------------------- + +-- Information about an identifier, from the code generator's point of +-- view. Every identifier is bound to a LambdaFormInfo in the +-- environment, which gives the code generator enough info to be able to +-- tail call or return that identifier. + +data LambdaFormInfo + = LFReEntrant -- Reentrant closure (a function) + TopLevelFlag -- True if top level + !Int -- Arity. Invariant: always > 0 + !Bool -- True <=> no fvs + ArgDescr -- Argument descriptor (should really be in ClosureInfo) + + | LFThunk -- Thunk (zero arity) + TopLevelFlag + !Bool -- True <=> no free vars + !Bool -- True <=> updatable (i.e., *not* single-entry) + StandardFormInfo + !Bool -- True <=> *might* be a function type + + | LFCon -- A saturated constructor application + DataCon -- The constructor + + | LFUnknown -- Used for function arguments and imported things. + -- We know nothing about this closure. + -- Treat like updatable "LFThunk"... 
+ -- Imported things which we *do* know something about use + -- one of the other LF constructors (eg LFReEntrant for + -- known functions) + !Bool -- True <=> *might* be a function type + -- The False case is good when we want to enter it, + -- because then we know the entry code will do + -- For a function, the entry code is the fast entry point + + | LFUnLifted -- A value of unboxed type; + -- always a value, neeeds evaluation + + | LFLetNoEscape -- See LetNoEscape module for precise description + + | LFBlackHole -- Used for the closures allocated to hold the result + -- of a CAF. We want the target of the update frame to + -- be in the heap, so we make a black hole to hold it. + CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info). + + +------------------------- +-- An ArgDsecr describes the argument pattern of a function + +{- XXX -- imported from old ClosureInfo for now +data ArgDescr + = ArgSpec -- Fits one of the standard patterns + !StgHalfWord -- RTS type identifier ARG_P, ARG_N, ... + + | ArgGen -- General case + Liveness -- Details about the arguments +-} + +{- XXX -- imported from old ClosureInfo for now +------------------------- +-- We represent liveness bitmaps as a Bitmap (whose internal +-- representation really is a bitmap). These are pinned onto case return +-- vectors to indicate the state of the stack for the garbage collector. +-- +-- In the compiled program, liveness bitmaps that fit inside a single +-- word (StgWord) are stored as a single word, while larger bitmaps are +-- stored as a pointer to an array of words. 
+ +data Liveness + = SmallLiveness -- Liveness info that fits in one word + StgWord -- Here's the bitmap + + | BigLiveness -- Liveness info witha a multi-word bitmap + CLabel -- Label for the bitmap +-} + +------------------------- +-- StandardFormInfo tells whether this thunk has one of +-- a small number of standard forms + +data StandardFormInfo + = NonStandardThunk + -- Not of of the standard forms + + | SelectorThunk + -- A SelectorThunk is of form + -- case x of + -- con a1,..,an -> ak + -- and the constructor is from a single-constr type. + WordOff -- 0-origin offset of ak within the "goods" of + -- constructor (Recall that the a1,...,an may be laid + -- out in the heap in a non-obvious order.) + + | ApThunk + -- An ApThunk is of form + -- x1 ... xn + -- The code for the thunk just pushes x2..xn on the stack and enters x1. + -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled + -- in the RTS to save space. + Int -- Arity, n + + +------------------------------------------------------ +-- Building LambdaFormInfo +------------------------------------------------------ + +mkLFArgument :: Id -> LambdaFormInfo +mkLFArgument id + | isUnLiftedType ty = LFUnLifted + | might_be_a_function ty = LFUnknown True + | otherwise = LFUnknown False + where + ty = idType id + +------------- +mkLFLetNoEscape :: LambdaFormInfo +mkLFLetNoEscape = LFLetNoEscape + +------------- +mkLFReEntrant :: TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> [Id] -- Args + -> ArgDescr -- Argument descriptor + -> LambdaFormInfo + +mkLFReEntrant top fvs args arg_descr + = LFReEntrant top (length args) (null fvs) arg_descr + +------------- +mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo +mkLFThunk thunk_ty top fvs upd_flag + = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) ) + LFThunk top (null fvs) + (isUpdatable upd_flag) + NonStandardThunk + (might_be_a_function thunk_ty) + +-------------- +might_be_a_function :: 
Type -> Bool +-- Return False only if we are *sure* it's a data type +-- Look through newtypes etc as much as poss +might_be_a_function ty + = case splitTyConApp_maybe (repType ty) of + Just (tc, _) -> not (isDataTyCon tc) + Nothing -> True + +------------- +mkConLFInfo :: DataCon -> LambdaFormInfo +mkConLFInfo con = LFCon con + +------------- +mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo +mkSelectorLFInfo id offset updatable + = LFThunk NotTopLevel False updatable (SelectorThunk offset) + (might_be_a_function (idType id)) + +------------- +mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo +mkApLFInfo id upd_flag arity + = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity) + (might_be_a_function (idType id)) + +------------- +mkLFImported :: Id -> LambdaFormInfo +mkLFImported id + | Just con <- isDataConWorkId_maybe id + , isNullaryRepDataCon con + = LFCon con -- An imported nullary constructor + -- We assume that the constructor is evaluated so that + -- the id really does point directly to the constructor + + | arity > 0 + = LFReEntrant TopLevel arity True (panic "arg_descr") + + | otherwise + = mkLFArgument id -- Not sure of exact arity + where + arity = idArity id + +----------------------------------------------------- +-- Dynamic pointer tagging +----------------------------------------------------- + +type ConTagZ = Int -- A *zero-indexed* contructor tag + +type DynTag = Int -- The tag on a *pointer* + -- (from the dynamic-tagging paper) + +{- Note [Data constructor dynamic tags] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The family size of a data type (the number of constructors) +can be either: + * small, if the family size < 2**tag_bits + * big, otherwise. + +Small families can have the constructor tag in the tag bits. +Big families only use the tag value 1 to represent evaluatedness. 
-} + +isSmallFamily :: Int -> Bool +isSmallFamily fam_size = fam_size <= mAX_PTR_TAG + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +tagForCon :: DataCon -> DynTag +tagForCon con + | isSmallFamily fam_size = con_tag + 1 + | otherwise = 1 + where + con_tag = dataConTagZ con + fam_size = tyConFamilySize (dataConTyCon con) + +tagForArity :: Int -> DynTag +tagForArity arity | isSmallFamily arity = arity + | otherwise = 0 + +lfDynTag :: LambdaFormInfo -> DynTag +lfDynTag (LFCon con) = tagForCon con +lfDynTag (LFReEntrant _ arity _ _) = tagForArity arity +lfDynTag _other = 0 + + +----------------------------------------------------------------------------- +-- Observing LambdaFormInfo +----------------------------------------------------------------------------- + +------------- +maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon +maybeIsLFCon (LFCon con) = Just con +maybeIsLFCon _ = Nothing + +------------ +isLFThunk :: LambdaFormInfo -> Bool +isLFThunk (LFThunk _ _ _ _ _) = True +isLFThunk (LFBlackHole _) = True + -- return True for a blackhole: this function is used to determine + -- whether to use the thunk header in SMP mode, and a blackhole + -- must have one. +isLFThunk _ = False + + +----------------------------------------------------------------------------- +-- Choosing SM reps +----------------------------------------------------------------------------- + +chooseSMRep + :: Bool -- True <=> static closure + -> LambdaFormInfo + -> WordOff -> WordOff -- Tot wds, ptr wds + -> SMRep + +chooseSMRep is_static lf_info tot_wds ptr_wds + = let + nonptr_wds = tot_wds - ptr_wds + closure_type = getClosureType is_static ptr_wds lf_info + in + GenericRep is_static ptr_wds nonptr_wds closure_type + +-- We *do* get non-updatable top-level thunks sometimes. eg. 
f = g +-- gets compiled to a jump to g (if g has non-zero arity), instead of +-- messing around with update frames and PAPs. We set the closure type +-- to FUN_STATIC in this case. + +getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType +getClosureType is_static ptr_wds lf_info + = case lf_info of + LFCon {} | is_static && ptr_wds == 0 -> ConstrNoCaf + | otherwise -> Constr + LFReEntrant {} -> Fun + LFThunk _ _ _ (SelectorThunk {}) _ -> ThunkSelector + LFThunk {} -> Thunk + _ -> panic "getClosureType" + + +----------------------------------------------------------------------------- +-- nodeMustPointToIt +----------------------------------------------------------------------------- + +-- Be sure to see the stg-details notes about these... + +nodeMustPointToIt :: LambdaFormInfo -> Bool +nodeMustPointToIt (LFReEntrant top _ no_fvs _) + = not no_fvs || -- Certainly if it has fvs we need to point to it + isNotTopLevel top + -- If it is not top level we will point to it + -- We can have a \r closure with no_fvs which + -- is not top level as special case cgRhsClosure + -- has been dissabled in favour of let floating + + -- For lex_profiling we also access the cost centre for a + -- non-inherited function i.e. not top level + -- the not top case above ensures this is ok. + +nodeMustPointToIt (LFCon _) = True + + -- Strictly speaking, the above two don't need Node to point + -- to it if the arity = 0. But this is a *really* unlikely + -- situation. If we know it's nil (say) and we are entering + -- it. Eg: let x = [] in x then we will certainly have inlined + -- x, since nil is a simple atom. So we gain little by not + -- having Node point to known zero-arity things. On the other + -- hand, we do lose something; Patrick's code for figuring out + -- when something has been updated but not entered relies on + -- having Node point to the result of an update. SLPJ + -- 27/11/92. 
+ +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn + -- For the non-updatable (single-entry case): + -- + -- True if has fvs (in which case we need access to them, and we + -- should black-hole it) + -- or profiling (in which case we need to recover the cost centre + -- from inside it) + +nodeMustPointToIt (LFThunk {}) -- Node must point to a standard-form thunk + = True + +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt LFUnLifted = False +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt LFLetNoEscape = False + +----------------------------------------------------------------------------- +-- getCallMethod +----------------------------------------------------------------------------- + +{- The entry conventions depend on the type of closure being entered, +whether or not it has free variables, and whether we're running +sequentially or in parallel. + +Closure Node Argument Enter +Characteristics Par Req'd Passing Via +------------------------------------------------------------------------------- +Unknown & no & yes & stack & node +Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args) + & slow entry (otherwise) +Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args) +0 arg, no fvs \r,\s & no & no & n/a & direct entry +0 arg, no fvs \u & no & yes & n/a & node +0 arg, fvs \r,\s & no & yes & n/a & direct entry +0 arg, fvs \u & no & yes & n/a & node + +Unknown & yes & yes & stack & node +Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args) + & slow entry (otherwise) +Known fun (>1 arg), fvs & yes & yes & registers & node +0 arg, no fvs \r,\s & yes & no & n/a & direct entry +0 arg, no fvs \u & yes & yes & n/a & node +0 arg, fvs \r,\s & yes & yes & n/a & node +0 arg, fvs \u & yes & yes & n/a & node +\end{tabular} + +When black-holing, single-entry closures could also be entered via 
node +(rather than directly) to catch double-entry. -} + +data CallMethod + = EnterIt -- No args, not a function + + | JumpToIt -- A join point + + | ReturnIt -- It's a value (function, unboxed value, + -- or constructor), so just return it. + + | SlowCall -- Unknown fun, or known fun with + -- too few args. + + | DirectEntry -- Jump directly, with args in regs + CLabel -- The code label + Int -- Its arity + +getCallMethod :: Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod _name _ lf_info _n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. + EnterIt + +getCallMethod name caf (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel name caf) arity + +getCallMethod _name _ LFUnLifted n_args + = ASSERT( n_args == 0 ) ReturnIt + +getCallMethod _name _ (LFCon _) n_args + = ASSERT( n_args == 0 ) ReturnIt + +getCallMethod name caf (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- it *might* be a function, so we must "call" it (which is always safe) + = SlowCall -- We cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + -- Since is_fun is False, we are *definitely* looking at a data value + | updatable || opt_DoTickyProfiling -- to catch double entry + {- OLD: || opt_SMP + I decided to remove this, because in SMP mode it doesn't matter + if we enter the same thunk multiple times, so the optimisation + of jumping directly to the entry code is still valid. 
--SDM + -} + = EnterIt + -- We used to have ASSERT( n_args == 0 ), but actually it is + -- possible for the optimiser to generate + -- let bot :: Int = error Int "urk" + -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3 + -- This happens as a result of the case-of-error transformation + -- So the right thing to do is just to enter the thing + + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + DirectEntry (thunkEntryLabel name caf std_form_info updatable) 0 + +getCallMethod _name _ (LFUnknown True) _n_args + = SlowCall -- might be a function + +getCallMethod name _ (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function + +getCallMethod _name _ (LFBlackHole _) _n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod _name _ LFLetNoEscape _n_args + = JumpToIt + +isStandardFormThunk :: LambdaFormInfo -> Bool +isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True +isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True +isStandardFormThunk _other_lf_info = False + +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun LFLetNoEscape = True +isKnownFun _ = False + +----------------------------------------------------------------------------- +-- staticClosureRequired +----------------------------------------------------------------------------- + +{- staticClosureRequired is never called (hence commented out) + + SimonMar writes (Sept 07) It's an optimisation we used to apply at + one time, I believe, but it got lost probably in the rewrite of + the RTS/code generator. I left that code there to remind me to + look into whether it was worth doing sometime + +{- Avoiding generating entries and info tables + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At present, for every function we generate all of the following, +just in case. 
But they aren't always all needed, as noted below: + +[NB1: all of this applies only to *functions*. Thunks always +have closure, info table, and entry code.] + +[NB2: All are needed if the function is *exported*, just to play safe.] + +* Fast-entry code ALWAYS NEEDED + +* Slow-entry code + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) we're in the parallel world and the function has free vars + [Reason: in parallel world, we always enter functions + with free vars via the closure.] + +* The function closure + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) if the function has free vars (ie not top level) + + Why case (a) here? Because if the arg-satis check fails, + UpdatePAP stuffs a pointer to the function closure in the PAP. + [Could be changed; UpdatePAP could stuff in a code ptr instead, + but doesn't seem worth it.] + + [NB: these conditions imply that we might need the closure + without the slow-entry code. Here's how. + + f x y = let g w = ...x..y..w... + in + ...(g t)... + + Here we need a closure for g which contains x and y, + but since the calls are all saturated we just jump to the + fast entry point for g, with R1 pointing to the closure for g.] + + +* Standard info table + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) the function has free vars (ie not top level) + + NB. In the sequential world, (c) is only required so that the function closure has + an info table to point to, to keep the storage manager happy. + If (c) alone is true we could fake up an info table by choosing + one of a standard family of info tables, whose entry code just + bombs out. + + [NB In the parallel world (c) is needed regardless because + we enter functions with free vars via the closure.] 
+ + If (c) is retained, then we'll sometimes generate an info table + (for storage mgr purposes) without slow-entry code. Then we need + to use an error label in the info table to substitute for the absent + slow entry code. +-} + +staticClosureRequired + :: Name + -> StgBinderInfo + -> LambdaFormInfo + -> Bool +staticClosureRequired binder bndr_info + (LFReEntrant top_level _ _ _) -- It's a function + = ASSERT( isTopLevel top_level ) + -- Assumption: it's a top-level, no-free-var binding + not (satCallsOnly bndr_info) + +staticClosureRequired binder other_binder_info other_lf_info = True +-} + +----------------------------------------------------------------------------- +-- Data types for closure information} +----------------------------------------------------------------------------- + + +{- Information about a closure, from the code generator's point of view. + +A ClosureInfo decribes the info pointer of a closure. It has +enough information + a) to construct the info table itself + b) to allocate a closure containing that info pointer (i.e. + it knows the info table label) + +We make a ClosureInfo for + - each let binding (both top level and not) + - each data constructor (for its shared static and + dynamic info tables) +-} + +data ClosureInfo + = ClosureInfo { + closureName :: !Name, -- The thing bound to this closure + closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below) + closureSMRep :: !SMRep, -- representation used by storage mgr + closureSRT :: !C_SRT, -- What SRT applies to this closure + closureType :: !Type, -- Type of closure (ToDo: remove) + closureDescr :: !String -- closure description (for profiling) + } + + -- Constructor closures don't have a unique info table label (they use + -- the constructor's info table), and they don't have an SRT. + | ConInfo { + closureCon :: !DataCon, + closureSMRep :: !SMRep + } + +{- XXX temp imported from old ClosureInfo +-- C_SRT is what StgSyn.SRT gets translated to... 
+-- we add a label for the table, and expect only the 'offset/length' form + +data C_SRT = NoC_SRT + | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-} + deriving (Eq) + +instance Outputable C_SRT where + ppr (NoC_SRT) = ptext SLIT("_no_srt_") + ppr (C_SRT label off bitmap) = parens (ppr label <> comma <> ppr off <> comma <> text (show bitmap)) +-} + +needsSRT :: C_SRT -> Bool +needsSRT NoC_SRT = False +needsSRT (C_SRT _ _ _) = True + + +-------------------------------------- +-- Building ClosureInfos +-------------------------------------- + +mkClosureInfo :: Bool -- Is static + -> Id + -> LambdaFormInfo + -> Int -> Int -- Total and pointer words + -> C_SRT + -> String -- String descriptor + -> ClosureInfo +mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr + = ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = sm_rep, + closureSRT = srt_info, + closureType = idType id, + closureDescr = descr } + where + name = idName id + sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds + +mkConInfo :: Bool -- Is static + -> DataCon + -> Int -> Int -- Total and pointer words + -> ClosureInfo +mkConInfo is_static data_con tot_wds ptr_wds + = ConInfo { closureSMRep = sm_rep, + closureCon = data_con } + where + sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds + + +-- We need a black-hole closure info to pass to @allocDynClosure@ when we +-- want to allocate the black hole on entry to a CAF. These are the only +-- ways to build an LFBlackHole, maintaining the invariant that it really +-- is a black hole and not something else. 
+ +cafBlackHoleClosureInfo :: ClosureInfo -> ClosureInfo +cafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo" + +seCafBlackHoleClosureInfo :: ClosureInfo -> ClosureInfo +seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo" + +-------------------------------------- +-- Extracting ClosureTypeInfo +-------------------------------------- + +closureTypeInfo :: ClosureInfo -> ClosureTypeInfo +closureTypeInfo cl_info + = case cl_info of + ConInfo { closureCon = con } + -> ConstrInfo (ptrs, nptrs) + (fromIntegral (dataConTagZ con)) + con_name + where + con_name = panic "closureTypeInfo" + -- Was: + -- cstr <- mkByteStringCLit $ dataConIdentity con + -- con_name = makeRelativeRefTo info_lbl cstr + + ClosureInfo { closureName = name, + closureLFInfo = LFReEntrant _ arity _ arg_descr, + closureSRT = srt } + -> FunInfo (ptrs, nptrs) + srt + (fromIntegral arity) + arg_descr + (CmmLabel (mkSlowEntryLabel name (clHasCafRefs cl_info))) + + ClosureInfo { closureLFInfo = LFThunk _ _ _ (SelectorThunk offset) _, + closureSRT = srt } + -> ThunkSelectorInfo (fromIntegral offset) srt + + ClosureInfo { closureLFInfo = LFThunk {}, + closureSRT = srt } + -> ThunkInfo (ptrs, nptrs) srt + + _ -> panic "unexpected lambda form in mkCmmInfo" + where +-- info_lbl = infoTableLabelFromCI cl_info + ptrs = fromIntegral $ closurePtrsSize cl_info + size = fromIntegral $ closureNonHdrSize cl_info + nptrs = size - ptrs + 
+-------------------------------------- +-- Functions about closure *sizes* +-------------------------------------- + +closureSize :: ClosureInfo -> WordOff +closureSize cl_info = hdr_size + closureNonHdrSize cl_info + where hdr_size | closureIsThunk cl_info = thunkHdrSize + | otherwise = fixedHdrSize + -- All thunks use thunkHdrSize, even if they are non-updatable. + -- this is because we don't have separate closure types for + -- updatable vs. non-updatable thunks, so the GC can't tell the + -- difference. If we ever have significant numbers of non- + -- updatable thunks, it might be worth fixing this. + +closureNonHdrSize :: ClosureInfo -> WordOff +closureNonHdrSize cl_info + = tot_wds + computeSlopSize tot_wds cl_info + where + tot_wds = closureGoodStuffSize cl_info + +closureGoodStuffSize :: ClosureInfo -> WordOff +closureGoodStuffSize cl_info + = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info) + in ptrs + nonptrs + +closurePtrsSize :: ClosureInfo -> WordOff +closurePtrsSize cl_info + = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info) + in ptrs + +-- not exported: +sizes_from_SMRep :: SMRep -> (WordOff,WordOff) +sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep BlackHoleRep = (0, 0) + +-- Computing slop size. WARNING: this looks dodgy --- it has deep +-- knowledge of what the storage manager does with the various +-- representations... +-- +-- Slop Requirements: every thunk gets an extra padding word in the +-- header, which takes the the updated value. 
+ +slopSize :: ClosureInfo -> WordOff +slopSize cl_info = computeSlopSize payload_size cl_info + where payload_size = closureGoodStuffSize cl_info + +computeSlopSize :: WordOff -> ClosureInfo -> WordOff +computeSlopSize payload_size cl_info + = max 0 (minPayloadSize smrep updatable - payload_size) + where + smrep = closureSMRep cl_info + updatable = closureNeedsUpdSpace cl_info + +closureNeedsUpdSpace :: ClosureInfo -> Bool +-- We leave space for an update if either (a) the closure is updatable +-- or (b) it is a static thunk. This is because a static thunk needs +-- a static link field in a predictable place (after the slop), regardless +-- of whether it is updatable or not. +closureNeedsUpdSpace (ClosureInfo { closureLFInfo = + LFThunk TopLevel _ _ _ _ }) = True +closureNeedsUpdSpace cl_info = closureUpdReqd cl_info + +minPayloadSize :: SMRep -> Bool -> WordOff +minPayloadSize smrep updatable + = case smrep of + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ -> 0 -- static + GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE + -- ^^^^^___ dynamic + where + min_upd_size = + ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader) + 0 -- check that we already have enough + -- room for mIN_SIZE_NonUpdHeapObject, + -- due to the extra header word in SMP + +-------------------------------------- +-- Other functions over ClosureInfo +-------------------------------------- + +blackHoleOnEntry :: ClosureInfo -> Bool +-- Static closures are never themselves black-holed. +-- Updatable ones will be overwritten with a CAFList cell, which points to a +-- black hole; +-- Single-entry ones have no fvs to plug, and we trust they don't form part +-- of a loop. 
+ +blackHoleOnEntry ConInfo{} = False +blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) + | isStaticRep rep + = False -- Never black-hole a static closure + + | otherwise + = case lf_info of + LFReEntrant _ _ _ _ -> False + LFLetNoEscape -> False + LFThunk _ no_fvs updatable _ _ + -> if updatable + then not opt_OmitBlackHoling + else opt_DoTickyProfiling || not no_fvs + -- the former to catch double entry, + -- and the latter to plug space-leaks. KSW/SDM 1999-04. + + _other -> panic "blackHoleOnEntry" -- Should never happen + + +staticClosureNeedsLink :: ClosureInfo -> Bool +-- A static closure needs a link field to aid the GC when traversing +-- the static closure graph. But it only needs such a field if either +-- a) it has an SRT +-- b) it's a constructor with one or more pointer fields +-- In case (b), the constructor's fields themselves play the role +-- of the SRT. +staticClosureNeedsLink (ClosureInfo { closureSRT = srt }) + = needsSRT srt +staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con }) + = not (isNullaryRepDataCon con) && not_nocaf_constr + where + not_nocaf_constr = + case sm_rep of + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True + +isStaticClosure :: ClosureInfo -> Bool +isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) + +closureUpdReqd :: ClosureInfo -> Bool +closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info +closureUpdReqd ConInfo{} = False + +lfUpdatable :: LambdaFormInfo -> Bool +lfUpdatable (LFThunk _ _ upd _ _) = upd +lfUpdatable (LFBlackHole _) = True + -- Black-hole closures are allocated to receive the results of an + -- alg case with a named default... so they need to be updated. 
+lfUpdatable _ = False + +closureIsThunk :: ClosureInfo -> Bool +closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info +closureIsThunk ConInfo{} = False + +closureSingleEntry :: ClosureInfo -> Bool +closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd +closureSingleEntry _ = False + +closureReEntrant :: ClosureInfo -> Bool +closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True +closureReEntrant _ = False + +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing + +closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info +closureFunInfo _ = Nothing + +lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) +lfFunInfo _ = Nothing + +funTag :: ClosureInfo -> DynTag +funTag (ClosureInfo { closureLFInfo = lf_info }) = lfDynTag lf_info +funTag (ConInfo {}) = panic "funTag" + +isToplevClosure :: ClosureInfo -> Bool +isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) + = case lf_info of + LFReEntrant TopLevel _ _ _ -> True + LFThunk TopLevel _ _ _ _ -> True + _other -> False +isToplevClosure _ = False + +-------------------------------------- +-- Label generation +-------------------------------------- + +infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI cl@(ClosureInfo { closureName = name, + closureLFInfo = lf_info }) + = case lf_info of + LFBlackHole info -> info + + LFThunk _ _ upd_flag (SelectorThunk offset) _ -> + mkSelectorInfoLabel upd_flag offset + + LFThunk _ _ upd_flag (ApThunk arity) _ -> + mkApInfoTableLabel upd_flag arity + + LFThunk{} -> mkLocalInfoTableLabel name $ clHasCafRefs cl + + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name $ clHasCafRefs cl + + _other -> panic "infoTableLabelFromCI" + 
+infoTableLabelFromCI cl@(ConInfo { closureCon = con, closureSMRep = rep }) + | isStaticRep rep = mkStaticInfoTableLabel name $ clHasCafRefs cl + | otherwise = mkConInfoTableLabel name $ clHasCafRefs cl + where + name = dataConName con + +-- ClosureInfo for a closure (as opposed to a constructor) is always local +closureLabelFromCI :: ClosureInfo -> CLabel +closureLabelFromCI cl@(ClosureInfo { closureName = nm }) = + mkLocalClosureLabel nm $ clHasCafRefs cl +closureLabelFromCI _ = panic "closureLabelFromCI" + +thunkEntryLabel :: Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel +-- thunkEntryLabel is a local help function, not exported. It's used from both +-- entryLabelFromCI and getCallMethod. +thunkEntryLabel _thunk_id _ (ApThunk arity) upd_flag + = enterApLabel upd_flag arity +thunkEntryLabel _thunk_id _ (SelectorThunk offset) upd_flag + = enterSelectorLabel upd_flag offset +thunkEntryLabel thunk_id c _ _ + = enterIdLabel thunk_id c + +enterApLabel :: Bool -> Arity -> CLabel +enterApLabel is_updatable arity + | tablesNextToCode = mkApInfoTableLabel is_updatable arity + | otherwise = mkApEntryLabel is_updatable arity + +enterSelectorLabel :: Bool -> WordOff -> CLabel +enterSelectorLabel upd_flag offset + | tablesNextToCode = mkSelectorInfoLabel upd_flag offset + | otherwise = mkSelectorEntryLabel upd_flag offset + +enterIdLabel :: Name -> CafInfo -> CLabel +enterIdLabel id c + | tablesNextToCode = mkInfoTableLabel id c + | otherwise = mkEntryLabel id c + +enterLocalIdLabel :: Name -> CafInfo -> CLabel +enterLocalIdLabel id c + | tablesNextToCode = mkLocalInfoTableLabel id c + | otherwise = mkLocalEntryLabel id c + + +-------------------------------------- +-- Profiling +-------------------------------------- + +-- Profiling requires two pieces of information to be determined for +-- each closure's info table --- description and type. + +-- The description is stored directly in the @CClosureInfoTable@ when the +-- info table is built. 
+ +-- The type is determined from the type information stored with the @Id@ +-- in the closure info using @closureTypeDescr@. + +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameString (getOccName con) + +closureTypeDescr (ClosureInfo { closureType = ty }) + = getTyDescription ty +closureTypeDescr (ConInfo { closureCon = data_con }) + = occNameString (getOccName (dataConTyCon data_con)) + +getTyDescription :: Type -> String +getTyDescription ty + = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> + case tau_ty of + TyVarTy _ -> "*" + AppTy fun _ -> getTyDescription fun + FunTy _ res -> '-' : '>' : fun_result res + TyConApp tycon _ -> getOccString tycon + PredTy sty -> getPredTyDescription sty + ForAllTy _ ty -> getTyDescription ty + } + where + fun_result (FunTy _ res) = '>' : fun_result res + fun_result other = getTyDescription other + +getPredTyDescription :: PredType -> String +getPredTyDescription (ClassP cl _) = getOccString cl +getPredTyDescription (IParam ip _) = getOccString (ipNameName ip) +getPredTyDescription (EqPred ty1 _ty2) = getTyDescription ty1 -- Urk? + + +-------------------------------------- +-- SRTs/CAFs +-------------------------------------- + +-- This is horrible, but we need to know whether a closure may have CAFs. 
+clHasCafRefs :: ClosureInfo -> CafInfo +clHasCafRefs (ClosureInfo {closureSRT = srt}) = + case srt of NoC_SRT -> NoCafRefs + _ -> MayHaveCafRefs +clHasCafRefs (ConInfo {}) = NoCafRefs diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs new file mode 100644 index 0000000..de1d77a --- /dev/null +++ b/compiler/codeGen/StgCmmCon.hs @@ -0,0 +1,216 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: code generation for constructors +-- +-- This module provides the support code for StgCmm to deal with with +-- constructors on the RHSs of let(rec)s. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmCon ( + cgTopRhsCon, buildDynCon, bindConArgs + ) where + +#include "HsVersions.h" + +import StgSyn +import CoreSyn ( AltCon(..) ) + +import StgCmmMonad +import StgCmmEnv +import StgCmmHeap +import StgCmmUtils +import StgCmmClosure +import StgCmmProf + +import Cmm +import CLabel +import SMRep +import CostCentre +import Constants +import DataCon +import FastString +import Id +import Literal +import PrelInfo +import Outputable +import Util ( lengthIs ) +import Char ( ord ) + + +--------------------------------------------------------------- +-- Top-level constructors +--------------------------------------------------------------- + +cgTopRhsCon :: Id -- Name of thing bound to this RHS + -> DataCon -- Id + -> [StgArg] -- Args + -> FCode (Id, CgIdInfo) +cgTopRhsCon id con args + = do { +#if mingw32_TARGET_OS + -- Windows DLLs have a problem with static cross-DLL refs. 
+ ; this_pkg <- getThisPackage + ; ASSERT( not (isDllConApp this_pkg con args) ) return () +#endif + ; ASSERT( args `lengthIs` dataConRepArity con ) return () + + -- LAY IT OUT + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel name $ idCafInfo id + caffy = any stgArgHasCafRefs args + (closure_info, nv_args_w_offsets) + = layOutStaticConstr con (addArgReps args) + + get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg + ; return lit } + + ; payload <- mapM get_lit nv_args_w_offsets + -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs + -- NB2: all the amodes should be Lits! + + ; let closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + -- BUILD THE OBJECT + ; emitDataLits closure_label closure_rep + + -- RETURN + ; return (id, litIdInfo id lf_info (CmmLabel closure_label)) } + + +--------------------------------------------------------------- +-- Lay out and allocate non-top-level constructors +--------------------------------------------------------------- + +buildDynCon :: Id -- Name of the thing to which this constr will + -- be bound + -> CostCentreStack -- Where to grab cost centre from; + -- current CCS if currentOrSubsumedCCS + -> DataCon -- The data constructor + -> [StgArg] -- Its args + -> FCode CgIdInfo -- Return details about how to find it + +{- We used to pass a boolean indicating whether all the +args were of size zero, so we could use a static +construtor; but I concluded that it just isn't worth it. +Now I/O uses unboxed tuples there just aren't any constructors +with all size-zero args. + +The reason for having a separate argument, rather than looking at +the addr modes of the args is that we may be in a "knot", and +premature looking at the args will cause the compiler to black-hole! +-} + + +-------- buildDynCon: Nullary constructors -------------- +-- First we deal with the case of zero-arity constructors. 
They +-- will probably be unfolded, so we don't expect to see this case much, +-- if at all, but it does no harm, and sets the scene for characters. +-- +-- In the case of zero-arity constructors, or, more accurately, those +-- which have exclusively size-zero (VoidRep) args, we generate no code +-- at all. + +buildDynCon binder _cc con [] + = return (litIdInfo binder (mkConLFInfo con) + (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder)))) + +-------- buildDynCon: Charlike and Intlike constructors ----------- +{- The following three paragraphs about @Char@-like and @Int@-like +closures are obsolete, but I don't understand the details well enough +to properly word them, sorry. I've changed the treatment of @Char@s to +be analogous to @Int@s: only a subset is preallocated, because @Char@ +has now 31 bits. Only literals are handled here. -- Qrczak + +Now for @Char@-like closures. We generate an assignment of the +address of the closure to a temporary. It would be possible simply to +generate no code, and record the addressing mode in the environment, +but we'd have to be careful if the argument wasn't a constant --- so +for simplicity we just always asssign to a temporary. + +Last special case: @Int@-like closures. We only special-case the +situation in which the argument is a literal in the range +@mIN_INTLIKE@..@mAX_INTLILKE@. NB: for @Char@-like closures we can +work with any old argument, but for @Int@-like ones the argument has +to be a literal. Reason: @Char@ like closures have an argument type +which is guaranteed in range. + +Because of this, we use can safely return an addressing mode. -} + +buildDynCon binder _cc con [arg] + | maybeIntLikeCon con + , StgLitArg (MachInt val) <- arg + , val <= fromIntegral mAX_INTLIKE -- Comparisons at type Integer! + , val >= fromIntegral mIN_INTLIKE -- ...ditto... 
+ = do { let intlike_lbl = mkRtsGcPtrLabel (sLit "stg_INTLIKE_closure") + val_int = fromIntegral val :: Int + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = cmmLabelOffW intlike_lbl offsetW + ; return (litIdInfo binder (mkConLFInfo con) intlike_amode) } + +buildDynCon binder _cc con [arg] + | maybeCharLikeCon con + , StgLitArg (MachChar val) <- arg + , let val_int = ord val :: Int + , val_int <= mAX_CHARLIKE + , val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsGcPtrLabel (sLit "stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = cmmLabelOffW charlike_lbl offsetW + ; return (litIdInfo binder (mkConLFInfo con) charlike_amode) } + +-------- buildDynCon: the general case ----------- +buildDynCon binder ccs con args + = do { let (cl_info, args_w_offsets) = layOutDynConstr con (addArgReps args) + -- No void args in args_w_offsets + ; tmp <- allocDynClosure cl_info use_cc blame_cc args_w_offsets + ; return (regIdInfo binder lf_info tmp) } + where + lf_info = mkConLFInfo con + + use_cc -- cost-centre to stick in the object + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + blame_cc = use_cc -- cost-centre on which to blame the alloc (same) + + +--------------------------------------------------------------- +-- Binding constructor arguments +--------------------------------------------------------------- + +bindConArgs :: AltCon -> LocalReg -> [Id] -> FCode [LocalReg] +-- bindConArgs is called from cgAlt of a case +-- (bindConArgs con args) augments the environment with bindings for the +-- binders args, assuming that we have just returned from a 'case' which +-- found a con +bindConArgs (DataAlt con) base args + = ASSERT(not (isUnboxedTupleCon con)) + mapM bind_arg args_w_offsets + where + (_, args_w_offsets) = layOutDynConstr 
con (addIdReps args) + + tag = tagForCon con + + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + bind_arg :: (Id, VirtualHpOffset) -> FCode LocalReg + bind_arg (arg, offset) + = do { emit $ mkTaggedObjectLoad (idToReg arg) base offset tag + ; bindArgToReg arg } + +bindConArgs _other_con _base args + = ASSERT( null args ) return [] + + + diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs new file mode 100644 index 0000000..c43bf80 --- /dev/null +++ b/compiler/codeGen/StgCmmEnv.hs @@ -0,0 +1,209 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: the binding environment +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmEnv ( + CgIdInfo, + + cgIdInfoId, cgIdInfoLF, + + litIdInfo, lneIdInfo, regIdInfo, + idInfoToAmode, + + addBindC, addBindsC, + + bindArgsToRegs, bindToReg, rebindToReg, + bindArgToReg, idToReg, + getArgAmode, getNonVoidArgAmodes, + getCgIdInfo, + maybeLetNoEscape, + ) where + +#include "HsVersions.h" + +import StgCmmMonad +import StgCmmUtils +import StgCmmClosure + +import CLabel + +import BlockId +import Cmm +import CmmUtils +import FastString +import PprCmm ( {- instance Outputable -} ) +import Id +import VarEnv +import Maybes +import Name +import StgSyn +import Outputable + + + +------------------------------------- +-- Manipulating CgIdInfo +------------------------------------- + +mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo +mkCgIdInfo id lf expr + = CgIdInfo { cg_id = id, cg_loc = CmmLoc expr, + cg_lf = lf, cg_rep = idPrimRep id, + cg_tag = lfDynTag lf } + +lneIdInfo :: Id -> [LocalReg] -> CgIdInfo +lneIdInfo id regs + = CgIdInfo { cg_id = id, cg_loc = LneLoc blk_id regs, + cg_lf = lf, cg_rep = idPrimRep id, + cg_tag = lfDynTag lf } + where + lf = mkLFLetNoEscape + blk_id = mkBlockId (idUnique 
id) + +litIdInfo :: Id -> LambdaFormInfo -> CmmLit -> CgIdInfo +litIdInfo id lf_info lit = mkCgIdInfo id lf_info (CmmLit lit) + +regIdInfo :: Id -> LambdaFormInfo -> LocalReg -> CgIdInfo +regIdInfo id lf_info reg = mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)) + +idInfoToAmode :: CgIdInfo -> CmmExpr +-- Returns a CmmExpr for the *tagged* pointer +idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e, cg_tag = tag }) + = addDynTag e tag +idInfoToAmode cg_info + = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc + +addDynTag :: CmmExpr -> DynTag -> CmmExpr +-- A tag adds a byte offset to the pointer +addDynTag expr tag = cmmOffsetB expr tag + +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id + +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf + +maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) +maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) +maybeLetNoEscape _other = Nothing + + + +--------------------------------------------------------- +-- The binding environment +-- +-- There are three basic routines, for adding (addBindC), +-- modifying(modifyBindC) and looking up (getCgIdInfo) bindings. 
+--------------------------------------------------------- + +addBindC :: Id -> CgIdInfo -> FCode () +addBindC name stuff_to_bind = do + binds <- getBinds + setBinds $ extendVarEnv binds name stuff_to_bind + +addBindsC :: [(Id, CgIdInfo)] -> FCode () +addBindsC new_bindings = do + binds <- getBinds + let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) + binds + new_bindings + setBinds new_binds + +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + let + name = idName id + in + if isExternalName name then do + let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) + return (litIdInfo id (mkLFImported id) ext_lbl) + else + -- Bug + cgLookupPanic id + }}}} + +cgLookupPanic :: Id -> FCode a +cgLookupPanic id + = do static_binds <- getStaticBinds + local_binds <- getBinds + srt <- getSRTLabel + pprPanic "StgCmmEnv: variable not found" + (vcat [ppr id, + ptext (sLit "static binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ], + ptext (sLit "local binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ], + ptext (sLit "SRT label") <+> pprCLabel srt + ]) + + +-------------------- +getArgAmode :: StgArg -> FCode CmmExpr +getArgAmode (StgVarArg var) = do { info <- getCgIdInfo var; return (idInfoToAmode info) } +getArgAmode (StgLitArg lit) = return (CmmLit (mkSimpleLit lit)) +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr] +-- NB: Filters out void args, +-- so the result list may be shorter than the argument list +getNonVoidArgAmodes [] = return [] +getNonVoidArgAmodes (arg:args) + | 
isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args + | otherwise = do { amode <- getArgAmode arg + ; amodes <- getNonVoidArgAmodes args + ; return ( amode : amodes ) } + + +------------------------------------------------------------------------ +-- Interface functions for binding and re-binding names +------------------------------------------------------------------------ + +bindToReg :: Id -> LambdaFormInfo -> FCode LocalReg +-- Bind an Id to a fresh LocalReg +bindToReg id lf_info + = do { let reg = idToReg id + ; addBindC id (regIdInfo id lf_info reg) + ; return reg } + +rebindToReg :: Id -> FCode LocalReg +-- Like bindToReg, but the Id is already in scope, so +-- get its LF info from the envt +rebindToReg id + = do { info <- getCgIdInfo id + ; bindToReg id (cgIdInfoLF info) } + +bindArgToReg :: Id -> FCode LocalReg +bindArgToReg id = bindToReg id (mkLFArgument id) + +bindArgsToRegs :: [Id] -> FCode [LocalReg] +bindArgsToRegs args = mapM bindArgToReg args + +idToReg :: Id -> LocalReg +-- Make a register from an Id, typically a function argument, +-- free variable, or case binder +-- +-- We re-use the Unique from the Id to make it easier to see what is going on +-- +-- By now the Ids should be uniquely named; else one would worry +-- about accidental collision +idToReg id = LocalReg (idUnique id) + (primRepCmmType (idPrimRep id)) + + diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs new file mode 100644 index 0000000..74c69b7 --- /dev/null +++ b/compiler/codeGen/StgCmmExpr.hs @@ -0,0 +1,451 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: expressions +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmExpr ( cgExpr ) where + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import {-# SOURCE #-} StgCmmBind ( cgBind ) + +import StgCmmMonad +import StgCmmHeap 
+import StgCmmEnv +import StgCmmCon +import StgCmmProf +import StgCmmLayout +import StgCmmPrim +import StgCmmHpc +import StgCmmTicky +import StgCmmUtils +import StgCmmClosure + +import StgSyn + +import MkZipCfgCmm +import BlockId +import Cmm() +import CmmExpr +import CoreSyn +import DataCon +import Id +import TyCon +import CostCentre ( CostCentreStack, currentCCS ) +import Maybes +import Util +import FastString +import Outputable + +------------------------------------------------------------------------ +-- cgExpr: the main function +------------------------------------------------------------------------ + +cgExpr :: StgExpr -> FCode () + +cgExpr (StgApp fun args) = cgIdApp fun args +cgExpr (StgOpApp op args ty) = cgOpApp op args ty +cgExpr (StgConApp con args) = cgConApp con args + +cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr } +cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } +cgExpr (StgLit lit) = emitReturn [CmmLit (mkSimpleLit lit)] + +cgExpr (StgLet binds expr) = do { emit (mkComment $ mkFastString "calling cgBind"); cgBind binds; emit (mkComment $ mkFastString "calling cgExpr"); cgExpr expr } +cgExpr (StgLetNoEscape _ _ binds expr) = do { cgLneBinds binds; cgExpr expr } + +cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) + = cgCase expr bndr srt alt_type alts + +cgExpr (StgLam {}) = panic "cgExpr: StgLam" + +------------------------------------------------------------------------ +-- Let no escape +------------------------------------------------------------------------ + +{- Generating code for a let-no-escape binding, aka join point is very +very similar to whatwe do for a case expression. The duality is +between + let-no-escape x = b + in e +and + case e of ... 
-> b + +That is, the RHS of 'x' (ie 'b') will execute *later*, just like +the alternative of the case; it needs to be compiled in an environment +in which all volatile bindings are forgotten, and the free vars are +bound only to stable things like stack locations.. The 'e' part will +execute *next*, just like the scrutinee of a case. -} + +------------------------- +cgLneBinds :: StgBinding -> FCode () +cgLneBinds (StgNonRec bndr rhs) + = do { local_cc <- saveCurrentCostCentre + -- See Note [Saving the current cost centre] + ; (bndr,info) <- cgLetNoEscapeRhs local_cc bndr rhs + ; addBindC bndr info } + +cgLneBinds (StgRec pairs) + = do { local_cc <- saveCurrentCostCentre + ; new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs local_cc b e + | (b,e) <- pairs ] }) + + ; addBindsC new_bindings } + +------------------------- +cgLetNoEscapeRhs + :: Maybe LocalReg -- Saved cost centre + -> Id + -> StgRhs + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeRhs local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body) + = cgLetNoEscapeClosure bndr local_cc cc srt args body +cgLetNoEscapeRhs local_cc bndr (StgRhsCon cc con args) + = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args) + -- For a constructor RHS we want to generate a single chunk of + -- code which can be jumped to from many places, which will + -- return the constructor. It's easy; just behave as if it + -- was an StgRhsClosure with a ConApp inside! + +------------------------- +cgLetNoEscapeClosure + :: Id -- binder + -> Maybe LocalReg -- Slot for saved current cost centre + -> CostCentreStack -- XXX: *** NOT USED *** why not? 
+ -> SRT + -> [Id] -- Args (as in \ args -> body) + -> StgExpr -- Body (as in above) + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body + = do { arg_regs <- forkProc $ do + { restoreCurrentCostCentre cc_slot + ; arg_regs <- bindArgsToRegs args + ; c_srt <- getSRTInfo srt + ; altHeapCheck arg_regs c_srt (cgExpr body) + -- Using altHeapCheck just reduces + -- instructions to save on stack + ; return arg_regs } + ; return (bndr, lneIdInfo bndr arg_regs) } + + +------------------------------------------------------------------------ +-- Case expressions +------------------------------------------------------------------------ + +{- Note [Compiling case expressions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is quite interesting to decide whether to put a heap-check at the +start of each alternative. Of course we certainly have to do so if +the case forces an evaluation, or if there is a primitive op which can +trigger GC. + +A more interesting situation is this (a Plan-B situation) + + !P!; + ...P... + case x# of + 0# -> !Q!; ...Q... + default -> !R!; ...R... + +where !x! indicates a possible heap-check point. The heap checks +in the alternatives *can* be omitted, in which case the topmost +heapcheck will take their worst case into account. + +In favour of omitting !Q!, !R!: + + - *May* save a heap overflow test, + if ...P... allocates anything. + + - We can use relative addressing from a single Hp to + get at all the closures so allocated. + + - No need to save volatile vars etc across heap checks + in !Q!, !R! + +Against omitting !Q!, !R! + + - May put a heap-check into the inner loop. Suppose + the main loop is P -> R -> P -> R... + Q is the loop exit, and only it does allocation. + This only hurts us if P does no allocation. If P allocates, + then there is a heap check in the inner loop anyway. + + - May do more allocation than reqd. This sometimes bites us + badly. For example, nfib (ha!) 
allocates about 30\% more space if the + worst-casing is done, because many many calls to nfib are leaf calls + which don't need to allocate anything. + + We can un-allocate, but that costs an instruction + +Neither problem hurts us if there is only one alternative. + +Suppose the inner loop is P->R->P->R etc. Then here is +how many heap checks we get in the *inner loop* under various +conditions + + Alooc Heap check in branches (!Q!, !R!)? + P Q R yes no (absorb to !P!) +-------------------------------------- + n n n 0 0 + n y n 0 1 + n . y 1 1 + y . y 2 1 + y . n 1 1 + +Best choices: absorb heap checks from Q and R into !P! iff + a) P itself does some allocation +or + b) P does allocation, or there is exactly one alternative + +We adopt (b) because that is more likely to put the heap check at the +entry to a function, when not many things are live. After a bunch of +single-branch cases, we may have lots of things live + +Hence: two basic plans for + + case e of r { alts } + +------ Plan A: the general case --------- + + ...save current cost centre... + + ...code for e, + with sequel (SetLocals r) + + ...restore current cost centre... + ...code for alts... + ...alts do their own heap checks + +------ Plan B: special case when --------- + (i) e does not allocate or call GC + (ii) either upstream code performs allocation + or there is just one alternative + + Then heap allocation in the (single) case branch + is absorbed by the upstream check. + Very common example: primops on unboxed values + + ...code for e, + with sequel (SetLocals r)... + + ...code for alts... + ...no heap check... +-} + + + +------------------------------------- +data GcPlan + = GcInAlts -- Put a GC check at the start the case alternatives, + [LocalReg] -- which binds these registers + SRT -- using this SRT + | NoGcInAlts -- The scrutinee is a primitive value, or a call to a + -- primitive op which does no GC. 
Absorb the allocation + -- of the case alternative(s) into the upstream check + +------------------------------------- +cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () +cgCase scrut bndr srt alt_type alts + = do { up_hp_usg <- getVirtHp -- Upstream heap usage + ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts + alt_regs = map idToReg ret_bndrs + simple_scrut = isSimpleScrut scrut alt_type + gc_plan | not simple_scrut = GcInAlts alt_regs srt + | isSingleton alts = NoGcInAlts + | up_hp_usg > 0 = NoGcInAlts + | otherwise = GcInAlts alt_regs srt + + ; mb_cc <- maybeSaveCostCentre simple_scrut + ; c_srt <- getSRTInfo srt + ; withSequel (AssignTo alt_regs c_srt) + (cgExpr scrut) + ; restoreCurrentCostCentre mb_cc + + ; bindArgsToRegs ret_bndrs + ; cgAlts gc_plan bndr alt_type alts } + +----------------- +maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) +maybeSaveCostCentre simple_scrut + | simple_scrut = saveCurrentCostCentre + | otherwise = return Nothing + + + +----------------- +isSimpleScrut :: StgExpr -> AltType -> Bool +-- Simple scrutinee, does not allocate +isSimpleScrut (StgOpApp _ _ _) _ = True +isSimpleScrut (StgLit _) _ = True +isSimpleScrut (StgApp _ []) (PrimAlt _) = True +isSimpleScrut _ _ = False + +----------------- +chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [Id] +-- These are the binders of a case that are assigned +-- by the evaluation of the scrutinee +-- Only non-void ones come back +chooseReturnBndrs bndr (PrimAlt _) _alts + = nonVoidIds [bndr] + +chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)] + = nonVoidIds ids -- 'bndr' is not assigned! 
+ +chooseReturnBndrs bndr (AlgAlt _) _alts + = [bndr] -- Only 'bndr' is assigned + +chooseReturnBndrs bndr PolyAlt _alts + = [bndr] -- Only 'bndr' is assigned + +chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" + -- UbxTupALt has only one alternative + +nonVoidIds :: [Id] -> [Id] +nonVoidIds ids = [id | id <- ids, not (isVoidRep (idPrimRep id))] + +------------------------------------- +cgAlts :: GcPlan -> Id -> AltType -> [StgAlt] -> FCode () +-- At this point the result of the case are in the binders +cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] + = maybeAltHeapCheck gc_plan (cgExpr rhs) + +cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] + = maybeAltHeapCheck gc_plan (cgExpr rhs) + -- Here bndrs are *already* in scope, so don't rebind them + +cgAlts gc_plan bndr (PrimAlt _) alts + = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + + ; let bndr_reg = CmmLocal (idToReg bndr) + (DEFAULT,deflt) = head tagged_cmms + -- PrimAlts always have a DEFAULT case + -- and it always comes first + + tagged_cmms' = [(lit,code) + | (LitAlt lit, code) <- tagged_cmms] + ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } + +cgAlts gc_plan bndr (AlgAlt tycon) alts + = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + + ; let fam_sz = tyConFamilySize tycon + bndr_reg = CmmLocal (idToReg bndr) + mb_deflt = case tagged_cmms of + ((DEFAULT,rhs) : _) -> Just rhs + _other -> Nothing + -- DEFAULT is always first, if present + + branches = [ (dataConTagZ con, cmm) + | (DataAlt con, cmm) <- tagged_cmms ] + + -- Is the constructor tag in the node reg? + ; if isSmallFamily fam_sz + then let -- Yes, bndr_reg has constr. 
tag in ls bits + tag_expr = cmmConstrTag1 (CmmReg bndr_reg) + branches' = [(tag+1,branch) | (tag,branch) <- branches] + in + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + + else -- No, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB bndr_reg (-1) + tag_expr = getConstrTag (untagged_ptr) + in + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) } + +cgAlts _ _ _ _ = panic "cgAlts" + -- UbxTupAlt and PolyAlt have only one alternative + +------------------- +cgAltRhss :: GcPlan -> Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan bndr alts + = forkAlts (map cg_alt alts) + where + base_reg = idToReg bndr + cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) + cg_alt (con, bndrs, _uses, rhs) + = getCodeR $ + maybeAltHeapCheck gc_plan $ + do { bindConArgs con base_reg bndrs + ; cgExpr rhs + ; return con } + +maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts code + = code +maybeAltHeapCheck (GcInAlts regs srt) code + = do { c_srt <- getSRTInfo srt + ; altHeapCheck regs c_srt code } + +----------------------------------------------------------------------------- +-- Tail calls +----------------------------------------------------------------------------- + +cgConApp :: DataCon -> [StgArg] -> FCode () +cgConApp con stg_args + = ASSERT( stg_args `lengthIs` dataConRepArity con ) + do { idinfo <- buildDynCon (dataConWorkId con) currentCCS con stg_args + -- The first "con" says that the name bound to this closure is + -- is "con", which is a bit of a fudge, but it only affects profiling + + ; emitReturn [idInfoToAmode idinfo] } + +cgIdApp :: Id -> [StgArg] -> FCode () +cgIdApp fun_id args + = do { fun_info <- getCgIdInfo fun_id + ; case maybeLetNoEscape fun_info of + Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args + Nothing -> cgTailCall fun_id fun_info args } + +cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () +cgLneJump blk_id 
lne_regs args -- Join point; discard sequel + = do { cmm_args <- getNonVoidArgAmodes args + ; emit (mkMultiAssign lne_regs cmm_args + <*> mkBranch blk_id) } + +cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () +cgTailCall fun_id fun_info args + = case (getCallMethod fun_name (idCafInfo fun_id) lf_info (length args)) of + + -- A value in WHNF, so we can just return it. + ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? + + EnterIt -> ASSERT( null args ) -- Discarding arguments + do { [ret,call] <- forkAlts [ + getCode $ emitReturn [fun], -- Is tagged; no need to untag + getCode $ emitCall (entryCode fun) [fun]] -- Not tagged + ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } + + SlowCall -> do -- A slow function call via the RTS apply routines + { tickySlowCall lf_info args + ; slowCall fun args } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { tickyDirectCall arity args + ; if node_points then + do call <- getCode $ directCall lbl arity args + emit (mkAssign nodeReg fun <*> call) + -- directCall lbl (arity+1) (StgVarArg fun_id : args)) + -- >>= (emit . (mkComment (mkFastString "DirectEntry") <*>)) + else directCall lbl arity args } + + JumpToIt {} -> panic "cgTailCall" -- ??? + + where + fun_name = idName fun_id + fun = idInfoToAmode fun_info + lf_info = cgIdInfoLF fun_info + node_points = nodeMustPointToIt lf_info + + + diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs new file mode 100644 index 0000000..2d5d79e --- /dev/null +++ b/compiler/codeGen/StgCmmForeign.hs @@ -0,0 +1,316 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. 
+-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmForeign ( + cgForeignCall, + emitPrimCall, emitCCall, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn +import StgCmmProf +import StgCmmEnv +import StgCmmMonad +import StgCmmUtils +import StgCmmClosure + +import MkZipCfgCmm +import Cmm +import CmmUtils +import Type +import TysPrim +import CLabel +import SMRep +import ForeignCall +import Constants +import StaticFlags +import Maybes +import Outputable + +import Control.Monad + +----------------------------------------------------------------------------- +-- Code generation for Foreign Calls +----------------------------------------------------------------------------- + +cgForeignCall :: [LocalReg] -- r1,r2 where to put the results + -> [ForeignHint] + -> ForeignCall -- the op + -> [StgArg] -- x,y arguments + -> FCode () +-- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z ) + +cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args + = do { cmm_args <- getFCallArgs stg_args + ; let (args, arg_hints) = unzip cmm_args + fc = ForeignConvention cconv arg_hints result_hints + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl (call_size args) False))) + DynamicTarget -> case args of fn:rest -> (rest, fn) + call_target = ForeignTarget cmm_target fc + + ; srt <- getSRTInfo (panic "emitForeignCall") -- SLPJ: Not sure what SRT + -- is right here + ; emitForeignCall safety results call_target call_args srt CmmMayReturn } + where + -- in the stdcall calling convention, the symbol needs @size appended + -- to it, where size is the total number of bytes of arguments. 
We + -- attach this info to the CLabel here, and the CLabel pretty printer + -- will generate the suffix when the label is printed. + call_size args + | StdCallConv <- cconv = Just (sum (map arg_size args)) + | otherwise = Nothing + + -- ToDo: this might not be correct for 64-bit API + arg_size arg = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE + +cgForeignCall _ _ (DNCall _) _ + = panic "cgForeignCall: DNCall" + +emitCCall :: [(CmmFormal,ForeignHint)] + -> CmmExpr + -> [(CmmActual,ForeignHint)] + -> FCode () +emitCCall hinted_results fn hinted_args + = emitForeignCall PlayRisky results (ForeignTarget fn fc) args + NoC_SRT -- No SRT b/c we PlayRisky + CmmMayReturn + where + (args, arg_hints) = unzip hinted_args + (results, result_hints) = unzip hinted_results + target = ForeignTarget fn fc + fc = ForeignConvention CCallConv arg_hints result_hints + + +emitPrimCall :: CmmFormal -> CallishMachOp -> CmmActuals -> FCode () +emitPrimCall res op args + = emitForeignCall PlayRisky [res] (PrimTarget op) args NoC_SRT CmmMayReturn + +-- alternative entry point, used by CmmParse +emitForeignCall + :: Safety + -> CmmFormals -- where to put the results + -> MidCallTarget -- the op + -> CmmActuals -- arguments + -> C_SRT -- the SRT of the calls continuation + -> CmmReturnInfo -- This can say "never returns" + -- only RTS procedures do this + -> FCode () +emitForeignCall safety results target args _srt _ret + | not (playSafe safety) = trace "emitForeignCall; ret is undone" $ do + let (caller_save, caller_load) = callerSaveVolatileRegs + emit caller_save + emit (mkUnsafeCall target results args) + emit caller_load + + | otherwise = panic "ToDo: emitForeignCall'" + +{- + | otherwise = do + -- Both 'id' and 'new_base' are KindNonPtr because they're + -- RTS only objects and are not subject to garbage collection + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) + temp_target <- load_target_into_temp target + let (caller_save, caller_load) = 
callerSaveVolatileRegs + emitSaveThreadState + emit caller_save + -- The CmmUnsafe arguments are only correct because this part + -- of the code hasn't been moved into the CPS pass yet. + -- Once that happens, this function will just emit a (CmmSafe srt) call, + -- and the CPS will will be the one to convert that + -- to this sequence of three CmmUnsafe calls. + emit (mkCmmCall (CmmCallee suspendThread CCallConv) + [ (id,AddrHint) ] + [ (CmmReg (CmmGlobal BaseReg), AddrHint) ] + CmmUnsafe + ret) + emit (mkCmmCall temp_target results args CmmUnsafe ret) + emit (mkCmmCall (CmmCallee resumeThread CCallConv) + [ (new_base, AddrHint) ] + [ (CmmReg (CmmLocal id), AddrHint) ] + CmmUnsafe + ret ) + -- Assign the result to BaseReg: we + -- might now have a different Capability! + emit (mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base))) + emit caller_load + emitLoadThreadState + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) +-} + + +{- +-- THINK ABOUT THIS (used to happen) +-- we might need to load arguments into temporaries before +-- making the call, because certain global registers might +-- overlap with registers that the C calling convention uses +-- for passing arguments. +-- +-- This is a HACK; really it should be done in the back end, but +-- it's easier to generate the temporaries here. +load_args_into_temps = mapM arg_assign_temp + where arg_assign_temp (e,hint) = do + tmp <- maybe_assign_temp e + return (tmp,hint) + +load_target_into_temp (CmmCallee expr conv) = do + tmp <- maybe_assign_temp expr + return (CmmCallee tmp conv) +load_target_into_temp other_target = + return other_target + +maybe_assign_temp e + | hasNoGlobalRegs e = return e + | otherwise = do + -- don't use assignTemp, it uses its own notion of "trivial" + -- expressions, which are wrong here. 
+ -- this is a NonPtr because it only duplicates an existing + reg <- newTemp (cmmExprType e) --TODO FIXME NOW + emit (mkAssign (CmmLocal reg) e) + return (CmmReg (CmmLocal reg)) +-} + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. + +emitSaveThreadState :: FCode () +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + emit $ mkStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery :: FCode () +emitCloseNursery = emit $ mkStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState :: FCode () +emitLoadThreadState = do + tso <- newTemp gcWord -- TODO FIXME NOW + emit $ catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- Sp = tso->sp; + mkAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) + bWord), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + mkAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + emit (mkStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)) + +emitOpenNursery :: FCode () +emitOpenNursery = emit $ catAGraphs [ + -- Hp = CurrentNursery->free - 1; + mkAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free bWord) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + mkAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start bWord) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad 
nursery_bdescr_blocks b32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. +tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. For ByteArray#/Array# we pass the +-- address of the actual array, not the address of the heap object. 
+ +getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)] +-- (a) Drop void args +-- (b) Add foriegn-call shim code +-- It's (b) that makes this differ from getNonVoidArgAmodes + +getFCallArgs args + = do { mb_cmms <- mapM get args + ; return (catMaybes mb_cmms) } + where + get arg | isVoidRep arg_rep + = return Nothing + | otherwise + = do { cmm <- getArgAmode arg + ; return (Just (add_shim arg_ty cmm, hint)) } + where + arg_ty = stgArgType arg + arg_rep = typePrimRep arg_ty + hint = typeForeignHint arg_ty + +add_shim :: Type -> CmmExpr -> CmmExpr +add_shim arg_ty expr + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + tycon = tyConAppTyCon (repType arg_ty) + -- should be a tycon app, since this is a foreign call diff --git a/compiler/codeGen/StgCmmGran.hs b/compiler/codeGen/StgCmmGran.hs new file mode 100644 index 0000000..5fad2bf --- /dev/null +++ b/compiler/codeGen/StgCmmGran.hs @@ -0,0 +1,131 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow -2006 +-- +-- Code generation relaed to GpH +-- (a) parallel +-- (b) GranSim +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. 
See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmGran ( + staticGranHdr,staticParHdr, + granThunk, granYield, + doGranAllocate + ) where + +-- This entire module consists of no-op stubs at the moment +-- GranSim worked once, but it certainly doesn't any more +-- I've left the calls, though, in case anyone wants to resurrect it + +import StgCmmMonad +import Id +import Cmm + +staticGranHdr :: [CmmLit] +staticGranHdr = [] + +staticParHdr :: [CmmLit] +staticParHdr = [] + +doGranAllocate :: VirtualHpOffset -> FCode () +-- Must be lazy in the amount of allocation +doGranAllocate n = return () + +granFetchAndReschedule :: [(Id,GlobalReg)] -> Bool -> FCode () +granFetchAndReschedule regs node_reqd = return () + +granYield :: [LocalReg] -> Bool -> FCode () +granYield regs node_reqd = return () + +granThunk :: Bool -> FCode () +granThunk node_points = return () + +----------------------------------------------------------------- +{- ------- Everything below here is commented out ------------- +----------------------------------------------------------------- + +-- Parallel header words in a static closure +staticParHdr :: [CmmLit] +-- Parallel header words in a static closure +staticParHdr = [] + +staticGranHdr :: [CmmLit] +-- Gransim header words in a static closure +staticGranHdr = [] + +doGranAllocate :: CmmExpr -> Code +-- macro DO_GRAN_ALLOCATE +doGranAllocate hp + | not opt_GranMacros = nopC + | otherwise = panic "doGranAllocate" + + + +------------------------- +granThunk :: Bool -> FCode () +-- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node +-- (we prefer fetchAndReschedule-style context switches to yield ones) +granThunk node_points + | node_points = granFetchAndReschedule [] node_points + | otherwise = granYield [] node_points + +granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? 
+ -> Code +-- Emit code for simulating a fetch and then reschedule. +granFetchAndReschedule regs node_reqd + | opt_GranMacros && (node `elem` map snd regs || node_reqd) + = do { fetch + ; reschedule liveness node_reqd } + | otherwise + = nopC + where + liveness = mkRegLiveness regs 0 0 + +fetch = panic "granFetch" + -- Was: absC (CMacroStmt GRAN_FETCH []) + --HWL: generate GRAN_FETCH macro for GrAnSim + -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are miai + +reschedule liveness node_reqd = panic "granReschedule" + -- Was: absC (CMacroStmt GRAN_RESCHEDULE [ + -- mkIntCLit (I# (word2Int# liveness_mask)), + -- mkIntCLit (if node_reqd then 1 else 0)]) + + +------------------------- +-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It +-- allows to context-switch at places where @node@ is not alive (it uses the +-- @Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit +-- this kind of macro at the beginning of the following kinds of basic bocks: +-- \begin{itemize} +-- \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally +-- we use @fetchAndReschedule@ at a slow entry code. +-- \item Fast entry code (see @CgClosure.lhs@). +-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided +-- that they are not inlined (see @CgCases.lhs@). These alternatives will +-- be turned into separate functions. + +granYield :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? 
+ -> Code + +granYield regs node_reqd + | opt_GranMacros && node_reqd = yield liveness + | otherwise = nopC + where + liveness = mkRegLiveness regs 0 0 + +yield liveness = panic "granYield" + -- Was : absC (CMacroStmt GRAN_YIELD + -- [mkIntCLit (I# (word2Int# liveness_mask))]) + +-} diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs new file mode 100644 index 0000000..6a8a435 --- /dev/null +++ b/compiler/codeGen/StgCmmHeap.hs @@ -0,0 +1,519 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: heap management functions +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmHeap ( + getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, + + entryHeapCheck, altHeapCheck, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr + ) where + +#include "HsVersions.h" + +import StgSyn +import CLabel +import StgCmmLayout +import StgCmmUtils +import StgCmmMonad +import StgCmmProf +import StgCmmTicky +import StgCmmGran +import StgCmmClosure +import StgCmmEnv + +import MkZipCfgCmm + +import SMRep +import CmmExpr +import CmmUtils +import DataCon +import TyCon +import CostCentre +import Outputable +import FastString( LitString, mkFastString, sLit ) +import Constants +import Data.List + + +----------------------------------------------------------- +-- Layout of heap objects +----------------------------------------------------------- + +layOutDynConstr, layOutStaticConstr + :: DataCon -> [(PrimRep, a)] + -> (ClosureInfo, [(a, VirtualHpOffset)]) +-- No Void arguments in result + +layOutDynConstr = layOutConstr False +layOutStaticConstr = layOutConstr True + +layOutConstr :: Bool -> DataCon -> [(PrimRep, a)] + -> (ClosureInfo, [(a, VirtualHpOffset)]) +layOutConstr is_static data_con args + = (mkConInfo is_static data_con tot_wds 
ptr_wds, + things_w_offsets) + where + (tot_wds, -- #ptr_wds + #nonptr_wds + ptr_wds, -- #ptr_wds + things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args + + +----------------------------------------------------------- +-- Initialise dynamic heap objects +----------------------------------------------------------- + +allocDynClosure + :: ClosureInfo + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc + -- (usually the same; sometimes "OVERHEAD") + + -> [(StgArg, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -- No void args in here + -> FCode LocalReg + +-- allocDynClosure allocates the thing in the heap, +-- and modifies the virtual Hp to account for this. + +-- Note [Return a LocalReg] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr. +-- Reason: +-- ...allocate object... +-- obj = Hp + 8 +-- y = f(z) +-- ...here obj is still valid, +-- but Hp+8 means something quite different... + + +allocDynClosure cl_info use_cc _blame_cc args_w_offsets + = do { virt_hp <- getVirtHp + + -- SAY WHAT WE ARE ABOUT TO DO + ; tickyDynAlloc cl_info + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! + + -- FIND THE OFFSET OF THE INFO-PTR WORD + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. 
+ + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; emit (mkComment $ mkFastString "allocDynClosure") + ; emitSetDynHdr base info_ptr use_cc + ; let (args, offsets) = unzip args_w_offsets + ; cmm_args <- mapM getArgAmode args -- No void args + ; hpStore base cmm_args offsets + + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + + -- Assign to a temporary and return + -- Note [Return a LocalReg] + ; hp_rel <- getHpRelOffset info_offset + ; assignTemp hp_rel } + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +emitSetDynHdr base info_ptr ccs + = hpStore base header [0..] + where + header :: [CmmExpr] + header = [info_ptr] ++ dynProfHdr ccs + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + -- No ticky header + +hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode () +-- Store the item (expr,off) in base[off] +hpStore base vals offs + = emit (catAGraphs (zipWith mk_store vals offs)) + where + mk_store val off = mkStore (cmmOffsetW base off) val + + +----------------------------------------------------------- +-- Layout of static closures +----------------------------------------------------------- + +-- Make a static closure, adding on any extra padding needed for CAFs, +-- and adding a static link field if necessary. + +mkStaticClosureFields + :: ClosureInfo + -> CostCentreStack + -> Bool -- Has CAF refs + -> [CmmLit] -- Payload + -> [CmmLit] -- The full closure +mkStaticClosureFields cl_info ccs caf_refs payload + = mkStaticClosure info_lbl ccs payload padding_wds + static_link_field saved_info_field + where + info_lbl = infoTableLabelFromCI cl_info + + -- CAFs must have consistent layout, regardless of whether they + -- are actually updatable or not. 
The layout of a CAF is: + -- + -- 3 saved_info + -- 2 static_link + -- 1 indirectee + -- 0 info ptr + -- + -- the static_link and saved_info fields must always be in the same + -- place. So we use closureNeedsUpdSpace rather than + -- closureUpdReqd here: + + is_caf = closureNeedsUpdSpace cl_info + + padding_wds + | not is_caf = [] + | otherwise = ASSERT(null payload) [mkIntCLit 0] + + static_link_field + | is_caf || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + saved_info_field + | is_caf = [mkIntCLit 0] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + ++ saved_info_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr + +----------------------------------------------------------- +-- Heap overflow checking +----------------------------------------------------------- + +{- Note [Heap checks] + ~~~~~~~~~~~~~~~~~~ +Heap checks come in various forms. We provide the following entry +points to the runtime system, all of which use the native C-- entry +convention. + + * gc() performs garbage collection and returns + nothing to its caller + + * A series of canned entry points like + r = gc_1p( r ) + where r is a pointer. This performs gc, and + then returns its argument r to its caller. 
+ + * A series of canned entry points like + gcfun_2p( f, x, y ) + where f is a function closure of arity 2 + This performs garbage collection, keeping alive the + three argument ptrs, and then tail-calls f(x,y) + +These are used in the following circumstances + +* entryHeapCheck: Function entry + (a) With a canned GC entry sequence + f( f_clo, x:ptr, y:ptr ) { + Hp = Hp+8 + if Hp > HpLim goto L + ... + L: HpAlloc = 8 + jump gcfun_2p( f_clo, x, y ) } + Note the tail call to the garbage collector; + it should do no register shuffling + + (b) No canned sequence + f( f_clo, x:ptr, y:ptr, ...etc... ) { + T: Hp = Hp+8 + if Hp > HpLim goto L + ... + L: HpAlloc = 8 + call gc() -- Needs an info table + goto T } + +* altHeapCheck: Immediately following an eval + Started as + case f x y of r { (p,q) -> rhs } + (a) With a canned sequence for the results of f + (which is the very common case since + all boxed cases return just one pointer + ... + r = f( x, y ) + K: -- K needs an info table + Hp = Hp+8 + if Hp > HpLim goto L + ...code for rhs... + + L: r = gc_1p( r ) + goto K } + + Here, the info table needed by the call + to gc_1p should be the *same* as the + one for the call to f; the C-- optimiser + spots this sharing opportunity + + (b) No canned sequence for results of f + Note second info table + ... + (r1,r2,r3) = call f( x, y ) + K: + Hp = Hp+8 + if Hp > HpLim goto L + ...code for rhs... + + L: call gc() -- Extra info table here + goto K + +* generalHeapCheck: Anywhere else + e.g. entry to thunk + case branch *not* following eval, + or let-no-escape + Exactly the same as the previous case: + + K: -- K needs an info table + Hp = Hp+8 + if Hp > HpLim goto L + ... + + L: call gc() + goto K +-} + +-------------------------------------------------------------- +-- A heap/stack check at a function or thunk entry point. 
+ +entryHeapCheck :: LocalReg -- Function + -> [LocalReg] -- Args (empty for thunk) + -> C_SRT + -> FCode () + -> FCode () + +entryHeapCheck fun args srt code + = heapCheck gc_call code -- The 'fun' keeps relevant CAFs alive + where + gc_call + | null args = mkJump (CmmReg (CmmGlobal GCEnter1)) [CmmReg (CmmLocal fun)] + | otherwise = case gc_lbl args of + Just lbl -> mkJump (CmmLit (CmmLabel (mkRtsCodeLabel lbl))) + (map (CmmReg . CmmLocal) (fun:args)) + Nothing -> mkCmmCall generic_gc [] [] srt + + gc_lbl :: [LocalReg] -> Maybe LitString + gc_lbl [reg] + | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p" + | isFloatType ty = case width of + W32 -> Just (sLit "stg_gc_f1") -- "stg_gc_fun_f1" + W64 -> Just (sLit "stg_gc_d1") -- "stg_gc_fun_d1" + _other -> Nothing + | otherwise = case width of + W32 -> Just (sLit "stg_gc_unbx_r1") -- "stg_gc_fun_unbx_r1" + W64 -> Just (sLit "stg_gc_l1") -- "stg_gc_fun_unbx_l1" + _other -> Nothing -- Narrow cases + where + ty = localRegType reg + width = typeWidth ty + + gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs) + + gc_lbl_ptrs :: [Bool] -> Maybe LitString + -- JD: TEMPORARY -- UNTIL THOSE FUNCTIONS EXIST... + --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p") + --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p") + gc_lbl_ptrs _ = Nothing + + +altHeapCheck :: [LocalReg] -> C_SRT -> FCode a -> FCode a +altHeapCheck regs srt code + = heapCheck gc_call code + where + gc_call + | null regs = mkCmmCall generic_gc [] [] srt + + | Just gc_lbl <- rts_label regs -- Canned call + = mkCmmCall (CmmLit (CmmLabel (mkRtsCodeLabel gc_lbl))) + regs + (map (CmmReg . 
CmmLocal) regs) + srt + | otherwise -- No canned call, and non-empty live vars + = mkCmmCall generic_gc [] [] srt + + rts_label [reg] + | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") + | isFloatType ty = case width of + W32 -> Just (sLit "stg_gc_f1") + W64 -> Just (sLit "stg_gc_d1") + _other -> Nothing + | otherwise = case width of + W32 -> Just (sLit "stg_gc_unbx_r1") + W64 -> Just (sLit "stg_gc_unbx_l1") + _other -> Nothing -- Narrow cases + where + ty = localRegType reg + width = typeWidth ty + + rts_label _ = Nothing + + +generic_gc :: CmmExpr -- The generic GC procedure; no params, no resuls +generic_gc = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_fun"))) + +------------------------------- +heapCheck :: CmmAGraph -> FCode a -> FCode a +heapCheck do_gc code + = getHeapUsage $ \ hpHw -> + do { emit (do_checks hpHw do_gc) + -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + ; tickyAllocHeap hpHw + ; doGranAllocate hpHw + ; setRealHp hpHw + ; code } + +do_checks :: WordOff -- Heap headroom + -> CmmAGraph -- What to do on failure + -> CmmAGraph +do_checks 0 _ + = mkNop +do_checks alloc do_gc + = withFreshLabel "gc" $ \ blk_id -> + mkLabel blk_id Nothing + <*> mkAssign hpReg bump_hp + <*> mkCmmIfThen hp_oflo + (save_alloc + <*> do_gc + <*> mkBranch blk_id) + -- Bump heap pointer, and test for heap exhaustion + -- Note that we don't move the heap pointer unless the + -- stack check succeeds. Otherwise we might end up + -- with slop at the end of the current block, which can + -- confuse the LDV profiler. + where + alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes + bump_hp = cmmOffsetExprB (CmmReg hpReg) alloc_lit + + -- Hp overflow if (Hp > HpLim) + -- (Hp has been incremented by now) + -- HpLim points to the LAST WORD of valid allocation space. 
+ hp_oflo = CmmMachOp mo_wordUGt + [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] + + save_alloc = mkAssign (CmmGlobal HpAlloc) alloc_lit + +{- + +{- Unboxed tuple alternatives and let-no-escapes (the two most annoying +constructs to generate code for!) For unboxed tuple returns, there +are an arbitrary number of possibly unboxed return values, some of +which will be in registers, and the others will be on the stack. We +always organise the stack-resident fields into pointers & +non-pointers, and pass the number of each to the heap check code. -} + +unbxTupleHeapCheck + :: [(Id, GlobalReg)] -- Live registers + -> WordOff -- no. of stack slots containing ptrs + -> WordOff -- no. of stack slots containing nonptrs + -> CmmAGraph -- code to insert in the failure path + -> FCode () + -> FCode () + +unbxTupleHeapCheck regs ptrs nptrs fail_code code + -- We can't manage more than 255 pointers/non-pointers + -- in a generic heap check. + | ptrs > 255 || nptrs > 255 = panic "altHeapCheck" + | otherwise + = initHeapUsage $ \ hpHw -> do + { codeOnly $ do { do_checks 0 {- no stack check -} hpHw + full_fail_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + where + full_fail_code = fail_code `plusStmts` oneStmt assign_liveness + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + (CmmLit (mkWordCLit liveness)) + liveness = mkRegLiveness regs ptrs nptrs + rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut"))) + + +{- Old Gransim comment -- I have no idea whether it still makes sense (SLPJ Sep07) +For GrAnSim the code for doing a heap check and doing a context switch +has been separated. Especially, the HEAP_CHK macro only performs a +heap check. THREAD_CONTEXT_SWITCH should be used for doing a context +switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of +every slow entry code in order to simulate the fetching of +closures. If fetching is necessary (i.e. 
current closure is not local) +then an automatic context switch is done. -} + + +When failing a check, we save a return address on the stack and +jump to a pre-compiled code fragment that saves the live registers +and returns to the scheduler. + +The return address in most cases will be the beginning of the basic +block in which the check resides, since we need to perform the check +again on re-entry because someone else might have stolen the resource +in the meantime. + +%************************************************************************ +%* * + Generic Heap/Stack Checks - used in the RTS +%* * +%************************************************************************ + +\begin{code} +hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +hpChkGen bytes liveness reentry + = do_checks' bytes True assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +-- a heap check where R1 points to the closure to enter on return, and +-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). +hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode () +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' bytes True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen"))) +\end{code} + +-} diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs new file mode 100644 index 0000000..0205bd0 --- /dev/null +++ b/compiler/codeGen/StgCmmHpc.hs @@ -0,0 +1,83 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for coverage +-- +-- (c) Galois Connections, Inc. 2006 +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. 
+-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmHpc ( initHpc, mkTickBox ) where + +import StgCmmUtils +import StgCmmMonad +import StgCmmForeign +import StgCmmClosure + +import MkZipCfgCmm +import Cmm +import CLabel +import Module +import CmmUtils +import ForeignCall +import FastString +import HscTypes +import Char +import StaticFlags +import PackageConfig + +mkTickBox :: Module -> Int -> CmmAGraph +mkTickBox mod n + = mkStore tick_box (CmmMachOp (MO_Add W64) + [ CmmLoad tick_box b64 + , CmmLit (CmmInt 1 W64) + ]) + where + tick_box = cmmIndex W64 + (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) + (fromIntegral n) + +initHpc :: Module -> HpcInfo -> FCode CmmAGraph +-- Emit top-level tables for HPC and return code to initialise +initHpc this_mod (NoHpcInfo {}) + = return mkNop +initHpc this_mod (HpcInfo tickCount hashNo) + = getCode $ whenC opt_Hpc $ + do { emitData ReadOnlyData + [ CmmDataLabel mkHpcModuleNameLabel + , CmmString $ map (fromIntegral . ord) + (full_name_str) + ++ [0] + ] + ; emitData Data $ [ CmmDataLabel (mkHpcTicksLabel this_mod) + ] ++ + [ CmmStaticLit (CmmInt 0 W64) + | _ <- take tickCount [0::Int ..] 
+ ] + + ; id <- newTemp bWord -- TODO FIXME NOW + ; emitCCall + [(id,NoHint)] + (CmmLit $ CmmLabel $ mkForeignLabel mod_alloc Nothing False) + [ (mkLblExpr mkHpcModuleNameLabel,AddrHint) + , (CmmLit $ mkIntCLit tickCount,NoHint) + , (CmmLit $ mkIntCLit hashNo,NoHint) + , (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod,AddrHint) + ] + } + where + mod_alloc = mkFastString "hs_hpc_module" + module_name_str = moduleNameString (Module.moduleName this_mod) + full_name_str = if modulePackageId this_mod == mainPackageId + then module_name_str + else packageIdString (modulePackageId this_mod) ++ "/" ++ + module_name_str + + + diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs new file mode 100644 index 0000000..f8d3964 --- /dev/null +++ b/compiler/codeGen/StgCmmLayout.hs @@ -0,0 +1,618 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +{-# OPTIONS #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmLayout ( + mkArgDescr, + emitCall, emitReturn, + + emitClosureCodeAndInfoTable, + + slowCall, directCall, + + mkVirtHeapOffsets, getHpRelOffset, hpRel, + + stdInfoTableSizeB, + entryCode, closureInfoPtr, + getConstrTag, + cmmGetClosureType, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, makeRelativeRefTo + ) where + + +#include "HsVersions.h" + +import StgCmmClosure +import StgCmmEnv +import StgCmmTicky +import StgCmmUtils +import StgCmmMonad + +import MkZipCfgCmm +import SMRep +import CmmUtils +import Cmm +import CLabel +import StgSyn +import Id +import Name +import TyCon ( PrimRep(..) 
) +import Unique +import BasicTypes ( Arity ) +import StaticFlags + +import Bitmap +import Data.Bits + +import Maybes +import Constants +import Util +import Data.List +import Outputable +import FastString ( LitString, sLit ) + +------------------------------------------------------------------------ +-- Call and return sequences +------------------------------------------------------------------------ + +emitReturn :: [CmmExpr] -> FCode () +-- Return multiple values to the sequel +-- +-- If the sequel is Return +-- return (x,y) +-- If the sequel is AssignTo [p,q] +-- p=x; q=y; +emitReturn results + = do { adjustHpBackwards + ; sequel <- getSequel; + ; case sequel of + Return _ -> emit (mkReturn results) + AssignTo regs _ -> emit (mkMultiAssign regs results) + } + +emitCall :: CmmExpr -> [CmmExpr] -> FCode () +-- (cgCall fun args) makes a call to the entry-code of 'fun', +-- passing 'args', and returning the results to the current sequel +emitCall fun args + = do { adjustHpBackwards + ; sequel <- getSequel; + ; case sequel of + Return _ -> emit (mkJump fun args) + AssignTo res_regs srt -> emit (mkCmmCall fun res_regs args srt) + } + +adjustHpBackwards :: FCode () +-- This function adjusts and heap pointers just before a tail call or +-- return. At a call or return, the virtual heap pointer may be less +-- than the real Hp, because the latter was advanced to deal with +-- the worst-case branch of the code, and we may be in a better-case +-- branch. In that case, move the real Hp *back* and retract some +-- ticky allocation count. +-- +-- It *does not* deal with high-water-mark adjustment. +-- That's done by functions which allocate heap. 
+adjustHpBackwards + = do { hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + adjust_words = vHp -rHp + ; new_hp <- getHpRelOffset vHp + + ; emit (if adjust_words == 0 + then mkNop + else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp + + ; tickyAllocHeap adjust_words -- ...ditto + + ; setRealHp vHp + } + + +------------------------------------------------------------------------- +-- Making calls: directCall and slowCall +------------------------------------------------------------------------- + +directCall :: CLabel -> Arity -> [StgArg] -> FCode () +-- (directCall f n args) +-- calls f(arg1, ..., argn), and applies the result to the remaining args +-- The function f has arity n, and there are guaranteed at least n args +-- Both arity and args include void args +directCall lbl arity stg_args + = do { cmm_args <- getNonVoidArgAmodes stg_args + ; direct_call lbl arity cmm_args (argsLReps stg_args) } + +slowCall :: CmmExpr -> [StgArg] -> FCode () +-- (slowCall fun args) applies fun to args, returning the results to Sequel +slowCall fun stg_args + = do { cmm_args <- getNonVoidArgAmodes stg_args + ; slow_call fun cmm_args (argsLReps stg_args) } + +-------------- +direct_call :: CLabel -> Arity -> [CmmExpr] -> [LRep] -> FCode () +-- NB1: (length args) maybe less than (length reps), because +-- the args exclude the void ones +-- NB2: 'arity' refers to the *reps* +direct_call lbl arity args reps + | null rest_args + = ASSERT( arity == length args) + emitCall target args + + | otherwise + = ASSERT( arity == length initial_reps ) + do { pap_id <- newTemp gcWord + ; let srt = pprTrace "Urk! SRT for over-sat call" + (ppr lbl) NoC_SRT + -- XXX: what if rest_args contains static refs? 
+ ; withSequel (AssignTo [pap_id] srt) + (emitCall target args) + ; slow_call (CmmReg (CmmLocal pap_id)) + rest_args rest_reps } + where + target = CmmLit (CmmLabel lbl) + (initial_reps, rest_reps) = splitAt arity reps + arg_arity = count isNonV initial_reps + (_, rest_args) = splitAt arg_arity args + +-------------- +slow_call :: CmmExpr -> [CmmExpr] -> [LRep] -> FCode () +slow_call fun args reps + = direct_call (mkRtsApFastLabel rts_fun) (arity+1) + (fun : args) (P : reps) + where + (rts_fun, arity) = slowCallPattern reps + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern :: [LRep] -> (LitString, Arity) +-- Returns the generic apply function and arity +slowCallPattern (P: P: P: P: P: P: _) = (sLit "stg_ap_pppppp", 6) +slowCallPattern (P: P: P: P: P: _) = (sLit "stg_ap_ppppp", 5) +slowCallPattern (P: P: P: P: _) = (sLit "stg_ap_pppp", 4) +slowCallPattern (P: P: P: V: _) = (sLit "stg_ap_pppv", 4) +slowCallPattern (P: P: P: _) = (sLit "stg_ap_ppp", 3) +slowCallPattern (P: P: V: _) = (sLit "stg_ap_ppv", 3) +slowCallPattern (P: P: _) = (sLit "stg_ap_pp", 2) +slowCallPattern (P: V: _) = (sLit "stg_ap_pv", 2) +slowCallPattern (P: _) = (sLit "stg_ap_p", 1) +slowCallPattern (V: _) = (sLit "stg_ap_v", 1) +slowCallPattern (N: _) = (sLit "stg_ap_n", 1) +slowCallPattern (F: _) = (sLit "stg_ap_f", 1) +slowCallPattern (D: _) = (sLit "stg_ap_d", 1) +slowCallPattern (L: _) = (sLit "stg_ap_l", 1) +slowCallPattern [] = (sLit "stg_ap_0", 0) + + +------------------------------------------------------------------------- +-- Classifying arguments: LRep +------------------------------------------------------------------------- + +-- LRep is not exported (even abstractly) +-- It's a local helper type for classification + +data LRep = P -- GC Ptr + | N -- One-word non-ptr + | L -- Two-word non-ptr (long) + | V -- Void + | F -- Float + | D -- Double + +toLRep :: PrimRep -> LRep +toLRep VoidRep = V +toLRep PtrRep = P +toLRep IntRep = N +toLRep WordRep = N 
+toLRep AddrRep = N +toLRep Int64Rep = L +toLRep Word64Rep = L +toLRep FloatRep = F +toLRep DoubleRep = D + +isNonV :: LRep -> Bool +isNonV V = False +isNonV _ = True + +argsLReps :: [StgArg] -> [LRep] +argsLReps = map (toLRep . argPrimRep) + +lRepSizeW :: LRep -> WordOff -- Size in words +lRepSizeW N = 1 +lRepSizeW P = 1 +lRepSizeW F = 1 +lRepSizeW L = wORD64_SIZE `quot` wORD_SIZE +lRepSizeW D = dOUBLE_SIZE `quot` wORD_SIZE +lRepSizeW V = 0 + +------------------------------------------------------------------------- +---- Laying out objects on the heap and stack +------------------------------------------------------------------------- + +-- The heap always grows upwards, so hpRel is easy +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp + +getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr +getHpRelOffset virtual_offset + = do { hp_usg <- getHpUsage + ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) } + +mkVirtHeapOffsets + :: Bool -- True <=> is a thunk + -> [(PrimRep,a)] -- Things to make offsets for + -> (WordOff, -- _Total_ number of words allocated + WordOff, -- Number of words allocated for *pointers* + [(a, VirtualHpOffset)]) + +-- Things with their offsets from start of object in order of +-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER +-- First in list gets lowest offset, which is initial offset + 1. +-- +-- Void arguments are removed, so output list may be shorter than +-- input list +-- +-- mkVirtHeapOffsets always returns boxed things with smaller offsets +-- than the unboxed things + +mkVirtHeapOffsets is_thunk things + = let non_void_things = filterOut (isVoidRep . fst) things + (ptrs, non_ptrs) = partition (isGcPtrRep . 
fst) non_void_things + (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs + (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs + in + (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets) + where + hdr_size | is_thunk = thunkHdrSize + | otherwise = fixedHdrSize + + computeOffset wds_so_far (rep, thing) + = (wds_so_far + lRepSizeW (toLRep rep), + (thing, hdr_size + wds_so_far)) + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. +#include "../includes/StgFun.h" + +------------------------- +-- argDescrType :: ArgDescr -> StgHalfWord +-- -- The "argument type" RTS field type +-- argDescrType (ArgSpec n) = n +-- argDescrType (ArgGen liveness) +-- | isBigLiveness liveness = ARG_GEN_BIG +-- | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter isNonV (map (toLRep . 
idPrimRep) args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [LRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (P : args) = False : argBits args +argBits (arg : args) = take (lRepSizeW arg) (repeat True) ++ argBits args + +---------------------- +stdPattern :: [LRep] -> Maybe StgHalfWord +stdPattern reps + = case reps of + [] -> Just ARG_NONE -- just void args, probably + [N] -> Just ARG_N + [P] -> Just ARG_N + [F] -> Just ARG_F + [D] -> Just ARG_D + [L] -> Just ARG_L + + [N,N] -> Just ARG_NN + [N,P] -> Just ARG_NP + [P,N] -> Just ARG_PN + [P,P] -> Just ARG_PP + + [N,N,N] -> Just ARG_NNN + [N,N,P] -> Just ARG_NNP + [N,P,N] -> Just ARG_NPN + [N,P,P] -> Just ARG_NPP + [P,N,N] -> Just ARG_PNN + [P,N,P] -> Just ARG_PNP + [P,P,N] -> Just ARG_PPN + [P,P,P] -> Just ARG_PPP + + [P,P,P,P] -> Just ARG_PPPP + [P,P,P,P,P] -> Just ARG_PPPPP + [P,P,P,P,P,P] -> Just ARG_PPPPPP + + _ -> Nothing + +------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +-- TODO: This along with 'mkArgDescr' should be unified +-- with 'CmmInfo.mkLiveness'. However that would require +-- potentially invasive changes to the 'ClosureInfo' type. +-- For now, 'CmmInfo.mkLiveness' handles only continuations and +-- this one handles liveness everything else. Another distinction +-- between these two is that 'CmmInfo.mkLiveness' information +-- about the stack layout, and this one is information about +-- the heap layout of PAPs. 
+mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel (getUnique name) + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +-- isBigLiveness :: Liveness -> Bool +-- isBigLiveness (BigLiveness _) = True +-- isBigLiveness (SmallLiveness _) = False + +------------------- +-- mkLivenessCLit :: Liveness -> CmmLit +-- mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +-- mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). +------------------------------------------------------------------------- + +{- Not used in new code gen +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isGcPtrRep (idPrimRep id) + = (1 `shiftL` (i - 1)) .|. 
reg_bits regs + reg_bits (_ : regs) + = reg_bits regs +-} + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make an info table of type 'CmmInfo'. The concrete +-- representation as a list of 'CmmAddr' is handled later +-- in the pipeline by 'cmmToRawCmm'. + +emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormals + -> CmmAGraph -> FCode () +emitClosureCodeAndInfoTable cl_info args body + = do { info <- mkCmmInfo cl_info + ; emitProc info (infoLblToEntryLbl info_lbl) args body } + where + info_lbl = infoTableLabelFromCI cl_info + +-- Convert from 'ClosureInfo' to 'CmmInfo'. +-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.) +mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo cl_info + = do { prof <- if opt_SccProfilingOn then + do fd_lit <- mkStringCLit (closureTypeDescr cl_info) + ad_lit <- mkStringCLit (closureValDescr cl_info) + return $ ProfilingInfo fd_lit ad_lit + else return $ ProfilingInfo (mkIntCLit 0) (mkIntCLit 0) + ; return (CmmInfo gc_target Nothing (CmmInfoTable prof cl_type info)) } + where + info = closureTypeInfo cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + -- The gc_target is to inform the CPS pass when it inserts a stack check. + -- Since that pass isn't used yet we'll punt for now. + -- When the CPS pass is fully integrated, this should + -- be replaced by the label that any heap check jumped to, + -- so that branch can be shared by both the heap (from codeGen) + -- and stack checks (from the CPS pass). + -- JD: Actually, we've decided to go a different route here: + -- the code generator is now responsible for producing the + -- stack limit check explicitly, so this field is now obsolete. 
+ gc_target = Nothing + +----------------------------------------------------------------------------- +-- +-- Info table offsets +-- +----------------------------------------------------------------------------- + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB :: ByteOff +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e bWord + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e bWord + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). 
+getConstrTag closure_ptr + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +cmmGetClosureType :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType closure_ptr + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableClosureType info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) bHalfWord + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) bHalfWord + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) bHalfWord + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) bHalfWord + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. 
+funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord) +-- srtLabelAndLength NoC_SRT _ +-- = (zeroCLit, 0) +-- srtLabelAndLength (C_SRT lbl off bitmap) info_lbl +-- = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap) + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. 
+ +makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo info_lbl (CmmLabel lbl) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl 0 +makeRelativeRefTo info_lbl (CmmLabelOff lbl off) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl off +makeRelativeRefTo _ lit = lit diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs new file mode 100644 index 0000000..3652639 --- /dev/null +++ b/compiler/codeGen/StgCmmMonad.hs @@ -0,0 +1,601 @@ +----------------------------------------------------------------------------- +-- +-- Monad for Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmMonad ( + FCode, -- type + + initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + returnFC, fixC, nopC, whenC, + newUnique, newUniqSupply, + + emit, emitData, emitProc, emitSimpleProc, + + getCmm, cgStmtsToBlocks, + getCodeR, getCode, getHeapUsage, + + forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, + + ConTagZ, + + Sequel(..), + withSequel, getSequel, + + setSRTLabel, getSRTLabel, + setTickyCtrLabel, getTickyCtrLabel, + + HeapUsage(..), VirtualHpOffset, initHpUsage, + getHpUsage, setHpUsage, heapHWM, + setVirtHp, getVirtHp, setRealHp, + + getModuleName, + + -- ideally we wouldn't export these, but some other modules access internal state + getState, setState, getInfoDown, getDynFlags, getThisPackage, + + -- more localised access to monad state + CgIdInfo(..), CgLoc(..), + getBinds, setBinds, getStaticBinds, + + -- out of general friendliness, we also export ... + CgInfoDownwards(..), CgState(..) 
-- non-abstract + ) where + +#include "HsVersions.h" + +import StgCmmClosure +import DynFlags +import MkZipCfgCmm +import BlockId +import Cmm +import CLabel +import TyCon ( PrimRep ) +import SMRep +import Module +import Id +import VarEnv +import OrdList +import Unique +import Util() +import UniqSupply +import FastString(sLit) +import Outputable + +import Control.Monad +import Data.List +import Prelude hiding( sequence ) +import qualified Prelude( sequence ) + +infixr 9 `thenC` -- Right-associative! +infixr 9 `thenFC` + + +-------------------------------------------------------- +-- The FCode monad and its types +-------------------------------------------------------- + +newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState)) + +instance Monad FCode where + (>>=) = thenFC + return = returnFC + +{-# INLINE thenC #-} +{-# INLINE thenFC #-} +{-# INLINE returnFC #-} + +initC :: DynFlags -> Module -> FCode a -> IO a +initC dflags mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of + (res, _) -> return res + } + +returnFC :: a -> FCode a +returnFC val = FCode (\_info_down state -> (val, state)) + +thenC :: FCode () -> FCode a -> FCode a +thenC (FCode m) (FCode k) = + FCode (\info_down state -> let (_,new_state) = m info_down state in + k info_down new_state) + +nopC :: FCode () +nopC = return () + +whenC :: Bool -> FCode () -> FCode () +whenC True code = code +whenC False _code = nopC + +listCs :: [FCode ()] -> FCode () +listCs [] = return () +listCs (fc:fcs) = do + fc + listCs fcs + +mapCs :: (a -> FCode ()) -> [a] -> FCode () +mapCs = mapM_ + +thenFC :: FCode a -> (a -> FCode c) -> FCode c +thenFC (FCode m) k = FCode ( + \info_down state -> + let + (m_result, new_state) = m info_down state + (FCode kcode) = k m_result + in + kcode info_down new_state + ) + +listFCs :: [FCode a] -> FCode [a] +listFCs = Prelude.sequence + +mapFCs :: (a -> FCode b) -> [a] -> FCode [b] +mapFCs = mapM + +fixC 
:: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + FCode fc = fcode v + result@(v,_) = fc info_down state + -- ^--------^ + in + result + ) + + +-------------------------------------------------------- +-- The code generator environment +-------------------------------------------------------- + +-- This monadery has some information that it only passes +-- *downwards*, as well as some ``state'' which is modified +-- as we go along. + +data CgInfoDownwards -- information only passed *downwards* by the monad + = MkCgInfoDown { + cgd_dflags :: DynFlags, + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt_lbl :: CLabel, -- Label of the current top-level SRT + cgd_ticky :: CLabel, -- Current destination for ticky counts + cgd_sequel :: Sequel -- What to do at end of basic block + } + +type CgBindings = IdEnv CgIdInfo + +data CgIdInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_lf :: LambdaFormInfo + , cg_loc :: CgLoc + , cg_rep :: PrimRep -- Cache for (idPrimRep id) + , cg_tag :: {-# UNPACK #-} !DynTag -- Cache for (lfDynTag cg_lf) + } + +data CgLoc + = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning + -- Hp, so that it remains valid across calls + + | LneLoc BlockId [LocalReg] -- A join point + -- A join point (= let-no-escape) should only + -- be tail-called, and in a saturated way. 
+ -- To tail-call it, assign to these locals, + -- and branch to the block id + +instance Outputable CgIdInfo where + ppr (CgIdInfo { cg_id = id, cg_loc = loc }) + = ppr id <+> ptext (sLit "-->") <+> ppr loc + +instance Outputable CgLoc where + ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e + ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs + + +-- Sequel tells what to do with the result of this expression +data Sequel + = Return Bool -- Return result(s) to continuation found on the stack + -- True <=> the continuation is update code (???) + + | AssignTo + [LocalReg] -- Put result(s) in these regs and fall through + -- NB: no void arguments here + C_SRT -- Here are the statics live in the continuation + + + +initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards +initCgInfoDown dflags mod + = MkCgInfoDown { cgd_dflags = dflags, + cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt_lbl = error "initC: srt_lbl", + cgd_ticky = mkTopTickyCtrLabel, + cgd_sequel = initSequel } + +initSequel :: Sequel +initSequel = Return False + + +-------------------------------------------------------- +-- The code generator state +-------------------------------------------------------- + +data CgState + = MkCgState { + cgs_stmts :: CmmAGraph, -- Current procedure + + cgs_tops :: OrdList CmmTopZ, + -- Other procedures and data blocks in this compilation unit + -- Both are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +data HeapUsage = + HeapUsage { + virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word + realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr + } + +type VirtualHpOffset = WordOff + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = mkNop, cgs_tops = 
nilOL, + cgs_binds = emptyVarEnv, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } + +stateIncUsage :: CgState -> CgState -> CgState +-- stateIncUsage@ e1 e2 incorporates in e1 +-- the heap high water mark found in e2. +stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg }) + = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg } + `addCodeBlocksFrom` s2 + +addCodeBlocksFrom :: CgState -> CgState -> CgState +-- Add code blocks from the latter to the former +-- (The cgs_stmts will often be empty, but not always; see codeOnly) +s1 `addCodeBlocksFrom` s2 + = s1 { cgs_stmts = cgs_stmts s1 <*> cgs_stmts s2, + cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } + + +-- The heap high water mark is the larger of virtHp and hwHp. The latter is +-- only records the high water marks of forked-off branches, so to find the +-- heap high water mark you have to take the max of virtHp and hwHp. Remember, +-- virtHp never retreats! +-- +-- Note Jan 04: ok, so why do we only look at the virtual Hp?? + +heapHWM :: HeapUsage -> VirtualHpOffset +heapHWM = virtHp + +initHpUsage :: HeapUsage +initHpUsage = HeapUsage { virtHp = 0, realHp = 0 } + +maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage +hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } + + +-------------------------------------------------------- +-- Operators for getting and setting the state and "info_down". 
+-------------------------------------------------------- + +getState :: FCode CgState +getState = FCode $ \_info_down state -> (state,state) + +setState :: CgState -> FCode () +setState state = FCode $ \_info_down _ -> ((),state) + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> FCode () +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} + +setVirtHp :: VirtualHpOffset -> FCode () +setVirtHp new_virtHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {virtHp = new_virtHp}) } + +getVirtHp :: FCode VirtualHpOffset +getVirtHp + = do { hp_usage <- getHpUsage + ; return (virtHp hp_usage) } + +setRealHp :: VirtualHpOffset -> FCode () +setRealHp new_realHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {realHp = new_realHp}) } + +getBinds :: FCode CgBindings +getBinds = do + state <- getState + return $ cgs_binds state + +setBinds :: CgBindings -> FCode () +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} + +getStaticBinds :: FCode CgBindings +getStaticBinds = do + info <- getInfoDown + return (cgd_statics info) + +withState :: FCode a -> CgState -> FCode (a,CgState) +withState (FCode fcode) newstate = FCode $ \info_down state -> + let (retval, state2) = fcode info_down newstate in ((retval,state2), state) + +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ +getInfoDown :: FCode CgInfoDownwards +getInfoDown = FCode $ \info_down state -> (info_down,state) + +getDynFlags :: FCode DynFlags +getDynFlags = liftM cgd_dflags getInfoDown + +getThisPackage :: FCode PackageId +getThisPackage = liftM thisPackage getDynFlags + +withInfoDown :: FCode a -> 
CgInfoDownwards -> FCode a +withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state + +doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) +doFCode (FCode fcode) info_down state = fcode info_down state + + +-- ---------------------------------------------------------------------------- +-- Get the current module name + +getModuleName :: FCode Module +getModuleName = do { info <- getInfoDown; return (cgd_mod info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the end-of-block info + +withSequel :: Sequel -> FCode () -> FCode () +withSequel sequel code + = do { info <- getInfoDown + ; withInfoDown code (info {cgd_sequel = sequel }) } + +getSequel :: FCode Sequel +getSequel = do { info <- getInfoDown + ; return (cgd_sequel info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the current SRT label + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. 
+ +getSRTLabel :: FCode CLabel -- Used only by cgPanic +getSRTLabel = do info <- getInfoDown + return (cgd_srt_lbl info) + +setSRTLabel :: CLabel -> FCode a -> FCode a +setSRTLabel srt_lbl code + = do info <- getInfoDown + withInfoDown code (info { cgd_srt_lbl = srt_lbl}) + +-- ---------------------------------------------------------------------------- +-- Get/set the current ticky counter label + +getTickyCtrLabel :: FCode CLabel +getTickyCtrLabel = do + info <- getInfoDown + return (cgd_ticky info) + +setTickyCtrLabel :: CLabel -> FCode () -> FCode () +setTickyCtrLabel ticky code = do + info <- getInfoDown + withInfoDown code (info {cgd_ticky = ticky}) + + +-------------------------------------------------------- +-- Forking +-------------------------------------------------------- + +forkClosureBody :: FCode () -> FCode () +-- forkClosureBody takes a code, $c$, and compiles it in a +-- fresh environment, except that: +-- - compilation info and statics are passed in unchanged. +-- - local bindings are passed in unchanged +-- (it's up to the enclosed code to re-bind the +-- free variables to a field of the closure) +-- +-- The current state is passed on completely unaltered, except that +-- C-- from the fork is incorporated. + +forkClosureBody body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let body_info_down = info { cgd_sequel = initSequel } + fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } + ((),fork_state_out) + = doFCode body_code body_info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } + +forkStatics :: FCode a -> FCode a +-- @forkStatics@ $fc$ compiles $fc$ in an environment whose *statics* come +-- from the current *local bindings*, but which is otherwise freshly initialised. +-- The Abstract~C returned is attached to the current state, but the +-- bindings and usage information is otherwise unchanged. 
+forkStatics body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let rhs_info_down = info { cgd_statics = cgs_binds state, + cgd_sequel = initSequel } + (result, fork_state_out) = doFCode body_code rhs_info_down + (initCgState us) + ; setState (state `addCodeBlocksFrom` fork_state_out) + ; return result } + +forkProc :: FCode a -> FCode a +-- 'forkProc' takes a code and compiles it in the *current* environment, +-- returning the graph thus constructed. +-- +-- The current environment is passed on completely unchanged to +-- the successor. In particular, any heap usage from the enclosed +-- code is discarded; it should deal with its own heap consumption +forkProc body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) + { cgs_binds = cgs_binds state } + (result, fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out + ; return result } + +codeOnly :: FCode () -> FCode () +-- Emit any code from the inner thing into the outer thing +-- Do not affect anything else in the outer state +-- Used in almost-circular code to prevent false loop dependencies +codeOnly body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state, + cgs_hp_usg = cgs_hp_usg state } + ((), fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } + +forkAlts :: [FCode a] -> FCode [a] +-- (forkAlts' bs d) takes fcodes 'bs' for the branches of a 'case', and +-- an fcode for the default case 'd', and compiles each in the current +-- environment. 
The current environment is passed on unmodified, except +-- that the virtual Hp is moved on to the worst virtual Hp for the branches + +forkAlts branch_fcodes + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let compile us branch + = (us2, doFCode branch info_down branch_state) + where + (us1,us2) = splitUniqSupply us + branch_state = (initCgState us1) { + cgs_binds = cgs_binds state, + cgs_hp_usg = cgs_hp_usg state } + + (_us, results) = mapAccumL compile us branch_fcodes + (branch_results, branch_out_states) = unzip results + ; setState $ foldl stateIncUsage state branch_out_states + -- NB foldl. state is the *left* argument to stateIncUsage + ; return branch_results } + +-- collect the code emitted by an FCode computation +getCodeR :: FCode a -> FCode (a, CmmAGraph) +getCodeR fcode + = do { state1 <- getState + ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop }) + ; setState $ state2 { cgs_stmts = cgs_stmts state1 } + ; return (a, cgs_stmts state2) } + +getCode :: FCode a -> FCode CmmAGraph +getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts } + +-- 'getHeapUsage' applies a function to the amount of heap that it uses. +-- It initialises the heap usage to zeros, and passes on an unchanged +-- heap usage. +-- +-- It is usually a prelude to performing a GC check, so everything must +-- be in a tidy and consistent state. +-- +-- Note the slightly subtle fixed point behaviour needed here + +getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a +getHeapUsage fcode + = do { info_down <- getInfoDown + ; state <- getState + ; let fstate_in = state { cgs_hp_usg = initHpUsage } + (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in + hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here! 
+ + ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state } + ; return r } + +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + +emit :: CmmAGraph -> FCode () +emit ag + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state <*> ag } } + +emitData :: Section -> [CmmStatic] -> FCode () +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: CmmInfo -> CLabel -> CmmFormals -> CmmAGraph -> FCode () +emitProc info lbl args blocks + = do { us <- newUniqSupply + ; let (offset, entry) = mkEntry (mkBlockId $ uniqFromSupply us) Native args + blks = initUs_ us $ lgraphOfAGraph offset $ entry <*> blocks + -- ; blks <- cgStmtsToBlocks blocks + ; let proc_block = CmmProc info lbl args blks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> CmmAGraph -> FCode () +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] code + +getCmm :: FCode () -> FCode CmmZ +-- Get all the CmmTops (there should be no stmts) +-- Return a single Cmm which may be split from other Cmms by +-- object splitting (at a later stage) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. 
+cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph +cgStmtsToBlocks stmts + = do { us <- newUniqSupply + ; return (initUs_ us (lgraphOfAGraph 0 stmts)) } + diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs new file mode 100644 index 0000000..96467fe --- /dev/null +++ b/compiler/codeGen/StgCmmPrim.hs @@ -0,0 +1,662 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: primitive operations +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmPrim ( + cgOpApp + ) where + +#include "HsVersions.h" + +import StgCmmLayout +import StgCmmForeign +import StgCmmEnv +import StgCmmMonad +import StgCmmUtils + +import MkZipCfgCmm +import StgSyn +import Cmm +import Type ( Type, tyConAppTyCon ) +import TyCon +import CLabel +import CmmUtils +import PrimOp +import SMRep +import Constants +import FastString +import Outputable + +------------------------------------------------------------------------ +-- Primitive operations and foreign calls +------------------------------------------------------------------------ + +{- Note [Foreign call results] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A foreign call always returns an unboxed tuple of results, one +of which is the state token. This seems to happen even for pure +calls. + +Even if we returned a single result for pure calls, it'd still be +right to wrap it in a singleton unboxed tuple, because the result +might be a Haskell closure pointer, we don't want to evaluate it. 
-} + +---------------------------------- +cgOpApp :: StgOp -- The op + -> [StgArg] -- Arguments + -> Type -- Result type (always an unboxed tuple) + -> FCode () + +-- Foreign calls +cgOpApp (StgFCallOp fcall _) stg_args res_ty + = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty + -- Choose result regs r1, r2 + -- Note [Foreign call results] + ; cgForeignCall res_regs res_hints fcall stg_args + -- r1, r2 = foo( x, y ) + ; emitReturn (map (CmmReg . CmmLocal) res_regs) } + -- return (r1, r2) + +-- tagToEnum# is special: we need to pull the constructor +-- out of the table, and perform an appropriate return. + +cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty + = ASSERT(isEnumerationTyCon tycon) + do { amode <- getArgAmode arg + ; emitReturn [tagToClosure tycon amode] } + where + -- If you're reading this code in the attempt to figure + -- out why the compiler panic'ed here, it is probably because + -- you used tagToEnum# in a non-monomorphic setting, e.g., + -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# + -- That won't work. + tycon = tyConAppTyCon res_ty + +cgOpApp (StgPrimOp primop) args res_ty + | primOpOutOfLine primop + = do { cmm_args <- getNonVoidArgAmodes args + ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) + ; emitCall fun cmm_args } + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args + emitReturn [] + + | ReturnsPrim rep <- result_info + = do res <- newTemp (primRepCmmType rep) + cgPrimOp [res] primop args + emitReturn [CmmReg (CmmLocal res)] + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args + emitReturn (map (CmmReg . CmmLocal) regs) + + | ReturnsAlg tycon <- result_info + , isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) 
+ = do tag_reg <- newTemp bWord + cgPrimOp [tag_reg] primop args + emitReturn [tagToClosure tycon + (CmmReg (CmmLocal tag_reg))] + + | otherwise = panic "cgPrimop" + where + result_info = getPrimOpResultInfo primop + +--------------------------------------------------- +cgPrimOp :: [LocalReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> FCode () + +cgPrimOp results op args + = do arg_exprs <- getNonVoidArgAmodes args + emitPrimOp results op arg_exprs + + +------------------------------------------------------------------------ +-- Emitting code for a primop +------------------------------------------------------------------------ + +emitPrimOp :: [LocalReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> FCode () + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. + +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. 
+ + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp mo_wordAdd [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp mo_wordSub [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] + = + -- for now, just implement this in a C function + -- later, we might want to inline it. 
+ emitCCall + [(res,NoHint)] + (CmmLit (CmmLabel (mkRtsCodeLabel (sLit "newSpark")))) + [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)] + +emitPrimOp [res] ReadMutVarOp [mutv] + = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) + +emitPrimOp [] WriteMutVarOp [mutv,var] + = do + emit (mkStore (cmmOffsetW mutv fixedHdrSize) var) + emitCCall + [{-no results-}] + (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)] + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] + = emit $ + mkAssign (CmmLocal res) (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize bWord, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] + = emitPrimOp [res] SizeofByteArrayOp [arg] + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [_arg] + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] + = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] + = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] + = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize bWord, + cmmLoadIndexW arg2 fixedHdrSize bWord + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] + = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] + = emit (mkAssign (CmmLocal res) arg) + +-- #define dataToTagzh(r,a) 
r=(GET_TAG(((StgClosure *)a)->header.info)) +-- Note: argument may be tagged! +emitPrimOp [res] DataToTagOp [arg] + = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. -} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] + = emit $ catAGraphs + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + mkAssign (CmmLocal res) arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] + = emit (mkAssign (CmmLocal res) arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args 
+emitPrimOp res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. + +emitPrimOp res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Word64 args = 
doIndexOffAddrOp Nothing b64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args + +-- ReadXXXArray, identical to IndexXXXArray. 
+ +emitPrimOp res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing res args +emitPrimOp res 
WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just mo_WordTo16) res args +emitPrimOp res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just mo_WordTo16) res args +emitPrimOp res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just mo_WordTo16) res args +emitPrimOp res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp 
res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just mo_WordTo16) res args +emitPrimOp res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] + | nopOp op + = emit (mkAssign (CmmLocal res) arg) + + | Just (mop,rep) <- narrowOp op + = emit (mkAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) + +emitPrimOp [res] op args + | Just prim <- callishOp op + = do emitPrimCall res prim args + + | Just mop <- translateOp op + = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in + emit stmt + +emitPrimOp _ op _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp :: PrimOp -> Bool +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp ChrOp = True -- Int# and Char# are rep'd the same +nopOp OrdOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) +narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) +narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) +narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) +narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) +narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) +narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp :: PrimOp -> Maybe MachOp +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just 
mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- Char# ops + +translateOp CharEqOp = Just (MO_Eq wordWidth) +translateOp CharNeOp = Just (MO_Ne wordWidth) +translateOp CharGeOp = Just (MO_U_Ge wordWidth) +translateOp CharLeOp = Just (MO_U_Le wordWidth) +translateOp CharGtOp = Just (MO_U_Gt wordWidth) +translateOp CharLtOp = Just (MO_U_Lt wordWidth) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_F_Eq W64) +translateOp DoubleNeOp = Just (MO_F_Ne W64) +translateOp DoubleGeOp = Just (MO_F_Ge W64) +translateOp DoubleLeOp = Just (MO_F_Le W64) +translateOp DoubleGtOp = Just (MO_F_Gt W64) +translateOp DoubleLtOp = Just (MO_F_Lt W64) + +translateOp DoubleAddOp = Just (MO_F_Add W64) +translateOp 
DoubleSubOp = Just (MO_F_Sub W64) +translateOp DoubleMulOp = Just (MO_F_Mul W64) +translateOp DoubleDivOp = Just (MO_F_Quot W64) +translateOp DoubleNegOp = Just (MO_F_Neg W64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_F_Eq W32) +translateOp FloatNeOp = Just (MO_F_Ne W32) +translateOp FloatGeOp = Just (MO_F_Ge W32) +translateOp FloatLeOp = Just (MO_F_Le W32) +translateOp FloatGtOp = Just (MO_F_Gt W32) +translateOp FloatLtOp = Just (MO_F_Lt W32) + +translateOp FloatAddOp = Just (MO_F_Add W32) +translateOp FloatSubOp = Just (MO_F_Sub W32) +translateOp FloatMulOp = Just (MO_F_Mul W32) +translateOp FloatDivOp = Just (MO_F_Quot W32) +translateOp FloatNegOp = Just (MO_F_Neg W32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64) +translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth) + +translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32) +translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth) + +translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64) +translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp SameTVarOp = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. 
+ +callishOp :: PrimOp -> Maybe CallishMachOp +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various minor variants of array indexing. 
+ +doIndexOffAddrOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode () +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode () +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> FCode () +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx + + +doWriteOffAddrOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode () +doWriteOffAddrOp maybe_pre_write_cast [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx val +doWriteOffAddrOp _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode () +doWriteByteArrayOp maybe_pre_write_cast [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast addr idx val +doWriteByteArrayOp _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +doWritePtrArrayOp addr idx val + = do emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) + mkBasicIndexedWrite arrPtrsHdrSize Nothing addr idx val + +mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType + -> LocalReg -> CmmExpr -> CmmExpr -> FCode () +mkBasicIndexedRead off Nothing read_rep res base idx + = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = emit (mkAssign (CmmLocal res) (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite :: ByteOff -> Maybe MachOp + -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () 
+mkBasicIndexedWrite off Nothing base idx val + = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val) +mkBasicIndexedWrite off (Just cast) base idx val + = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val]) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> Width -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr off width base idx + = cmmIndexExpr width (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off ty base idx + = CmmLoad (cmmIndexOffExpr off (typeWidth ty) base idx) ty + +setInfo :: CmmExpr -> CmmExpr -> CmmAGraph +setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr + diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs new file mode 100644 index 0000000..f442295 --- /dev/null +++ b/compiler/codeGen/StgCmmProf.hs @@ -0,0 +1,553 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmProf ( + initCostCentres, ccType, ccsType, + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitSetCCC, emitCCS, + + saveCurrentCostCentre, restoreCurrentCostCentre, + + -- Lag/drag/void stuff + ldvEnter, ldvEnterClosure, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. 
+#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import MkZipCfgCmm +import Cmm +import TyCon ( PrimRep(..) ) +import CmmUtils +import CLabel + +import Id +import qualified Module +import CostCentre +import StgSyn +import StaticFlags +import FastString +import Constants -- Lots of field offsets +import Outputable + +import Data.Maybe +import Data.Char +import Control.Monad + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +ccsType :: CmmType -- Type of a cost-centre stack +ccsType = bWord + +ccType :: CmmType -- Type of a cost centre +ccType = bWord + +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr ccsType + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> FCode () +-- Initialise the profiling field of an update frame 
+initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +--------------------------------------------------------------------------- +-- Saving and restoring the current cost centre +--------------------------------------------------------------------------- + +{- Note [Saving the current cost centre] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The current cost centre is like a global register. Like other +global registers, it's a caller-saves one. But consider + case (f x) of (p,q) -> rhs +Since 'f' may set the cost centre, we must restore it +before resuming rhs. So we want code like this: + local_cc = CCC -- save + r = f( x ) + CCC = local_cc -- restore +That is, we explicitly "save" the current cost centre in +a LocalReg, local_cc; and restore it after the call. The +C-- infrastructure will arrange to save local_cc across the +call. + +The same goes for join points; + let j x = join-stuff + in blah-blah +We want this kind of code: + local_cc = CCC -- save + blah-blah + J: + CCC = local_cc -- restore +-} + +saveCurrentCostCentre :: FCode (Maybe LocalReg) + -- Returns Nothing if profiling is off +saveCurrentCostCentre + | not opt_SccProfilingOn + = return Nothing + | otherwise + = do { local_cc <- newTemp ccType + ; emit (mkAssign (CmmLocal local_cc) curCCS) + ; return (Just local_cc) } + +restoreCurrentCostCentre :: Maybe LocalReg -> FCode () +restoreCurrentCostCentre Nothing + = return () +restoreCurrentCostCentre (Just local_cc) + = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) + + +------------------------------------------------------------------------------- +-- Recording allocation in a cost centre +------------------------------------------------------------------------------- + +-- | Record the allocation of a closure. 
The CmmExpr is the cost +-- centre stack to which to attribute the allocation. +profDynAlloc :: ClosureInfo -> CmmExpr -> FCode () +profDynAlloc cl_info ccs + = ifProfiling $ + profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs + +-- | Record the allocation of a closure (size is given by a CmmExpr) +-- The size must be in words, because the allocation counter in a CCS counts +-- in words. +profAlloc :: CmmExpr -> CmmExpr -> FCode () +profAlloc words ccs + = ifProfiling $ + emit (addToMemE alloc_rep + (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) + (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $ + [CmmMachOp mo_wordSub [words, + CmmLit (mkIntCLit profHdrSize)]])) + -- subtract the "profiling overhead", which is the + -- profiling header in a closure. + where + alloc_rep = REP_CostCentreStack_mem_alloc + +-- ---------------------------------------------------------------------- +-- Setting the cost centre in a new closure + +chooseDynCostCentres :: CostCentreStack + -> [Id] -- Args + -> StgExpr -- Body + -> FCode (CmmExpr, CmmExpr) +-- Called when allocating a closure +-- Tells which cost centre to put in the object, and which +-- to blame the cost of allocation on +chooseDynCostCentres ccs args body = do + -- Cost-centre we record in the object + use_ccs <- emitCCS ccs + + -- Cost-centre on whom we blame the allocation + let blame_ccs + | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) + | otherwise = use_ccs + + return (use_ccs, blame_ccs) + + +-- Some CostCentreStacks are a sequence of pushes on top of CCCS. +-- These pushes must be performed before we can refer to the stack in +-- an expression. 
+emitCCS :: CostCentreStack -> FCode CmmExpr +emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) + where + (cc's, ccs') = decomposeCCS ccs + + push_em ccs [] = return ccs + push_em ccs (cc:rest) = do + tmp <- newTemp ccsType + pushCostCentre tmp ccs cc + push_em (CmmReg (CmmLocal tmp)) rest + +ccsExpr :: CostCentreStack -> CmmExpr +ccsExpr ccs + | isCurrentCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + +isBox :: StgExpr -> Bool +-- If it's an utterly trivial RHS, then it must be +-- one introduced by boxHigherOrderArgs for profiling, +-- so we charge it to "OVERHEAD". +-- This looks like a GROSS HACK to me --SDM +isBox (StgApp fun []) = True +isBox other = False + + +-- ----------------------------------------------------------------------- +-- Setting the current cost centre on entry to a closure + +-- For lexically scoped profiling we have to load the cost centre from +-- the closure entered, if the costs are not supposed to be inherited. +-- This is done immediately on entering the fast entry point. + +-- Load current cost centre from closure, if not inherited. +-- Node is guaranteed to point to it, if profiling and not inherited. + +enterCostCentre + :: ClosureInfo + -> CostCentreStack + -> StgExpr -- The RHS of the closure + -> FCode () + +-- We used to have a special case for bindings of form +-- f = g True +-- where g has arity 2. The RHS is a thunk, but we don't +-- need to update it; and we want to subsume costs. +-- We don't have these sort of PAPs any more, so the special +-- case has gone away. 
+ +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + emit (mkStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + emit (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. 
+ emit (mkStore curCCSAddr enc_ccs) + ; emit (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) + is_box = isBox body + + -- if this is a function, then node will be tagged; we must subtract the tag + node_tag = funTag closure_info + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> FCode () +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> FCode () +enterCostCentreThunk closure = + ifProfiling $ do + emit $ mkStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [(stack,AddrHint)] False + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. 
+enteringPAP :: Integer -> FCode () +enteringPAP n + = emit (mkStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) + (CmmLit (CmmInt n cIntWidth))) + +ifProfiling :: FCode () -> FCode () +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +--------------------------------------------------------------- +-- Initialising Cost Centres & CCSs +--------------------------------------------------------------- + +initCostCentres :: CollectedCCs -> FCode CmmAGraph +-- Emit the declarations, and return code to register them +initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) + = getCode $ whenC opt_SccProfilingOn $ + do { mapM_ emitCostCentreDecl local_CCs + ; mapM_ emitCostCentreStackDecl singleton_CCSs + ; emit $ catAGraphs $ map mkRegisterCC local_CCs + ; emit $ catAGraphs $ map mkRegisterCCS singleton_CCSs } + + +emitCostCentreDecl :: CostCentre -> FCode () +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (Module.moduleNameString + (Module.moduleName (cc_mod cc))) + -- All cost centres will be in the main package, since we + -- don't normally use -auto-all or add SCCs to other packages. + -- Hence don't emit the package name in the module here. 
+ ; let lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + +emitCostCentreStackDecl :: CostCentreStack -> FCode () +emitCostCentreStackDecl ccs + = case maybeSingletonCCS ccs of + Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc) + Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs) + where + mk_lits cc = zero : + mkCCostCentre cc : + replicate (sizeof_ccs_words - 2) zero + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 W64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. 
+ | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +mkRegisterCC :: CostCentre -> CmmAGraph +mkRegisterCC cc + = withTemp cInt $ \tmp -> + catAGraphs [ + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST bWord), + mkStore cC_LIST cc_lit, + mkAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), + mkStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +mkRegisterCCS :: CostCentreStack -> CmmAGraph +mkRegisterCCS ccs + = withTemp cInt $ \ tmp -> + catAGraphs [ + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST bWord), + mkStore cCS_LIST ccs_lit, + mkAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), + mkStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> FCode () +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp ccsType -- TODO FIXME NOW + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) + when (isSccCountCostCentre cc) $ + emit (bumpSccCount curCCS) + 
+pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () +pushCostCentre result ccs cc + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [(ccs,AddrHint), + (CmmLit (mkCCostCentre cc), AddrHint)] + False + +bumpSccCount :: CmmExpr -> CmmAGraph +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> FCode () +ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. 
+-- +ldvEnterClosure :: ClosureInfo -> FCode () +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to subtract node's tag + +ldvEnter :: CmmExpr -> FCode () +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (mkStore ldv_wd new_ldv_wd) + mkNop) + where + -- don't forget to subtract node's tag + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cInt] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs new file mode 100644 index 0000000..e4bebb4 --- /dev/null +++ b/compiler/codeGen/StgCmmTicky.hs @@ -0,0 +1,397 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + 
tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickySlowCall, tickyDirectCall, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import StgSyn +import Cmm +import MkZipCfgCmm +import CmmUtils +import CLabel + +import Name +import Id +import StaticFlags +import BasicTypes +import FastString +import Constants +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType +import TyCon + +import Data.Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- krc: not using this right now -- +-- in the new version of ticky-ticky, we +-- don't change the closure layout. +-- leave it defined, though, to avoid breaking +-- other things. +staticTickyHdr = [] + +emitTickyCounter :: ClosureInfo -> [Id] -> FCode () +emitTickyCounter cl_info args + = ifTicky $ + do { mod_name <- getModuleName + ; fun_descr_lit <- mkStringCLit (fun_descr mod_name) + ; arg_descr_lit <- mkStringCLit arg_descr + ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter +-- krc: note that all the fields are I32 now; some were I16 before, +-- but the code generator wasn't handling that properly and it led to chaos, +-- panic and disorder. 
+ [ mkIntCLit 0, + mkIntCLit (length args), -- Arity + mkIntCLit 0, -- XXX: we no longer know this! Words passed on stack + fun_descr_lit, + arg_descr_lit, + zeroCLit, -- Entry count + zeroCLit, -- Allocs + zeroCLit -- Link + ] } + where + name = closureName cl_info + ticky_ctr_label = mkRednCountsLabel name $ clHasCafRefs cl_info + arg_descr = map (showTypeCategory . idType) args + fun_descr mod_name = ppr_for_ticky_name mod_name name + +-- When printing the name of a thing in a ticky file, we want to +-- give the module name even for *local* things. We print +-- just "x (M)" rather than "M.x" to distinguish them from the global kind. +ppr_for_ticky_name mod_name name + | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name))) + | otherwise = showSDocDebug (ppr name) + +-- ----------------------------------------------------------------------------- +-- Ticky stack frames + +tickyPushUpdateFrame = ifTicky $ bumpTickyCounter (sLit "UPDF_PUSHED_ctr") +tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (sLit "UPDF_OMITTED_ctr") + +-- ----------------------------------------------------------------------------- +-- Ticky entries + +tickyEnterDynCon = ifTicky $ bumpTickyCounter (sLit "ENT_DYN_CON_ctr") +tickyEnterDynThunk = ifTicky $ bumpTickyCounter (sLit "ENT_DYN_THK_ctr") +tickyEnterStaticCon = ifTicky $ bumpTickyCounter (sLit "ENT_STATIC_CON_ctr") +tickyEnterStaticThunk = ifTicky $ bumpTickyCounter (sLit "ENT_STATIC_THK_ctr") +tickyEnterViaNode = ifTicky $ bumpTickyCounter (sLit "ENT_VIA_NODE_ctr") + +tickyEnterThunk :: ClosureInfo -> FCode () +tickyEnterThunk cl_info + | isStaticClosure cl_info = tickyEnterStaticThunk + | otherwise = tickyEnterDynThunk + +tickyBlackHole :: Bool{-updatable-} -> FCode () +tickyBlackHole updatable + = ifTicky (bumpTickyCounter ctr) + where + ctr | updatable = (sLit "UPD_BH_SINGLE_ENTRY_ctr") + | otherwise = (sLit "UPD_BH_UPDATABLE_ctr") + +tickyUpdateBhCaf cl_info + = ifTicky (bumpTickyCounter ctr) + where + ctr 
| closureUpdReqd cl_info = (sLit "UPD_CAF_BH_SINGLE_ENTRY_ctr") + | otherwise = (sLit "UPD_CAF_BH_UPDATABLE_ctr") + +tickyEnterFun :: ClosureInfo -> FCode () +tickyEnterFun cl_info + = ifTicky $ + do { bumpTickyCounter ctr + ; fun_ctr_lbl <- getTickyCtrLabel + ; registerTickyCtr fun_ctr_lbl + ; bumpTickyCounter' (cmmLabelOffB fun_ctr_lbl oFFSET_StgEntCounter_entry_count) + } + where + ctr | isStaticClosure cl_info = (sLit "ENT_STATIC_FUN_DIRECT_ctr") + | otherwise = (sLit "ENT_DYN_FUN_DIRECT_ctr") + +registerTickyCtr :: CLabel -> FCode () +-- Register a ticky counter +-- if ( ! f_ct.registeredp ) { +-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ +-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ +-- f_ct.registeredp = 1 } +registerTickyCtr ctr_lbl + = emit (mkCmmIfThen test (catAGraphs register_stmts)) + where + -- krc: code generator doesn't handle Not, so we test for Eq 0 instead + test = CmmMachOp (MO_Eq wordWidth) + [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) bWord, + CmmLit (mkIntCLit 0)] + register_stmts + = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) + (CmmLoad ticky_entry_ctrs bWord) + , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl) + , mkStore (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) + (CmmLit (mkIntCLit 1)) ] + ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel (sLit "ticky_entry_ctrs")) + +tickyReturnOldCon, tickyReturnNewCon :: Arity -> FCode () +tickyReturnOldCon arity + = ifTicky $ do { bumpTickyCounter (sLit "RET_OLD_ctr") + ; bumpHistogram (sLit "RET_OLD_hst") arity } +tickyReturnNewCon arity + | not opt_DoTickyProfiling = nopC + | otherwise + = ifTicky $ do { bumpTickyCounter (sLit "RET_NEW_ctr") + ; bumpHistogram (sLit "RET_NEW_hst") arity } + +tickyUnboxedTupleReturn :: Int -> FCode () +tickyUnboxedTupleReturn arity + = ifTicky $ do { bumpTickyCounter (sLit "RET_UNBOXED_TUP_ctr") + ; bumpHistogram (sLit "RET_UNBOXED_TUP_hst") 
arity } + +tickyVectoredReturn :: Int -> FCode () +tickyVectoredReturn family_size + = ifTicky $ do { bumpTickyCounter (sLit "VEC_RETURN_ctr") + ; bumpHistogram (sLit "RET_VEC_RETURN_hst") family_size } + +-- ----------------------------------------------------------------------------- +-- Ticky calls + +-- Ticks at a *call site*: +tickyDirectCall :: Arity -> [StgArg] -> FCode () +tickyDirectCall arity args + | arity == length args = tickyKnownCallExact + | otherwise = do tickyKnownCallExtraArgs + tickySlowCallPat (map argPrimRep (drop arity args)) + +tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") +tickyKnownCallExact = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_ctr") +tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_EXTRA_ARGS_ctr") +tickyUnknownCall = ifTicky $ bumpTickyCounter (sLit "UNKNOWN_CALL_ctr") + +-- Tick for the call pattern at slow call site (i.e. in addition to +-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) +tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode () +tickySlowCall lf_info [] + = return () +tickySlowCall lf_info args + = do { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map argPrimRep args) } + +tickySlowCallPat :: [PrimRep] -> FCode () +tickySlowCallPat args = return () +{- LATER: (introduces recursive module dependency now). 
+ case callPattern args of + (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel pat) + (str, False) -> bumpTickyCounter (sLit "TICK_SLOW_CALL_OTHER") + +callPattern :: [CgRep] -> (String,Bool) +callPattern reps + | match == length reps = (chars, True) + | otherwise = (chars, False) + where (_,match) = findMatch reps + chars = map argChar reps + +argChar VoidArg = 'v' +argChar PtrArg = 'p' +argChar NonPtrArg = 'n' +argChar LongArg = 'l' +argChar FloatArg = 'f' +argChar DoubleArg = 'd' +-} + +-- ----------------------------------------------------------------------------- +-- Ticky allocation + +tickyDynAlloc :: ClosureInfo -> FCode () +-- Called when doing a dynamic heap allocation +tickyDynAlloc cl_info + = ifTicky $ + case smRepClosureType (closureSMRep cl_info) of + Just Constr -> tick_alloc_con + Just ConstrNoCaf -> tick_alloc_con + Just Fun -> tick_alloc_fun + Just Thunk -> tick_alloc_thk + Just ThunkSelector -> tick_alloc_thk + -- black hole + Nothing -> return () + where + -- will be needed when we fill in stubs + cl_size = closureSize cl_info + slop_size = slopSize cl_info + + tick_alloc_thk + | closureUpdReqd cl_info = tick_alloc_up_thk + | otherwise = tick_alloc_se_thk + + -- krc: changed from panic to return () + -- just to get something working + tick_alloc_con = return () + tick_alloc_fun = return () + tick_alloc_up_thk = return () + tick_alloc_se_thk = return () + + +tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +tickyAllocPrim hdr goods slop = ifTicky $ pprTrace "ToDo: tickyAllocPrim" empty (return ()) + +tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode () +tickyAllocThunk goods slop = ifTicky $ pprTrace "ToDo: tickyAllocThunk" empty (return ()) + +tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode () +tickyAllocPAP goods slop = ifTicky $ pprTrace "ToDo: tickyAllocPAP" empty (return ()) + +tickyAllocHeap :: VirtualHpOffset -> FCode () +-- Called when doing a heap check [TICK_ALLOC_HEAP] +-- Must be lazy in the amount of allocation! 
+tickyAllocHeap hp + = ifTicky $ + do { ticky_ctr <- getTickyCtrLabel + ; emit $ catAGraphs $ + if hp == 0 then [] -- Inside the emitMiddle to avoid control + else [ -- dependency on the argument + -- Bump the allocation count in the StgEntCounter + addToMem REP_StgEntCounter_allocs + (CmmLit (cmmLabelOffB ticky_ctr + oFFSET_StgEntCounter_allocs)) hp, + -- Bump ALLOC_HEAP_ctr + addToMemLbl cLong (mkRtsDataLabel (sLit "ALLOC_HEAP_ctr")) 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLong (mkRtsDataLabel (sLit "ALLOC_HEAP_tot")) hp] } + +-- ----------------------------------------------------------------------------- +-- Ticky utils + +ifTicky :: FCode () -> FCode () +ifTicky code + | opt_DoTickyProfiling = code + | otherwise = nopC + +-- All the ticky-ticky counters are declared "unsigned long" in C +bumpTickyCounter :: LitString -> FCode () +bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkRtsDataLabel lbl) 0) + +bumpTickyCounter' :: CmmLit -> FCode () +-- krc: note that we're incrementing the _entry_count_ field of the ticky counter +bumpTickyCounter' lhs = emit (addToMem cLong (CmmLit lhs) 1) + +bumpHistogram :: LitString -> Int -> FCode () +bumpHistogram lbl n +-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongWidth)) + = return () -- TEMP SPJ Apr 07 + +bumpHistogramE :: LitString -> CmmExpr -> FCode () +bumpHistogramE lbl n + = do t <- newTemp cLong + emit (mkAssign (CmmLocal t) n) + emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) + (mkAssign (CmmLocal t) eight)) + emit (addToMem cLong + (cmmIndexExpr cLongWidth + (CmmLit (CmmLabel (mkRtsDataLabel lbl))) + (CmmReg (CmmLocal t))) + 1) + where + eight = CmmLit (CmmInt 8 cLongWidth) + +------------------------------------------------------------------ +-- Showing the "type category" for ticky-ticky profiling + +showTypeCategory :: Type -> Char + {- {C,I,F,D} char, int, float, double + T tuple + S other single-constructor type + {c,i,f,d} unboxed ditto + t *unpacked* 
tuple + s *unpacked* single-cons... + + v void# + a primitive array + + E enumeration type + + dictionary, unless it's a ... + L List + > function + M other (multi-constructor) data-con type + . other type + - reserved for others to mark as "uninteresting" + -} +showTypeCategory ty + = if isDictTy ty + then '+' + else + case tcSplitTyConApp_maybe ty of + Nothing -> if isJust (tcSplitFunTy_maybe ty) + then '>' + else '.' + + Just (tycon, _) -> + let utc = getUnique tycon in + if utc == charDataConKey then 'C' + else if utc == intDataConKey then 'I' + else if utc == floatDataConKey then 'F' + else if utc == doubleDataConKey then 'D' + else if utc == charPrimTyConKey then 'c' + else if (utc == intPrimTyConKey || utc == wordPrimTyConKey + || utc == addrPrimTyConKey) then 'i' + else if utc == floatPrimTyConKey then 'f' + else if utc == doublePrimTyConKey then 'd' + else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus + else if isEnumerationTyCon tycon then 'E' + else if isTupleTyCon tycon then 'T' + else if isJust (tyConSingleDataCon_maybe tycon) then 'S' + else if utc == listTyConKey then 'L' + else 'M' -- oh, well... 
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs new file mode 100644 index 0000000..6cfca5f --- /dev/null +++ b/compiler/codeGen/StgCmmUtils.hs @@ -0,0 +1,902 @@ +----------------------------------------------------------------------------- +-- +-- Code generator utilities; mostly monadic +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmUtils ( + cgLit, mkSimpleLit, + emitDataLits, mkDataLits, + emitRODataLits, mkRODataLits, + emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, + assignTemp, newTemp, withTemp, + + newUnboxedTupleRegs, + + mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, + emitSwitch, + + tagToClosure, mkTaggedObjectLoad, + + callerSaveVolatileRegs, get_GlobalReg_addr, + + cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmUGtWord, + cmmOffsetExprW, cmmOffsetExprB, + cmmRegOffW, cmmRegOffB, + cmmLabelOffW, cmmLabelOffB, + cmmOffsetW, cmmOffsetB, + cmmOffsetLitW, cmmOffsetLitB, + cmmLoadIndexW, + cmmConstrTag, cmmConstrTag1, + + cmmUntag, cmmIsTagged, cmmGetTag, + + addToMem, addToMemE, addToMemLbl, + mkWordCLit, + mkStringCLit, mkByteStringCLit, + packHalfWordsCLit, + blankWord, + + getSRTInfo, clHasCafRefs, srt_escape + ) where + +#include "HsVersions.h" +#include "MachRegs.h" + +import StgCmmMonad +import StgCmmClosure +import BlockId +import Cmm +import CmmExpr +import MkZipCfgCmm +import CLabel +import CmmUtils +import PprCmm ( {- instances -} ) + +import ForeignCall +import IdInfo +import Type +import TyCon +import Constants +import SMRep +import StgSyn ( SRT(..) 
) +import Literal +import Digraph +import ListSetOps +import Util +import Unique +import DynFlags +import FastString +import Outputable + +import Data.Char +import Data.Bits +import Data.Word +import Data.Maybe + + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordWidth +mkSimpleLit (MachInt64 i) = CmmInt i W64 +mkSimpleLit (MachWord i) = CmmInt i wordWidth +mkSimpleLit (MachWord64 i) = CmmInt i W64 +mkSimpleLit (MachFloat r) = CmmFloat r W32 +mkSimpleLit (MachDouble r) = CmmFloat r W64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me +mkSimpleLit other = pprPanic "mkSimpleLit" (ppr other) + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordWidth +mkLtOp (MachFloat _) = MO_F_Lt W32 +mkLtOp (MachDouble _) = MO_F_Lt W64 +mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit))) + -- ToDo: seems terribly indirect! 
+ + +--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordWidth e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW base off ty = CmmLoad (cmmOffsetW base off) ty + +----------------------- +cmmULtWord, cmmUGeWord, cmmUGtWord, cmmSubWord, + cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord + :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] +cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] +cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] +--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2] +--cmmUShrWord e1 e2 = 
CmmMachOp mo_wordUShr [e1, e2] +cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprWidth e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE + +-- Tagging -- +-- Tag bits mask +--cmmTagBits = CmmLit (mkIntCLit tAG_BITS) +cmmTagMask, cmmPointerMask :: CmmExpr +cmmTagMask = CmmLit (mkIntCLit tAG_MASK) +cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag, cmmGetTag :: CmmExpr -> CmmExpr +cmmUntag e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag e = (e `cmmAndWord` cmmPointerMask) + +cmmGetTag e = (e `cmmAndWord` cmmTagMask) + +-- Test if a closure pointer is untagged +cmmIsTagged :: CmmExpr -> CmmExpr +cmmIsTagged e = (e `cmmAndWord` cmmTagMask) + `cmmNeWord` CmmLit zeroCLit + +cmmConstrTag, cmmConstrTag1 :: CmmExpr -> CmmExpr +cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1)) +-- Get constructor tag, but one based. +cmmConstrTag1 e = e `cmmAndWord` cmmTagMask + +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordWidth + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. 
(fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph +addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n + +addToMem :: CmmType -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmAGraph +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep))) + +addToMemE :: CmmType -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmAGraph +addToMemE rep ptr n + = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n]) + + +------------------------------------------------------------------------- +-- +-- Loading a field from an object, +-- where the object pointer is itself tagged +-- +------------------------------------------------------------------------- + +mkTaggedObjectLoad :: LocalReg -> LocalReg -> WordOff -> DynTag -> CmmAGraph +-- (loadTaggedObjectField reg base off tag) generates assignment +-- reg = bitsK[ base + off - tag ] +-- where K is fixed by 'reg' +mkTaggedObjectLoad reg base offset tag + = mkAssign (CmmLocal reg) + (CmmLoad (cmmOffsetB (CmmReg (CmmLocal base)) + (wORD_SIZE*offset - tag)) + (localRegType reg)) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). 
+-- +------------------------------------------------------------------------- + +tagToClosure :: TyCon -> CmmExpr -> CmmExpr +tagToClosure tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) bWord + where closure_tbl = CmmLit (CmmLabel lbl) + lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitRtsCall :: LitString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () +emitRtsCall fun args safe = emitRtsCall' [] fun args Nothing safe + -- The 'Nothing' says "save all global registers" + +emitRtsCallWithVols :: LitString -> [(CmmExpr,ForeignHint)] -> [GlobalReg] -> Bool -> FCode () +emitRtsCallWithVols fun args vols safe + = emitRtsCall' [] fun args (Just vols) safe + +emitRtsCallWithResult :: LocalReg -> ForeignHint -> LitString + -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () +emitRtsCallWithResult res hint fun args safe + = emitRtsCall' [(res,hint)] fun args Nothing safe + +-- Make a call to an RTS C procedure +emitRtsCall' + :: [(LocalReg,ForeignHint)] + -> LitString + -> [(CmmExpr,ForeignHint)] + -> Maybe [GlobalReg] + -> Bool -- True <=> CmmSafe call + -> FCode () +emitRtsCall' res fun args _vols safe + = --error "emitRtsCall'" + do { emit caller_save + ; emit call + ; emit caller_load } + where + call = if safe then + mkCall fun_expr CCallConv res' args' undefined + else + mkUnsafeCall (ForeignTarget fun_expr + (ForeignConvention CCallConv arg_hints res_hints)) res' args' + (args', arg_hints) = unzip args + (res', res_hints) = unzip res + (caller_save, caller_load) = callerSaveVolatileRegs + fun_expr = mkLblExpr (mkRtsCodeLabel fun) + + +----------------------------------------------------------------------------- +-- +-- Caller-Save Registers +-- +----------------------------------------------------------------------------- + +-- Here we generate the sequence of 
saves/restores required around a +-- foreign call instruction. + +-- TODO: reconcile with includes/Regs.h +-- * Regs.h claims that BaseReg should be saved last and loaded first +-- * This might not have been tickled before since BaseReg is callee save +-- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim +callerSaveVolatileRegs :: (CmmAGraph, CmmAGraph) +callerSaveVolatileRegs = (caller_save, caller_load) + where + caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) + caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) + + system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery + {- ,SparkHd,SparkTl,SparkBase,SparkLim -} + , BaseReg ] + + regs_to_save = filter callerSaves system_regs + + callerSaveGlobalReg reg + = mkStore (get_GlobalReg_addr reg) (CmmReg (CmmGlobal reg)) + + callerRestoreGlobalReg reg + = mkAssign (CmmGlobal reg) + (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg)) + +-- ----------------------------------------------------------------------------- +-- Global registers + +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_addr always produces the +-- register table address for it. +-- (See also get_GlobalReg_reg_or_addr in MachRegs) + +get_GlobalReg_addr :: GlobalReg -> CmmExpr +get_GlobalReg_addr BaseReg = regTableOffset 0 +get_GlobalReg_addr mid = get_Regtable_addr_from_offset + (globalRegType mid) (baseRegOffset mid) + +-- Calculate a literal representing an offset into the register table. +-- Used when we don't have an actual BaseReg to offset from. 
+regTableOffset :: Int -> CmmExpr +regTableOffset n = + CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) + +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr +get_Regtable_addr_from_offset _rep offset = +#ifdef REG_Base + CmmRegOff (CmmGlobal BaseReg) offset +#else + regTableOffset offset +#endif + + +-- | Returns 'True' if this global register is stored in a caller-saves +-- machine register. + +callerSaves :: GlobalReg -> Bool + +#ifdef CALLER_SAVES_Base +callerSaves BaseReg = True +#endif +#ifdef CALLER_SAVES_Sp +callerSaves Sp = True +#endif +#ifdef CALLER_SAVES_SpLim +callerSaves SpLim = True +#endif +#ifdef CALLER_SAVES_Hp +callerSaves Hp = True +#endif +#ifdef CALLER_SAVES_HpLim +callerSaves HpLim = True +#endif +#ifdef CALLER_SAVES_CurrentTSO +callerSaves CurrentTSO = True +#endif +#ifdef CALLER_SAVES_CurrentNursery +callerSaves CurrentNursery = True +#endif +callerSaves _ = False + + +-- ----------------------------------------------------------------------------- +-- Information about global registers + +baseRegOffset :: GlobalReg -> Int + +baseRegOffset Sp = oFFSET_StgRegTable_rSp +baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim +baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 +baseRegOffset Hp = oFFSET_StgRegTable_rHp +baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim +baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO +baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery +baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc +baseRegOffset GCEnter1 = oFFSET_stgGCEnter1 +baseRegOffset GCFun = oFFSET_stgGCFun +baseRegOffset reg = pprPanic "baseRegOffset:" (ppr reg) + +------------------------------------------------------------------------- +-- +-- Strings generate a top-level data block +-- +------------------------------------------------------------------------- + +emitDataLits :: CLabel -> [CmmLit] -> FCode () +-- Emit a data-segment data block +emitDataLits lbl lits + = emitData Data (CmmDataLabel lbl 
: map CmmStaticLit lits) + +mkDataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info stmt +-- Emit a data-segment data block +mkDataLits lbl lits + = CmmData Data (CmmDataLabel lbl : map CmmStaticLit lits) + +emitRODataLits :: CLabel -> [CmmLit] -> FCode () +-- Emit a read-only data block +emitRODataLits lbl lits + = emitData section (CmmDataLabel lbl : map CmmStaticLit lits) + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkRODataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info stmt +mkRODataLits lbl lits + = CmmData section (CmmDataLabel lbl : map CmmStaticLit lits) + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkStringCLit :: String -> FCode CmmLit +-- Make a global definition for the string, +-- and return its label +mkStringCLit str = mkByteStringCLit (map (fromIntegral . 
ord) str) + +mkByteStringCLit :: [Word8] -> FCode CmmLit +mkByteStringCLit bytes + = do { uniq <- newUnique + ; let lbl = mkStringLitLabel uniq + ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes] + ; return (CmmLabel lbl) } + +------------------------------------------------------------------------- +-- +-- Assigning expressions to temporaries +-- +------------------------------------------------------------------------- + +assignTemp :: CmmExpr -> FCode LocalReg +-- Make sure the argument is in a local register +assignTemp (CmmReg (CmmLocal reg)) = return reg +assignTemp e = do { uniq <- newUnique + ; let reg = LocalReg uniq (cmmExprType e) + ; emit (mkAssign (CmmLocal reg) e) + ; return reg } + +newTemp :: CmmType -> FCode LocalReg +newTemp rep = do { uniq <- newUnique + ; return (LocalReg uniq rep) } + +newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint]) +-- Choose suitable local regs to use for the components +-- of an unboxed tuple that we are about to return to +-- the Sequel. If the Sequel is a joint point, using the +-- regs it wants will save later assignments. +newUnboxedTupleRegs res_ty + = ASSERT( isUnboxedTupleType res_ty ) + do { sequel <- getSequel + ; regs <- choose_regs sequel + ; ASSERT( regs `equalLength` reps ) + return (regs, map primRepForeignHint reps) } + where + ty_args = tyConAppArgs (repType res_ty) + reps = [ rep + | ty <- ty_args + , let rep = typePrimRep ty + , not (isVoidRep rep) ] + choose_regs (AssignTo regs _) = return regs + choose_regs _other = mapM (newTemp . primRepCmmType) reps + + + +------------------------------------------------------------------------- +-- mkMultiAssign +------------------------------------------------------------------------- + +mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph +-- Emit code to perform the assignments in the +-- input simultaneously, using temporary variables when necessary. 
+ +type Key = Int +type Vrtx = (Key, Stmt) -- Give each vertex a unique number, + -- for fast comparison +type Stmt = (LocalReg, CmmExpr) -- r := e + +-- We use the strongly-connected component algorithm, in which +-- * the vertices are the statements +-- * an edge goes from s1 to s2 iff +-- s1 assigns to something s2 uses +-- that is, if s1 should *follow* s2 in the final order + +mkMultiAssign [] [] = mkNop +mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs +mkMultiAssign regs rhss = ASSERT( equalLength regs rhss ) + unscramble ([1..] `zip` (regs `zip` rhss)) + +unscramble :: [Vrtx] -> CmmAGraph +unscramble vertices + = catAGraphs (map do_component components) + where + edges :: [ (Vrtx, Key, [Key]) ] + edges = [ (vertex, key1, edges_from stmt1) + | vertex@(key1, stmt1) <- vertices ] + + edges_from :: Stmt -> [Key] + edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices, + stmt1 `mustFollow` stmt2 ] + + components :: [SCC Vrtx] + components = stronglyConnCompFromEdgedVertices edges + + -- do_components deal with one strongly-connected component + -- Not cyclic, or singleton? Just do it + do_component :: SCC Vrtx -> CmmAGraph + do_component (AcyclicSCC (_,stmt)) = mk_graph stmt + do_component (CyclicSCC []) = panic "do_component" + do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt + + -- Cyclic? Then go via temporaries. Pick one to + -- break the loop and try again with the rest. 
+ do_component (CyclicSCC ((_,first_stmt) : rest)) + = withUnique $ \u -> + let (to_tmp, from_tmp) = split u first_stmt + in mk_graph to_tmp + <*> unscramble rest + <*> mk_graph from_tmp + + split :: Unique -> Stmt -> (Stmt, Stmt) + split uniq (reg, rhs) + = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp))) + where + rep = cmmExprType rhs + tmp = LocalReg uniq rep + + mk_graph :: Stmt -> CmmAGraph + mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs + +mustFollow :: Stmt -> Stmt -> Bool +(reg, _) `mustFollow` (_, rhs) = reg `regUsedIn` rhs + +regUsedIn :: LocalReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg (CmmLocal reg') = reg == reg' +reg `regUsedIn` CmmRegOff (CmmLocal reg') _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es +_reg `regUsedIn` _other = False -- The CmmGlobal cases + + +------------------------------------------------------------------------- +-- mkSwitch +------------------------------------------------------------------------- + + +emitSwitch :: CmmExpr -- Tag to switch on + -> [(ConTagZ, CmmAGraph)] -- Tagged branches + -> Maybe CmmAGraph -- Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> FCode () +emitSwitch tag_expr branches mb_deflt lo_tag hi_tag + = do { dflags <- getDynFlags + ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) } + where + via_C dflags | HscC <- hscTarget dflags = True + | otherwise = False + + +mkCmmSwitch :: Bool -- True <=> never generate a conditional tree + -> CmmExpr -- Tag to switch on + -> [(ConTagZ, CmmAGraph)] -- Tagged branches + -> Maybe CmmAGraph -- Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> CmmAGraph + +-- First, two rather common cases in which there is no work to do +mkCmmSwitch _ _ [] (Just code) _ _ = code +mkCmmSwitch _ _ [(_,code)] Nothing _ _ = 
code + +-- Right, off we go +mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag + = withFreshLabel "switch join" $ \ join_lbl -> + label_default join_lbl mb_deflt $ \ mb_deflt -> + label_branches join_lbl branches $ \ branches -> + assignTemp' tag_expr $ \tag_expr' -> + + mk_switch tag_expr' (sortLe le branches) mb_deflt + lo_tag hi_tag via_C + -- Sort the branches before calling mk_switch + <*> mkLabel join_lbl Nothing + + where + (t1,_) `le` (t2,_) = t1 <= t2 + +mk_switch :: CmmExpr -> [(ConTagZ, BlockId)] + -> Maybe BlockId + -> ConTagZ -> ConTagZ -> Bool + -> CmmAGraph + +-- SINGLETON TAG RANGE: no case analysis to do +mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C + | lo_tag == hi_tag + = ASSERT( tag == lo_tag ) + mkBranch lbl + +-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do +mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ + = mkBranch lbl + -- The simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation we can be sure the (:) case + -- can't happen, so no need to test + +-- SINGLETON BRANCH: one equality check to do +mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _ + = mkCbranch cond deflt lbl + where + cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag)) + -- We have lo_tag < hi_tag, but there's only one branch, + -- so there must be a default + +-- ToDo: we might want to check for the two branch case, where one of +-- the branches is the tag 0, because comparing '== 0' is likely to be +-- more efficient than other kinds of comparison. + +-- DENSE TAG RANGE: use a switch statment. +-- +-- We also use a switch uncoditionally when compiling via C, because +-- this will get emitted as a C switch statement and the C compiler +-- should do a good job of optimising it. Also, older GCC versions +-- (2.95 in particular) have problems compiling the complicated +-- if-trees generated by this code, so compiling to a switch every +-- time works around that problem. 
+-- +mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C + | use_switch -- Use a switch + = let + find_branch :: ConTagZ -> Maybe BlockId + find_branch i = case (assocMaybe branches i) of + Just lbl -> Just lbl + Nothing -> mb_deflt + + -- NB. we have eliminated impossible branches at + -- either end of the range (see below), so the first + -- tag of a real branch is real_lo_tag (not lo_tag). + arms :: [Maybe BlockId] + arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] + in + mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + + -- if we can knock off a bunch of default cases with one if, then do so + | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches + = mkCmmIfThenElse + (cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch))) + (mkBranch deflt) + (mk_switch tag_expr branches mb_deflt + lowest_branch hi_tag via_C) + + | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches + = mkCmmIfThenElse + (cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch))) + (mk_switch tag_expr branches mb_deflt + lo_tag highest_branch via_C) + (mkBranch deflt) + + | otherwise -- Use an if-tree + = mkCmmIfThenElse + (cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag))) + (mk_switch tag_expr hi_branches mb_deflt + mid_tag hi_tag via_C) + (mk_switch tag_expr lo_branches mb_deflt + lo_tag (mid_tag-1) via_C) + -- we test (e >= mid_tag) rather than (e < mid_tag), because + -- the former works better when e is a comparison, and there + -- are two tags 0 & 1 (mid_tag == 1). In this case, the code + -- generator can reduce the condition to e itself without + -- having to reverse the sense of the comparison: comparisons + -- can't always be easily reversed (eg. floating + -- pt. comparisons). 
+ where + use_switch = {- pprTrace "mk_switch" ( + ppr tag_expr <+> text "n_tags:" <+> int n_tags <+> + text "branches:" <+> ppr (map fst branches) <+> + text "n_branches:" <+> int n_branches <+> + text "lo_tag:" <+> int lo_tag <+> + text "hi_tag:" <+> int hi_tag <+> + text "real_lo_tag:" <+> int real_lo_tag <+> + text "real_hi_tag:" <+> int real_hi_tag) $ -} + ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (via_C || (dense && big_enough)) + -- up to 4 branches we use a decision tree, otherwise + -- a switch (== jump table in the NCG). This seems to be + -- optimal, and corresponds with what gcc does. + big_enough = n_branches > 4 + dense = n_branches > (n_tags `div` 2) + n_branches = length branches + + -- ignore default slots at each end of the range if there's + -- no default branch defined. + lowest_branch = fst (head branches) + highest_branch = fst (last branches) + + real_lo_tag + | isNothing mb_deflt = lowest_branch + | otherwise = lo_tag + + real_hi_tag + | isNothing mb_deflt = highest_branch + | otherwise = hi_tag + + n_tags = real_hi_tag - real_lo_tag + 1 + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 1 give the *second* tag + -- There are always at least 2 branches here + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_tag + +-------------- +mkCmmLitSwitch :: CmmExpr -- Tag to switch on + -> [(Literal, CmmAGraph)] -- Tagged branches + -> CmmAGraph -- Default branch (always) + -> CmmAGraph -- Emit the code +-- Used for general literals, whose size might not be a word, +-- where there is always a default case, and where we don't know +-- the range of values for certain. For simplicity we always generate a tree. 
+-- +-- ToDo: for integers we could do better here, perhaps by generalising +-- mk_switch and using that. --SDM 15/09/2004 +mkCmmLitSwitch _scrut [] deflt = deflt +mkCmmLitSwitch scrut branches deflt + = assignTemp' scrut $ \ scrut' -> + withFreshLabel "switch join" $ \ join_lbl -> + label_code join_lbl deflt $ \ deflt -> + label_branches join_lbl branches $ \ branches -> + mk_lit_switch scrut' deflt (sortLe le branches) + where + le (t1,_) (t2,_) = t1 <= t2 + +mk_lit_switch :: CmmExpr -> BlockId + -> [(Literal,BlockId)] + -> CmmAGraph +mk_lit_switch scrut deflt [(lit,blk)] + = mkCbranch + (CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]) + deflt blk + where + cmm_lit = mkSimpleLit lit + rep = typeWidth (cmmLitType cmm_lit) + +mk_lit_switch scrut deflt_blk_id branches + = mkCmmIfThenElse cond + (mk_lit_switch scrut deflt_blk_id lo_branches) + (mk_lit_switch scrut deflt_blk_id hi_branches) + where + n_branches = length branches + (mid_lit,_) = branches !! (n_branches `div` 2) + -- See notes above re mid_tag + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_lit + + cond = CmmMachOp (mkLtOp mid_lit) + [scrut, CmmLit (mkSimpleLit mid_lit)] + + +-------------- +label_default :: BlockId -> Maybe CmmAGraph + -> (Maybe BlockId -> CmmAGraph) + -> CmmAGraph +label_default _ Nothing thing_inside + = thing_inside Nothing +label_default join_lbl (Just code) thing_inside + = label_code join_lbl code $ \ lbl -> + thing_inside (Just lbl) + +-------------- +label_branches :: BlockId -> [(a,CmmAGraph)] + -> ([(a,BlockId)] -> CmmAGraph) + -> CmmAGraph +label_branches _join_lbl [] thing_inside + = thing_inside [] +label_branches join_lbl ((tag,code):branches) thing_inside + = label_code join_lbl code $ \ lbl -> + label_branches join_lbl branches $ \ branches' -> + thing_inside ((tag,lbl):branches') + +-------------- +label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph +-- (label_code J code fun) +-- generates +-- [L: code; goto J] fun L 
+label_code join_lbl code thing_inside + = withFreshLabel "switch" $ \lbl -> + outOfLine (mkLabel lbl Nothing <*> code <*> mkBranch join_lbl) + <*> thing_inside lbl + + +-------------- +assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph +assignTemp' e thing_inside + | isTrivialCmmExpr e = thing_inside e + | otherwise = withTemp (cmmExprType e) $ \ lreg -> + let reg = CmmLocal lreg in + mkAssign reg e <*> thing_inside (CmmReg reg) + +withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph +withTemp rep thing_inside + = withUnique $ \uniq -> thing_inside (LocalReg uniq rep) + + +------------------------------------------------------------------------- +-- +-- Static Reference Tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: SRT -> FCode C_SRT +getSRTInfo (SRTEntries {}) = panic "getSRTInfo" + +getSRTInfo (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { id <- newUnique + ; top_srt <- getSRTLabel + ; let srt_desc_lbl = mkLargeSRTLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW top_srt off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { top_srt <- getSRTLabel + ; return (C_SRT top_srt off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +getSRTInfo NoSRT + = -- TODO: Should we panic in this case? 
+ -- Someone obviously thinks there should be an SRT + return NoC_SRT + + +srt_escape :: StgHalfWord +srt_escape = -1 diff --git a/compiler/deSugar/DsForeign.lhs b/compiler/deSugar/DsForeign.lhs index b0c82f8..080289e 100644 --- a/compiler/deSugar/DsForeign.lhs +++ b/compiler/deSugar/DsForeign.lhs @@ -18,8 +18,6 @@ import DsMonad import HsSyn import DataCon -import MachOp -import SMRep import CoreUtils import Id import Literal @@ -31,6 +29,8 @@ import Coercion import TcType import Var +import CmmExpr +import CmmUtils import HscTypes import ForeignCall import TysWiredIn @@ -165,8 +165,7 @@ fun_type_arg_stdcall_info StdCallConv ty = let (_tvs,sans_foralls) = tcSplitForAllTys arg_ty (fe_arg_tys, _orig_res_ty) = tcSplitFunTys sans_foralls - in - Just $ sum (map (machRepByteWidth . typeMachRep . getPrimTyOf) fe_arg_tys) + in Just $ sum (map (widthInBytes . typeWidth . typeCmmType . getPrimTyOf) fe_arg_tys) fun_type_arg_stdcall_info _other_conv _ = Nothing \end{code} @@ -425,19 +424,26 @@ mkFExportCBits :: FastString ) mkFExportCBits c_nm maybe_target arg_htys res_hty is_IO_res_ty cc = (header_bits, c_bits, type_string, - sum [ machRepByteWidth rep | (_,_,_,rep) <- aug_arg_info] -- all the args + sum [ widthInBytes (typeWidth rep) | (_,_,_,rep) <- aug_arg_info] -- all the args + -- NB. the calculation here isn't strictly speaking correct. + -- We have a primitive Haskell type (eg. Int#, Double#), and + -- we want to know the size, when passed on the C stack, of + -- the associated C type (eg. HsInt, HsDouble). We don't have + -- this information to hand, but we know what GHC's conventions + -- are for passing around the primitive Haskell types, so we + -- use that instead. 
I hope the two coincide --SDM ) where -- list the arguments to the C function arg_info :: [(SDoc, -- arg name SDoc, -- C type Type, -- Haskell type - MachRep)] -- the MachRep + CmmType)] -- the CmmType arg_info = [ let stg_type = showStgType ty in (arg_cname n stg_type, stg_type, ty, - typeMachRep (getPrimTyOf ty)) + typeCmmType (getPrimTyOf ty)) | (ty,n) <- zip arg_htys [1::Int ..] ] arg_cname n stg_ty @@ -464,7 +470,7 @@ mkFExportCBits c_nm maybe_target arg_htys res_hty is_IO_res_ty cc stable_ptr_arg = (text "the_stableptr", text "StgStablePtr", undefined, - typeMachRep (mkStablePtrPrimTy alphaTy)) + typeCmmType (mkStablePtrPrimTy alphaTy)) -- stuff to do with the return type of the C function res_hty_is_unit = res_hty `coreEqType` unitTy -- Look through any newtypes @@ -582,16 +588,6 @@ foreignExportInitialiser hs_fn = ] --- NB. the calculation here isn't strictly speaking correct. --- We have a primitive Haskell type (eg. Int#, Double#), and --- we want to know the size, when passed on the C stack, of --- the associated C type (eg. HsInt, HsDouble). We don't have --- this information to hand, but we know what GHC's conventions --- are for passing around the primitive Haskell types, so we --- use that instead. I hope the two coincide --SDM -typeMachRep :: Type -> MachRep -typeMachRep ty = argMachRep (typeCgRep ty) - mkHObj :: Type -> SDoc mkHObj t = text "rts_mk" <> text (showFFIType t) @@ -608,8 +604,8 @@ showFFIType t = getOccString (getName tc) Just (tc,_) -> tc Nothing -> pprPanic "showFFIType" (ppr t) -insertRetAddr :: CCallConv -> [(SDoc, SDoc, Type, MachRep)] - -> [(SDoc, SDoc, Type, MachRep)] +insertRetAddr :: CCallConv -> [(SDoc, SDoc, Type, CmmType)] + -> [(SDoc, SDoc, Type, CmmType)] #if !defined(x86_64_TARGET_ARCH) insertRetAddr CCallConv args = ret_addr_arg : args insertRetAddr _ args = args @@ -619,19 +615,19 @@ insertRetAddr _ args = args -- need to flush a register argument to the stack (See rts/Adjustor.c for -- details). 
insertRetAddr CCallConv args = go 0 args - where go :: Int -> [(SDoc, SDoc, Type, MachRep)] - -> [(SDoc, SDoc, Type, MachRep)] + where go :: Int -> [(SDoc, SDoc, Type, CmmType)] + -> [(SDoc, SDoc, Type, CmmType)] go 6 args = ret_addr_arg : args go n (arg@(_,_,_,rep):args) - | I64 <- rep = arg : go (n+1) args + | cmmEqType_ignoring_ptrhood rep b64 = arg : go (n+1) args | otherwise = arg : go n args go _ [] = [] insertRetAddr _ args = args #endif -ret_addr_arg :: (SDoc, SDoc, Type, MachRep) +ret_addr_arg :: (SDoc, SDoc, Type, CmmType) ret_addr_arg = (text "original_return_addr", text "void*", undefined, - typeMachRep addrPrimTy) + typeCmmType addrPrimTy) -- This function returns the primitive type associated with the boxed -- type argument to a foreign export (eg. Int ==> Int#). diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 51d7bbc..3518761 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -188,7 +188,6 @@ Library CmmZipUtil DFMonad Dataflow - MachOp MkZipCfg MkZipCfgCmm OptimizationFuel @@ -221,6 +220,22 @@ Library CgTailCall CgTicky CgUtils + StgCmm + StgCmmBind + StgCmmClosure + StgCmmCon + StgCmmEnv + StgCmmExpr + StgCmmForeign + StgCmmGran + StgCmmHeap + StgCmmHpc + StgCmmLayout + StgCmmMonad + StgCmmPrim + StgCmmProf + StgCmmTicky + StgCmmUtils ClosureInfo CodeGen SMRep diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index ded2443..62acd55 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -291,10 +291,14 @@ data DynFlag | Opt_PrintBindContents | Opt_GenManifest | Opt_EmbedManifest + + -- temporary flags + | Opt_RunCPS | Opt_RunCPSZ | Opt_ConvertToZipCfgAndBack | Opt_AutoLinkPackages | Opt_ImplicitImportQualified + | Opt_TryNewCodeGen -- keeping stuff | Opt_KeepHiDiffs @@ -1599,7 +1603,9 @@ fFlags = [ ( "break-on-error", Opt_BreakOnError, const Supported ), ( "print-evld-with-show", Opt_PrintEvldWithShow, const Supported ), ( "print-bind-contents", Opt_PrintBindContents, const 
Supported ), - ( "run-cps", Opt_RunCPSZ, const Supported ), + ( "run-cps", Opt_RunCPS, const Supported ), + ( "run-cpsz", Opt_RunCPSZ, const Supported ), + ( "new-codegen", Opt_TryNewCodeGen, const Supported ), ( "convert-to-zipper-and-back", Opt_ConvertToZipCfgAndBack, const Supported ), ( "vectorise", Opt_Vectorise, const Supported ), ( "regs-graph", Opt_RegsGraph, const Supported ), diff --git a/compiler/main/HscMain.lhs b/compiler/main/HscMain.lhs index 2fefcd4..dd88f72 100644 --- a/compiler/main/HscMain.lhs +++ b/compiler/main/HscMain.lhs @@ -71,13 +71,15 @@ import SimplCore ( core2core ) import TidyPgm import CorePrep ( corePrepPgm ) import CoreToStg ( coreToStg ) +import qualified StgCmm ( codeGen ) import StgSyn import CostCentre -import TyCon ( isDataTyCon ) +import TyCon ( TyCon, isDataTyCon ) import Name ( Name, NamedThing(..) ) import SimplStg ( stg2stg ) import CodeGen ( codeGen ) import Cmm ( Cmm ) +import PprCmm ( pprCmms ) import CmmParse ( parseCmmFile ) import CmmCPS import CmmCPSZ @@ -648,7 +650,7 @@ hscGenHardCode cgguts mod_summary cg_dir_imps = dir_imps, cg_foreign = foreign_stubs, cg_dep_pkgs = dependencies, - cg_hpc_info = hpc_info } = cgguts + cg_hpc_info = hpc_info } = cgguts dflags = hsc_dflags hsc_env location = ms_location mod_summary data_tycons = filter isDataTyCon tycons @@ -664,11 +666,20 @@ hscGenHardCode cgguts mod_summary (stg_binds, cost_centre_info) <- {-# SCC "CoreToStg" #-} myCoreToStg dflags this_mod prepd_binds + + ------------------ Try new code gen route ---------- + cmms <- tryNewCodeGen hsc_env this_mod data_tycons + dir_imps cost_centre_info + stg_binds hpc_info + ------------------ Code generation ------------------ - cmms <- {-# SCC "CodeGen" #-} - codeGen dflags this_mod data_tycons - dir_imps cost_centre_info - stg_binds hpc_info + cmms <- if dopt Opt_TryNewCodeGen (hsc_dflags hsc_env) + then pprTrace "cmms" (ppr cmms) $ return cmms + else {-# SCC "CodeGen" #-} + codeGen dflags this_mod data_tycons + dir_imps 
cost_centre_info + stg_binds hpc_info + --- Optionally run experimental Cmm transformations --- cmms <- optionallyConvertAndOrCPS hsc_env cmms -- unless certain dflags are on, the identity function @@ -732,6 +743,39 @@ hscCmmFile hsc_env filename = do ml_hi_file = panic "hscCmmFile: no hi file", ml_obj_file = panic "hscCmmFile: no obj file" } +-------------------- Stuff for new code gen --------------------- + +tryNewCodeGen :: HscEnv -> Module -> [TyCon] -> [Module] + -> CollectedCCs + -> [(StgBinding,[(Id,[Id])])] + -> HpcInfo + -> IO [Cmm] +tryNewCodeGen hsc_env this_mod data_tycons imported_mods + cost_centre_info stg_binds hpc_info + | not (dopt Opt_TryNewCodeGen (hsc_dflags hsc_env)) + = return [] + | otherwise + = do { let dflags = hsc_dflags hsc_env + ; prog <- StgCmm.codeGen dflags this_mod data_tycons imported_mods + cost_centre_info stg_binds hpc_info + ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Cmm produced by new codegen" + (pprCmms prog) + + ; prog <- return $ map (runTx $ runCmmOpts cmmCfgOptsZ) prog + -- Control flow optimisation + + ; prog <- mapM (protoCmmCPSZ hsc_env) prog + -- The main CPS conversion + + ; prog <- return $ map (runTx $ runCmmOpts cmmCfgOptsZ) prog + -- Control flow optimisation, again + + ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "New Cmm" + (pprCmms prog) + + ; return $ map cmmOfZgraph prog } + + optionallyConvertAndOrCPS :: HscEnv -> [Cmm] -> IO [Cmm] optionallyConvertAndOrCPS hsc_env cmms = do let dflags = hsc_dflags hsc_env @@ -741,7 +785,7 @@ optionallyConvertAndOrCPS hsc_env cmms = else return cmms --------- Optionally convert to CPS (MDA) ----------- cmms <- if not (dopt Opt_ConvertToZipCfgAndBack dflags) && - dopt Opt_RunCPSZ dflags + dopt Opt_RunCPS dflags then cmmCPS dflags cmms else return cmms return cmms diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index ee39dcd..29f4be4 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -36,7 +36,6 @@ import 
qualified GraphColor as Color import Cmm import CmmOpt ( cmmMiniInline, cmmMachOpFold ) import PprCmm -import MachOp import CLabel import State @@ -716,9 +715,9 @@ cmmStmtConFold stmt e' <- cmmExprConFold CallReference e return $ CmmCallee e' conv other -> return other - args' <- mapM (\(CmmKinded arg hint) -> do + args' <- mapM (\(CmmHinted arg hint) -> do arg' <- cmmExprConFold DataReference arg - return (CmmKinded arg' hint)) args + return (CmmHinted arg' hint)) args return $ CmmCall target' regs args' srt returns CmmCondBranch test dest @@ -759,9 +758,9 @@ cmmExprConFold referenceKind expr -> do dflags <- getDynFlagsCmmOpt dynRef <- cmmMakeDynamicReference dflags addImportCmmOpt referenceKind lbl - return $ cmmMachOpFold (MO_Add wordRep) [ + return $ cmmMachOpFold (MO_Add wordWidth) [ dynRef, - (CmmLit $ CmmInt (fromIntegral off) wordRep) + (CmmLit $ CmmInt (fromIntegral off) wordWidth) ] #if powerpc_TARGET_ARCH @@ -795,7 +794,7 @@ cmmExprConFold referenceKind expr -> case mid of BaseReg -> cmmExprConFold DataReference baseRegAddr other -> cmmExprConFold DataReference - (CmmLoad baseRegAddr (globalRegRep mid)) + (CmmLoad baseRegAddr (globalRegType mid)) -- eliminate zero offsets CmmRegOff reg 0 -> cmmExprConFold referenceKind (CmmReg reg) @@ -807,10 +806,10 @@ cmmExprConFold referenceKind expr -> case get_GlobalReg_reg_or_addr mid of Left realreg -> return expr Right baseRegAddr - -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordRep) [ + -> cmmExprConFold DataReference (CmmMachOp (MO_Add wordWidth) [ CmmReg (CmmGlobal mid), CmmLit (CmmInt (fromIntegral offset) - wordRep)]) + wordWidth)]) other -> return other diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 9901e62..f780636 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -35,7 +35,6 @@ import RegAllocInfo ( mkBranchInstr ) import BlockId import PprCmm ( pprExpr ) import Cmm -import MachOp import CLabel import ClosureInfo ( 
C_SRT(..) ) @@ -44,6 +43,7 @@ import StaticFlags ( opt_PIC ) import ForeignCall ( CCallConv(..) ) import OrdList import Pretty +import qualified Outputable as O import Outputable import FastString import FastBool ( isFastTrue ) @@ -110,20 +110,22 @@ stmtToInstrs stmt = case stmt of CmmComment s -> return (unitOL (COMMENT s)) CmmAssign reg src - | isFloatingRep kind -> assignReg_FltCode kind reg src + | isFloatType ty -> assignReg_FltCode size reg src #if WORD_SIZE_IN_BITS==32 - | kind == I64 -> assignReg_I64Code reg src + | isWord64 ty -> assignReg_I64Code reg src #endif - | otherwise -> assignReg_IntCode kind reg src - where kind = cmmRegRep reg + | otherwise -> assignReg_IntCode size reg src + where ty = cmmRegType reg + size = cmmTypeSize ty CmmStore addr src - | isFloatingRep kind -> assignMem_FltCode kind addr src + | isFloatType ty -> assignMem_FltCode size addr src #if WORD_SIZE_IN_BITS==32 - | kind == I64 -> assignMem_I64Code addr src + | isWord64 ty -> assignMem_I64Code addr src #endif - | otherwise -> assignMem_IntCode kind addr src - where kind = cmmExprRep src + | otherwise -> assignMem_IntCode size addr src + where ty = cmmExprType src + size = cmmTypeSize ty CmmCall target result_regs args _ _ -> genCCall target result_regs args @@ -142,8 +144,8 @@ stmtToInstrs stmt = case stmt of -- CmmExprs into CmmRegOff? 
mangleIndexTree :: CmmExpr -> CmmExpr mangleIndexTree (CmmRegOff reg off) - = CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) rep)] - where rep = cmmRegRep reg + = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType reg) -- ----------------------------------------------------------------------------- -- Code gen for 64-bit arithmetic on 32-bit platforms @@ -190,20 +192,20 @@ assignMem_I64Code addrTree valueTree = do rhi = getHiVRegFromLo rlo -- Little-endian store - mov_lo = MOV I32 (OpReg rlo) (OpAddr addr) - mov_hi = MOV I32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) + mov_lo = MOV II32 (OpReg rlo) (OpAddr addr) + mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) -- in return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) -assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do ChildCode64 vcode r_src_lo <- iselExpr64 valueTree let - r_dst_lo = mkVReg u_dst I32 + r_dst_lo = mkVReg u_dst II32 r_dst_hi = getHiVRegFromLo r_dst_lo r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MOV I32 (OpReg r_src_lo) (OpReg r_dst_lo) - mov_hi = MOV I32 (OpReg r_src_hi) (OpReg r_dst_hi) + mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo) + mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi) -- in return ( vcode `snocOL` mov_lo `snocOL` mov_hi @@ -215,72 +217,72 @@ assignReg_I64Code lvalue valueTree ------------ iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat I32 + (rlo,rhi) <- getNewRegPairNat II32 let r = fromIntegral (fromIntegral i :: Word32) q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) code = toOL [ - MOV I32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV I32 (OpImm (ImmInteger q)) (OpReg rhi) + MOV II32 (OpImm (ImmInteger r)) (OpReg rlo), + MOV II32 (OpImm (ImmInteger q)) (OpReg rhi) ] -- in return (ChildCode64 code rlo) -iselExpr64 (CmmLoad 
addrTree I64) = do +iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do Amode addr addr_code <- getAmode addrTree - (rlo,rhi) <- getNewRegPairNat I32 + (rlo,rhi) <- getNewRegPairNat II32 let - mov_lo = MOV I32 (OpAddr addr) (OpReg rlo) - mov_hi = MOV I32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) + mov_lo = MOV II32 (OpAddr addr) (OpReg rlo) + mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) -- in return ( ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rlo ) -iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _))) - = return (ChildCode64 nilOL (mkVReg vu I32)) +iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty + = return (ChildCode64 nilOL (mkVReg vu II32)) -- we handle addition, but rather badly iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do ChildCode64 code1 r1lo <- iselExpr64 e1 - (rlo,rhi) <- getNewRegPairNat I32 + (rlo,rhi) <- getNewRegPairNat II32 let r = fromIntegral (fromIntegral i :: Word32) q = fromIntegral ((fromIntegral i `shiftR` 32) :: Word32) r1hi = getHiVRegFromLo r1lo code = code1 `appOL` - toOL [ MOV I32 (OpReg r1lo) (OpReg rlo), - ADD I32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV I32 (OpReg r1hi) (OpReg rhi), - ADC I32 (OpImm (ImmInteger q)) (OpReg rhi) ] + toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), + ADD II32 (OpImm (ImmInteger r)) (OpReg rlo), + MOV II32 (OpReg r1hi) (OpReg rhi), + ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ] -- in return (ChildCode64 code rlo) iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do ChildCode64 code1 r1lo <- iselExpr64 e1 ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat I32 + (rlo,rhi) <- getNewRegPairNat II32 let r1hi = getHiVRegFromLo r1lo r2hi = getHiVRegFromLo r2lo code = code1 `appOL` code2 `appOL` - toOL [ MOV I32 (OpReg r1lo) (OpReg rlo), - ADD I32 (OpReg r2lo) (OpReg rlo), - MOV I32 (OpReg r1hi) (OpReg rhi), - ADC I32 (OpReg r2hi) (OpReg rhi) ] + toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), + ADD II32 (OpReg r2lo) (OpReg rlo), + 
MOV II32 (OpReg r1hi) (OpReg rhi), + ADC II32 (OpReg r2hi) (OpReg rhi) ] -- in return (ChildCode64 code rlo) -iselExpr64 (CmmMachOp (MO_U_Conv _ I64) [expr]) = do +iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do fn <- getAnyReg expr - r_dst_lo <- getNewRegNat I32 + r_dst_lo <- getNewRegNat II32 let r_dst_hi = getHiVRegFromLo r_dst_lo code = fn r_dst_lo return ( ChildCode64 (code `snocOL` - MOV I32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) + MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) r_dst_lo ) @@ -300,8 +302,8 @@ assignMem_I64Code addrTree valueTree = do let rhi = getHiVRegFromLo rlo -- Big-endian store - mov_hi = ST I32 rhi (AddrRegImm src (ImmInt 0)) - mov_lo = ST I32 rlo (AddrRegImm src (ImmInt 4)) + mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0)) + mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4)) return (vcode `appOL` code `snocOL` mov_hi `snocOL` mov_lo) assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do @@ -323,21 +325,21 @@ assignReg_I64Code lvalue valueTree -- | trace ("iselExpr64: " ++ showSDoc (ppr expr)) False -- = panic "iselExpr64(???)" -iselExpr64 (CmmLoad addrTree I64) = do +iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do Amode (AddrRegReg r1 r2) addr_code <- getAmode addrTree - rlo <- getNewRegNat I32 + rlo <- getNewRegNat II32 let rhi = getHiVRegFromLo rlo - mov_hi = LD I32 (AddrRegImm r1 (ImmInt 0)) rhi - mov_lo = LD I32 (AddrRegImm r1 (ImmInt 4)) rlo + mov_hi = LD II32 (AddrRegImm r1 (ImmInt 0)) rhi + mov_lo = LD II32 (AddrRegImm r1 (ImmInt 4)) rlo return ( ChildCode64 (addr_code `snocOL` mov_hi `snocOL` mov_lo) rlo ) -iselExpr64 (CmmReg (CmmLocal (LocalReg uq I64 _))) = do - r_dst_lo <- getNewRegNat I32 +iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) isWord64 ty = do + r_dst_lo <- getNewRegNat b32 let r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_lo = mkVReg uq I32 + r_src_lo = mkVReg uq b32 r_src_hi = getHiVRegFromLo r_src_lo mov_lo = mkMOV r_src_lo r_dst_lo mov_hi = mkMOV r_src_hi r_dst_hi @@ -372,15 +374,15 @@ 
assignMem_I64Code addrTree valueTree = do rhi = getHiVRegFromLo rlo -- Big-endian store - mov_hi = ST I32 rhi hi_addr - mov_lo = ST I32 rlo lo_addr + mov_hi = ST II32 rhi hi_addr + mov_lo = ST II32 rlo lo_addr -- in return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) -assignReg_I64Code (CmmLocal (LocalReg u_dst pk _)) valueTree = do +assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree = do ChildCode64 vcode r_src_lo <- iselExpr64 valueTree let - r_dst_lo = mkVReg u_dst I32 + r_dst_lo = mkVReg u_dst II32 r_dst_hi = getHiVRegFromLo r_dst_lo r_src_hi = getHiVRegFromLo r_src_lo mov_lo = MR r_dst_lo r_src_lo @@ -399,19 +401,19 @@ assignReg_I64Code lvalue valueTree -- | trace ("iselExpr64: " ++ showSDoc (pprCmmExpr expr)) False -- = panic "iselExpr64(???)" -iselExpr64 (CmmLoad addrTree I64) = do +iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - (rlo, rhi) <- getNewRegPairNat I32 - let mov_hi = LD I32 rhi hi_addr - mov_lo = LD I32 rlo lo_addr + (rlo, rhi) <- getNewRegPairNat II32 + let mov_hi = LD II32 rhi hi_addr + mov_lo = LD II32 rlo lo_addr return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) rlo -iselExpr64 (CmmReg (CmmLocal (LocalReg vu I64 _))) - = return (ChildCode64 nilOL (mkVReg vu I32)) +iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty + = return (ChildCode64 nilOL (mkVReg vu II32)) iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat I32 + (rlo,rhi) <- getNewRegPairNat II32 let half0 = fromIntegral (fromIntegral i :: Word16) half1 = fromIntegral ((fromIntegral i `shiftR` 16) :: Word16) @@ -430,7 +432,7 @@ iselExpr64 (CmmLit (CmmInt i _)) = do iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do ChildCode64 code1 r1lo <- iselExpr64 e1 ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat I32 + (rlo,rhi) <- getNewRegPairNat II32 let r1hi = getHiVRegFromLo r1lo r2hi = getHiVRegFromLo r2lo @@ -441,9 +443,9 @@ iselExpr64 (CmmMachOp 
(MO_Add _) [e1,e2]) = do -- in return (ChildCode64 code rlo) -iselExpr64 (CmmMachOp (MO_U_Conv I32 I64) [expr]) = do +iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat I32 + (rlo, rhi) <- getNewRegPairNat II32 let mov_hi = LI rhi (ImmInt 0) mov_lo = MR rlo expr_reg return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) @@ -463,12 +465,13 @@ iselExpr64 expr -- register to put it in. data Register - = Fixed MachRep Reg InstrBlock - | Any MachRep (Reg -> InstrBlock) + = Fixed Size Reg InstrBlock + | Any Size (Reg -> InstrBlock) -swizzleRegisterRep :: Register -> MachRep -> Register -swizzleRegisterRep (Fixed _ reg code) rep = Fixed rep reg code -swizzleRegisterRep (Any _ codefn) rep = Any rep codefn +swizzleRegisterRep :: Register -> Size -> Register +-- Change the width; it's a no-op +swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code +swizzleRegisterRep (Any _ codefn) size = Any size codefn -- ----------------------------------------------------------------------------- @@ -491,8 +494,8 @@ getSomeReg expr = do getRegisterReg :: CmmReg -> Reg -getRegisterReg (CmmLocal (LocalReg u pk _)) - = mkVReg u pk +getRegisterReg (CmmLocal (LocalReg u pk)) + = mkVReg u (cmmTypeSize pk) getRegisterReg (CmmGlobal mid) = case get_GlobalReg_reg_or_addr mid of @@ -518,12 +521,13 @@ getRegister :: CmmExpr -> NatM Register -- register, it can only be used for rip-relative addressing. 
getRegister (CmmReg (CmmGlobal PicBaseReg)) = do - reg <- getPicBaseNat wordRep - return (Fixed wordRep reg nilOL) + reg <- getPicBaseNat wordSize + return (Fixed wordSize reg nilOL) #endif getRegister (CmmReg reg) - = return (Fixed (cmmRegRep reg) (getRegisterReg reg) nilOL) + = return (Fixed (cmmTypeSize (cmmRegType reg)) + (getRegisterReg reg) nilOL) getRegister tree@(CmmRegOff _ _) = getRegister (mangleIndexTree tree) @@ -533,23 +537,23 @@ getRegister tree@(CmmRegOff _ _) -- for 32-bit architectuers, support some 64 -> 32 bit conversions: -- TO_W_(x), TO_W_(x >> 32) -getRegister (CmmMachOp (MO_U_Conv I64 I32) - [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do +getRegister (CmmMachOp (MO_UU_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do ChildCode64 code rlo <- iselExpr64 x - return $ Fixed I32 (getHiVRegFromLo rlo) code + return $ Fixed II32 (getHiVRegFromLo rlo) code -getRegister (CmmMachOp (MO_S_Conv I64 I32) - [CmmMachOp (MO_U_Shr I64) [x,CmmLit (CmmInt 32 _)]]) = do +getRegister (CmmMachOp (MO_SS_Conv W64 W32) + [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do ChildCode64 code rlo <- iselExpr64 x - return $ Fixed I32 (getHiVRegFromLo rlo) code + return $ Fixed II32 (getHiVRegFromLo rlo) code -getRegister (CmmMachOp (MO_U_Conv I64 I32) [x]) = do +getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do ChildCode64 code rlo <- iselExpr64 x - return $ Fixed I32 rlo code + return $ Fixed II32 rlo code -getRegister (CmmMachOp (MO_S_Conv I64 I32) [x]) = do +getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do ChildCode64 code rlo <- iselExpr64 x - return $ Fixed I32 rlo code + return $ Fixed II32 rlo code #endif @@ -567,7 +571,7 @@ getRegister (StDouble d) LDA tmp (AddrImm (ImmCLbl lbl)), LD TF dst (AddrReg tmp)] in - return (Any F64 code) + return (Any FF64 code) getRegister (StPrim primop [x]) -- unary PrimOps = case primop of @@ -575,8 +579,8 @@ getRegister (StPrim primop [x]) -- unary PrimOps NotOp -> trivialUCode NOT 
x - FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x - DoubleNegOp -> trivialUFCode F64 (FNEG TF) x + FloatNegOp -> trivialUFCode FloatRep (FNEG TF) x + DoubleNegOp -> trivialUFCode FF64 (FNEG TF) x OrdOp -> coerceIntCode IntRep x ChrOp -> chrCode x @@ -589,7 +593,7 @@ getRegister (StPrim primop [x]) -- unary PrimOps Double2FloatOp -> coerceFltCode x Float2DoubleOp -> coerceFltCode x - other_op -> getRegister (StCall fn CCallConv F64 [x]) + other_op -> getRegister (StCall fn CCallConv FF64 [x]) where fn = case other_op of FloatExpOp -> fsLit "exp" @@ -675,15 +679,15 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps WordQuotOp -> trivialCode (DIV Q True) x y WordRemOp -> trivialCode (REM Q True) x y - FloatAddOp -> trivialFCode FloatRep (FADD TF) x y - FloatSubOp -> trivialFCode FloatRep (FSUB TF) x y - FloatMulOp -> trivialFCode FloatRep (FMUL TF) x y - FloatDivOp -> trivialFCode FloatRep (FDIV TF) x y + FloatAddOp -> trivialFCode W32 (FADD TF) x y + FloatSubOp -> trivialFCode W32 (FSUB TF) x y + FloatMulOp -> trivialFCode W32 (FMUL TF) x y + FloatDivOp -> trivialFCode W32 (FDIV TF) x y - DoubleAddOp -> trivialFCode F64 (FADD TF) x y - DoubleSubOp -> trivialFCode F64 (FSUB TF) x y - DoubleMulOp -> trivialFCode F64 (FMUL TF) x y - DoubleDivOp -> trivialFCode F64 (FDIV TF) x y + DoubleAddOp -> trivialFCode W64 (FADD TF) x y + DoubleSubOp -> trivialFCode W64 (FSUB TF) x y + DoubleMulOp -> trivialFCode W64 (FMUL TF) x y + DoubleDivOp -> trivialFCode W64 (FDIV TF) x y AddrAddOp -> trivialCode (ADD Q False) x y AddrSubOp -> trivialCode (SUB Q False) x y @@ -699,8 +703,8 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps ISraOp -> trivialCode SRA x y -- was: panic "AlphaGen:isra" ISrlOp -> trivialCode SRL x y -- was: panic "AlphaGen:isrl" - FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y]) - DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv F64 [x,y]) + FloatPowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) + 
DoublePowerOp -> getRegister (StCall (fsLit "pow") CCallConv FF64 [x,y]) where {- ------------------------------------------------------------ Some bizarre special code for getting condition codes into @@ -733,7 +737,7 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps cmpF_code instr cond x y = trivialFCode pr instr x y `thenNat` \ register -> - getNewRegNat F64 `thenNat` \ tmp -> + getNewRegNat FF64 `thenNat` \ tmp -> getBlockIdNat `thenNat` \ lbl -> let code = registerCode register tmp @@ -790,7 +794,7 @@ getRegister leaf #if i386_TARGET_ARCH -getRegister (CmmLit (CmmFloat f F32)) = do +getRegister (CmmLit (CmmFloat f W32)) = do lbl <- getNewLabelNat dflags <- getDynFlagsNat dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl @@ -798,21 +802,21 @@ getRegister (CmmLit (CmmFloat f F32)) = do let code dst = LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f F32)] + CmmStaticLit (CmmFloat f W32)] `consOL` (addr_code `snocOL` - GLD F32 addr dst) + GLD FF32 addr dst) -- in - return (Any F32 code) + return (Any FF32 code) -getRegister (CmmLit (CmmFloat d F64)) +getRegister (CmmLit (CmmFloat d W64)) | d == 0.0 = let code dst = unitOL (GLDZ dst) - in return (Any F64 code) + in return (Any FF64 code) | d == 1.0 = let code dst = unitOL (GLD1 dst) - in return (Any F64 code) + in return (Any FF64 code) | otherwise = do lbl <- getNewLabelNat @@ -822,94 +826,96 @@ getRegister (CmmLit (CmmFloat d F64)) let code dst = LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d F64)] + CmmStaticLit (CmmFloat d W64)] `consOL` (addr_code `snocOL` - GLD F64 addr dst) + GLD FF64 addr dst) -- in - return (Any F64 code) + return (Any FF64 code) #endif /* i386_TARGET_ARCH */ #if x86_64_TARGET_ARCH -getRegister (CmmLit (CmmFloat 0.0 rep)) = do - let code dst = unitOL (XOR rep (OpReg dst) (OpReg dst)) +getRegister (CmmLit (CmmFloat 0.0 w)) = do + let size = floatSize w + code dst = unitOL (XOR size (OpReg dst) (OpReg dst)) -- I don't know why 
there are xorpd, xorps, and pxor instructions. -- They all appear to do the same thing --SDM - return (Any rep code) + return (Any size code) -getRegister (CmmLit (CmmFloat f rep)) = do +getRegister (CmmLit (CmmFloat f w)) = do lbl <- getNewLabelNat let code dst = toOL [ LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f rep)], - MOV rep (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + CmmStaticLit (CmmFloat f w)], + MOV size (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) ] -- in - return (Any rep code) + return (Any size code) + where size = floatSize w #endif /* x86_64_TARGET_ARCH */ #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL I8) addr - return (Any I32 code) +getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II8) addr + return (Any II32 code) -getRegister (CmmMachOp (MO_S_Conv I8 I32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL I8) addr - return (Any I32 code) +getRegister (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II8) addr + return (Any II32 code) -getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL I16) addr - return (Any I32 code) +getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II16) addr + return (Any II32 code) -getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL I16) addr - return (Any I32 code) +getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II16) addr + return (Any II32 code) #endif #if x86_64_TARGET_ARCH -- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_U_Conv I8 I64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL I8) addr - return (Any I64 code) +getRegister (CmmMachOp (MO_UU_Conv W8 
W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II8) addr + return (Any II64 code) -getRegister (CmmMachOp (MO_S_Conv I8 I64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL I8) addr - return (Any I64 code) +getRegister (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II8) addr + return (Any II64 code) -getRegister (CmmMachOp (MO_U_Conv I16 I64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL I16) addr - return (Any I64 code) +getRegister (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVZxL II16) addr + return (Any II64 code) -getRegister (CmmMachOp (MO_S_Conv I16 I64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL I16) addr - return (Any I64 code) +getRegister (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II16) addr + return (Any II64 code) -getRegister (CmmMachOp (MO_U_Conv I32 I64) [CmmLoad addr _]) = do - code <- intLoadCode (MOV I32) addr -- 32-bit loads zero-extend - return (Any I64 code) +getRegister (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend + return (Any II64 code) -getRegister (CmmMachOp (MO_S_Conv I32 I64) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL I32) addr - return (Any I64 code) +getRegister (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) = do + code <- intLoadCode (MOVSxL II32) addr + return (Any II64 code) #endif #if x86_64_TARGET_ARCH -getRegister (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg), +getRegister (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), CmmLit displacement]) - = return $ Any I64 (\dst -> unitOL $ - LEA I64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) + = return $ Any II64 (\dst -> unitOL $ + LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) #endif #if x86_64_TARGET_ARCH -getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do +getRegister (CmmMachOp (MO_F_Neg W32) [x]) = do x_code 
<- getAnyReg x lbl <- getNewLabelNat let @@ -918,19 +924,19 @@ getRegister (CmmMachOp (MO_S_Neg F32) [x]) = do LDATA ReadOnlyData16 [ CmmAlign 16, CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x80000000 I32), - CmmStaticLit (CmmInt 0 I32), - CmmStaticLit (CmmInt 0 I32), - CmmStaticLit (CmmInt 0 I32) + CmmStaticLit (CmmInt 0x80000000 W32), + CmmStaticLit (CmmInt 0 W32), + CmmStaticLit (CmmInt 0 W32), + CmmStaticLit (CmmInt 0 W32) ], - XOR F32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + XOR FF32 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) -- xorps, so we need the 128-bit constant -- ToDo: rip-relative ] -- - return (Any F32 code) + return (Any FF32 code) -getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do +getRegister (CmmMachOp (MO_F_Neg W64) [x]) = do x_code <- getAnyReg x lbl <- getNewLabelNat let @@ -939,15 +945,15 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do LDATA ReadOnlyData16 [ CmmAlign 16, CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x8000000000000000 I64), - CmmStaticLit (CmmInt 0 I64) + CmmStaticLit (CmmInt 0x8000000000000000 W64), + CmmStaticLit (CmmInt 0 W64) ], -- gcc puts an unpck here. Wonder if we need it. 
- XOR F64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) + XOR FF64 (OpAddr (ripRel (ImmCLbl lbl))) (OpReg dst) -- xorpd, so we need the 128-bit constant ] -- - return (Any F64 code) + return (Any FF64 code) #endif #if i386_TARGET_ARCH || x86_64_TARGET_ARCH @@ -955,48 +961,50 @@ getRegister (CmmMachOp (MO_S_Neg F64) [x]) = do getRegister (CmmMachOp mop [x]) -- unary MachOps = case mop of #if i386_TARGET_ARCH - MO_S_Neg F32 -> trivialUFCode F32 (GNEG F32) x - MO_S_Neg F64 -> trivialUFCode F64 (GNEG F64) x + MO_F_Neg W32 -> trivialUFCode FF32 (GNEG FF32) x + MO_F_Neg W64 -> trivialUFCode FF64 (GNEG FF64) x #endif - MO_S_Neg rep -> trivialUCode rep (NEGI rep) x - MO_Not rep -> trivialUCode rep (NOT rep) x + MO_S_Neg w -> triv_ucode NEGI (intSize w) + MO_F_Neg w -> triv_ucode NEGI (floatSize w) + MO_Not w -> triv_ucode NOT (intSize w) -- Nop conversions - MO_U_Conv I32 I8 -> toI8Reg I32 x - MO_S_Conv I32 I8 -> toI8Reg I32 x - MO_U_Conv I16 I8 -> toI8Reg I16 x - MO_S_Conv I16 I8 -> toI8Reg I16 x - MO_U_Conv I32 I16 -> toI16Reg I32 x - MO_S_Conv I32 I16 -> toI16Reg I32 x + MO_UU_Conv W32 W8 -> toI8Reg W32 x + MO_SS_Conv W32 W8 -> toI8Reg W32 x + MO_UU_Conv W16 W8 -> toI8Reg W16 x + MO_SS_Conv W16 W8 -> toI8Reg W16 x + MO_UU_Conv W32 W16 -> toI16Reg W32 x + MO_SS_Conv W32 W16 -> toI16Reg W32 x + #if x86_64_TARGET_ARCH - MO_U_Conv I64 I32 -> conversionNop I64 x - MO_S_Conv I64 I32 -> conversionNop I64 x - MO_U_Conv I64 I16 -> toI16Reg I64 x - MO_S_Conv I64 I16 -> toI16Reg I64 x - MO_U_Conv I64 I8 -> toI8Reg I64 x - MO_S_Conv I64 I8 -> toI8Reg I64 x + MO_UU_Conv W64 W32 -> conversionNop II64 x + MO_SS_Conv W64 W32 -> conversionNop II64 x + MO_UU_Conv W64 W16 -> toI16Reg W64 x + MO_SS_Conv W64 W16 -> toI16Reg W64 x + MO_UU_Conv W64 W8 -> toI8Reg W64 x + MO_SS_Conv W64 W8 -> toI8Reg W64 x #endif - MO_U_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x - MO_S_Conv rep1 rep2 | rep1 == rep2 -> conversionNop rep1 x + MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize 
rep1) x + MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x -- widenings - MO_U_Conv I8 I32 -> integerExtend I8 I32 MOVZxL x - MO_U_Conv I16 I32 -> integerExtend I16 I32 MOVZxL x - MO_U_Conv I8 I16 -> integerExtend I8 I16 MOVZxL x + MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x + MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x + MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x - MO_S_Conv I8 I32 -> integerExtend I8 I32 MOVSxL x - MO_S_Conv I16 I32 -> integerExtend I16 I32 MOVSxL x - MO_S_Conv I8 I16 -> integerExtend I8 I16 MOVSxL x + MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x + MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x + MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x #if x86_64_TARGET_ARCH - MO_U_Conv I8 I64 -> integerExtend I8 I64 MOVZxL x - MO_U_Conv I16 I64 -> integerExtend I16 I64 MOVZxL x - MO_U_Conv I32 I64 -> integerExtend I32 I64 MOVZxL x - MO_S_Conv I8 I64 -> integerExtend I8 I64 MOVSxL x - MO_S_Conv I16 I64 -> integerExtend I16 I64 MOVSxL x - MO_S_Conv I32 I64 -> integerExtend I32 I64 MOVSxL x + MO_UU_Conv W8 W64 -> integerExtend W8 W64 MOVZxL x + MO_UU_Conv W16 W64 -> integerExtend W16 W64 MOVZxL x + MO_UU_Conv W32 W64 -> integerExtend W32 W64 MOVZxL x + MO_SS_Conv W8 W64 -> integerExtend W8 W64 MOVSxL x + MO_SS_Conv W16 W64 -> integerExtend W16 W64 MOVSxL x + MO_SS_Conv W32 W64 -> integerExtend W32 W64 MOVSxL x -- for 32-to-64 bit zero extension, amd64 uses an ordinary movl. 
-- However, we don't want the register allocator to throw it -- away as an unnecessary reg-to-reg move, so we keep it in @@ -1004,32 +1012,38 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps #endif #if i386_TARGET_ARCH - MO_S_Conv F32 F64 -> conversionNop F64 x - MO_S_Conv F64 F32 -> conversionNop F32 x + MO_FF_Conv W32 W64 -> conversionNop FF64 x + MO_FF_Conv W64 W32 -> conversionNop FF32 x #else - MO_S_Conv F32 F64 -> coerceFP2FP F64 x - MO_S_Conv F64 F32 -> coerceFP2FP F32 x + MO_FF_Conv W32 W64 -> coerceFP2FP W64 x + MO_FF_Conv W64 W32 -> coerceFP2FP W32 x #endif - MO_S_Conv from to - | isFloatingRep from -> coerceFP2Int from to x - | isFloatingRep to -> coerceInt2FP from to x + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x other -> pprPanic "getRegister" (pprMachOp mop) where + triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register + triv_ucode instr size = trivialUCode size (instr size) x + -- signed or unsigned extension. + integerExtend :: Width -> Width + -> (Size -> Operand -> Operand -> Instr) + -> CmmExpr -> NatM Register integerExtend from to instr expr = do - (reg,e_code) <- if from == I8 then getByteReg expr + (reg,e_code) <- if from == W8 then getByteReg expr else getSomeReg expr let code dst = e_code `snocOL` - instr from (OpReg reg) (OpReg dst) - return (Any to code) + instr (intSize from) (OpReg reg) (OpReg dst) + return (Any (intSize to) code) + toI8Reg :: Width -> CmmExpr -> NatM Register toI8Reg new_rep expr = do codefn <- getAnyReg expr - return (Any new_rep codefn) + return (Any (intSize new_rep) codefn) -- HACK: use getAnyReg to get a byte-addressable register. -- If the source was a Fixed register, this will add the -- mov instruction to put it into the desired destination. 
@@ -1039,26 +1053,20 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps toI16Reg = toI8Reg -- for now - conversionNop new_rep expr + conversionNop :: Size -> CmmExpr -> NatM Register + conversionNop new_size expr = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_rep) + return (swizzleRegisterRep e_code new_size) getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps = case mop of - MO_Eq F32 -> condFltReg EQQ x y - MO_Ne F32 -> condFltReg NE x y - MO_S_Gt F32 -> condFltReg GTT x y - MO_S_Ge F32 -> condFltReg GE x y - MO_S_Lt F32 -> condFltReg LTT x y - MO_S_Le F32 -> condFltReg LE x y - - MO_Eq F64 -> condFltReg EQQ x y - MO_Ne F64 -> condFltReg NE x y - MO_S_Gt F64 -> condFltReg GTT x y - MO_S_Ge F64 -> condFltReg GE x y - MO_S_Lt F64 -> condFltReg LTT x y - MO_S_Le F64 -> condFltReg LE x y + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y MO_Eq rep -> condIntReg EQQ x y MO_Ne rep -> condIntReg NE x y @@ -1074,25 +1082,17 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps MO_U_Le rep -> condIntReg LEU x y #if i386_TARGET_ARCH - MO_Add F32 -> trivialFCode F32 GADD x y - MO_Sub F32 -> trivialFCode F32 GSUB x y - - MO_Add F64 -> trivialFCode F64 GADD x y - MO_Sub F64 -> trivialFCode F64 GSUB x y - - MO_S_Quot F32 -> trivialFCode F32 GDIV x y - MO_S_Quot F64 -> trivialFCode F64 GDIV x y + MO_F_Add w -> trivialFCode w GADD x y + MO_F_Sub w -> trivialFCode w GSUB x y + MO_F_Quot w -> trivialFCode w GDIV x y + MO_F_Mul w -> trivialFCode w GMUL x y #endif #if x86_64_TARGET_ARCH - MO_Add F32 -> trivialFCode F32 ADD x y - MO_Sub F32 -> trivialFCode F32 SUB x y - - MO_Add F64 -> trivialFCode F64 ADD x y - MO_Sub F64 -> trivialFCode F64 SUB x y - - MO_S_Quot F32 -> trivialFCode F32 FDIV x y - MO_S_Quot F64 -> trivialFCode F64 FDIV x y + MO_F_Add w -> trivialFCode w ADD x y + MO_F_Sub w -> 
trivialFCode w SUB x y + MO_F_Quot w -> trivialFCode w FDIV x y + MO_F_Mul w -> trivialFCode w MUL x y #endif MO_Add rep -> add_code rep x y @@ -1103,77 +1103,67 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps MO_U_Quot rep -> div_code rep False True x y MO_U_Rem rep -> div_code rep False False x y -#if i386_TARGET_ARCH - MO_Mul F32 -> trivialFCode F32 GMUL x y - MO_Mul F64 -> trivialFCode F64 GMUL x y -#endif - -#if x86_64_TARGET_ARCH - MO_Mul F32 -> trivialFCode F32 MUL x y - MO_Mul F64 -> trivialFCode F64 MUL x y -#endif - - MO_Mul rep -> let op = IMUL rep in - trivialCode rep op (Just op) x y - MO_S_MulMayOflo rep -> imulMayOflo rep x y - MO_And rep -> let op = AND rep in - trivialCode rep op (Just op) x y - MO_Or rep -> let op = OR rep in - trivialCode rep op (Just op) x y - MO_Xor rep -> let op = XOR rep in - trivialCode rep op (Just op) x y + MO_Mul rep -> triv_op rep IMUL + MO_And rep -> triv_op rep AND + MO_Or rep -> triv_op rep OR + MO_Xor rep -> triv_op rep XOR {- Shift ops on x86s have constraints on their source, it either has to be Imm, CL or 1 => trivialCode is not restrictive enough (sigh.) 
-} - MO_Shl rep -> shift_code rep (SHL rep) x y {-False-} - MO_U_Shr rep -> shift_code rep (SHR rep) x y {-False-} - MO_S_Shr rep -> shift_code rep (SAR rep) x y {-False-} + MO_Shl rep -> shift_code rep SHL x y {-False-} + MO_U_Shr rep -> shift_code rep SHR x y {-False-} + MO_S_Shr rep -> shift_code rep SAR x y {-False-} other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) where -------------------- - imulMayOflo :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + triv_op width instr = trivialCode width op (Just op) x y + where op = instr (intSize width) + + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register imulMayOflo rep a b = do (a_reg, a_code) <- getNonClobberedReg a b_code <- getAnyReg b let shift_amt = case rep of - I32 -> 31 - I64 -> 63 + W32 -> 31 + W64 -> 63 _ -> panic "shift_amt" + size = intSize rep code = a_code `appOL` b_code eax `appOL` toOL [ - IMUL2 rep (OpReg a_reg), -- result in %edx:%eax - SAR rep (OpImm (ImmInt shift_amt)) (OpReg eax), + IMUL2 size (OpReg a_reg), -- result in %edx:%eax + SAR size (OpImm (ImmInt shift_amt)) (OpReg eax), -- sign extend lower part - SUB rep (OpReg edx) (OpReg eax) + SUB size (OpReg edx) (OpReg eax) -- compare against upper -- eax==0 if high part == sign extended low part ] -- in - return (Fixed rep eax code) + return (Fixed size eax code) -------------------- - shift_code :: MachRep - -> (Operand -> Operand -> Instr) + shift_code :: Width + -> (Size -> Operand -> Operand -> Instr) -> CmmExpr -> CmmExpr -> NatM Register {- Case1: shift length as immediate -} - shift_code rep instr x y@(CmmLit lit) = do + shift_code width instr x y@(CmmLit lit) = do x_code <- getAnyReg x let + size = intSize width code dst = x_code dst `snocOL` - instr (OpImm (litToImm lit)) (OpReg dst) + instr size (OpImm (litToImm lit)) (OpReg dst) -- in - return (Any rep code) + return (Any size code) {- Case2: shift length is complex (non-immediate) * y must go in %ecx. 
@@ -1189,86 +1179,92 @@ getRegister e@(CmmMachOp mop [x, y]) -- dyadic MachOps eliminate this reg->reg move here (it won't eliminate the other one, because the move is into the fixed %ecx). -} - shift_code rep instr x y{-amount-} = do + shift_code width instr x y{-amount-} = do x_code <- getAnyReg x - tmp <- getNewRegNat rep + let size = intSize width + tmp <- getNewRegNat size y_code <- getAnyReg y let code = x_code tmp `appOL` y_code ecx `snocOL` - instr (OpReg ecx) (OpReg tmp) + instr size (OpReg ecx) (OpReg tmp) -- in - return (Fixed rep tmp code) + return (Fixed size tmp code) -------------------- - add_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register add_code rep x (CmmLit (CmmInt y _)) | is32BitInteger y = add_int rep x y - add_code rep x y = trivialCode rep (ADD rep) (Just (ADD rep)) x y + add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y + where size = intSize rep -------------------- - sub_code :: MachRep -> CmmExpr -> CmmExpr -> NatM Register + sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register sub_code rep x (CmmLit (CmmInt y _)) | is32BitInteger (-y) = add_int rep x (-y) - sub_code rep x y = trivialCode rep (SUB rep) Nothing x y + sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y -- our three-operand add instruction: - add_int rep x y = do + add_int width x y = do (x_reg, x_code) <- getSomeReg x let + size = intSize width imm = ImmInt (fromInteger y) code dst = x_code `snocOL` - LEA rep + LEA size (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) (OpReg dst) -- - return (Any rep code) + return (Any size code) ---------------------- - div_code rep signed quotient x y = do + div_code width signed quotient x y = do (y_op, y_code) <- getRegOrMem y -- cannot be clobbered x_code <- getAnyReg x let - widen | signed = CLTD rep - | otherwise = XOR rep (OpReg edx) (OpReg edx) + size = intSize width + widen | signed = CLTD size + | otherwise = XOR 
size (OpReg edx) (OpReg edx) instr | signed = IDIV | otherwise = DIV code = y_code `appOL` x_code eax `appOL` - toOL [widen, instr rep y_op] + toOL [widen, instr size y_op] result | quotient = eax | otherwise = edx -- in - return (Fixed rep result code) + return (Fixed size result code) getRegister (CmmLoad mem pk) - | isFloatingRep pk + | isFloatType pk = do Amode src mem_code <- getAmode mem let + size = cmmTypeSize pk code dst = mem_code `snocOL` - IF_ARCH_i386(GLD pk src dst, - MOV pk (OpAddr src) (OpReg dst)) - -- - return (Any pk code) + IF_ARCH_i386(GLD size src dst, + MOV size (OpAddr src) (OpReg dst)) + return (Any size code) #if i386_TARGET_ARCH getRegister (CmmLoad mem pk) - | pk /= I64 + | not (isWord64 pk) = do - code <- intLoadCode (instr pk) mem - return (Any pk code) + code <- intLoadCode instr mem + return (Any size code) where - instr I8 = MOVZxL pk - instr I16 = MOV I16 - instr I32 = MOV I32 - -- we always zero-extend 8-bit loads, if we + width = typeWidth pk + size = intSize width + instr = case width of + W8 -> MOVZxL II8 + _other -> MOV size + -- We always zero-extend 8-bit loads, if we -- can't think of anything better. 
This is because -- we can't guarantee access to an 8-bit variant of every register -- (esi and edi don't have 8-bit variants), so to make things @@ -1279,33 +1275,36 @@ getRegister (CmmLoad mem pk) -- Simpler memory load code on x86_64 getRegister (CmmLoad mem pk) = do - code <- intLoadCode (MOV pk) mem - return (Any pk code) + code <- intLoadCode (MOV size) mem + return (Any size code) + where size = intSize $ typeWidth pk #endif -getRegister (CmmLit (CmmInt 0 rep)) +getRegister (CmmLit (CmmInt 0 width)) = let + size = intSize width + -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits - adj_rep = case rep of I64 -> I32; _ -> rep - rep1 = IF_ARCH_i386( rep, adj_rep ) + adj_size = case size of II64 -> II32; _ -> size + size1 = IF_ARCH_i386( size, adj_size ) code dst - = unitOL (XOR rep1 (OpReg dst) (OpReg dst)) + = unitOL (XOR size1 (OpReg dst) (OpReg dst)) in - return (Any rep code) + return (Any size code) #if x86_64_TARGET_ARCH -- optimisation for loading small literals on x86_64: take advantage -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit -- instruction forms are shorter. 
getRegister (CmmLit lit) - | I64 <- cmmLitRep lit, not (isBigLit lit) + | isWord64 (cmmLitType lit), not (isBigLit lit) = let imm = litToImm lit - code dst = unitOL (MOV I32 (OpImm imm) (OpReg dst)) + code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst)) in - return (Any I64 code) + return (Any II64 code) where - isBigLit (CmmInt i I64) = i < 0 || i > 0xffffffff + isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff isBigLit _ = False -- note1: not the same as (not.is32BitLit), because that checks for -- signed literals that fit in 32 bits, but we want unsigned @@ -1316,11 +1315,11 @@ getRegister (CmmLit lit) getRegister (CmmLit lit) = let - rep = cmmLitRep lit + size = cmmTypeSize (cmmLitType lit) imm = litToImm lit - code dst = unitOL (MOV rep (OpImm imm) (OpReg dst)) + code dst = unitOL (MOV size (OpImm imm) (OpReg dst)) in - return (Any rep code) + return (Any size code) getRegister other = pprPanic "getRegister(x86)" (ppr other) @@ -1381,12 +1380,12 @@ getNonClobberedReg expr = do | otherwise -> return (reg, code) -reg2reg :: MachRep -> Reg -> Reg -> Instr -reg2reg rep src dst +reg2reg :: Size -> Reg -> Reg -> Instr +reg2reg size src dst #if i386_TARGET_ARCH - | isFloatingRep rep = GMOV src dst + | isFloatSize size = GMOV src dst #endif - | otherwise = MOV rep (OpReg src) (OpReg dst) + | otherwise = MOV size (OpReg src) (OpReg dst) #endif /* i386_TARGET_ARCH || x86_64_TARGET_ARCH */ @@ -1394,55 +1393,52 @@ reg2reg rep src dst #if sparc_TARGET_ARCH -getRegister (CmmLit (CmmFloat f F32)) = do +getRegister (CmmLit (CmmFloat f W32)) = do lbl <- getNewLabelNat let code dst = toOL [ LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat f F32)], + CmmStaticLit (CmmFloat f W32)], SETHI (HI (ImmCLbl lbl)) dst, - LD F32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] - return (Any F32 code) + LD FF32 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] + return (Any FF32 code) -getRegister (CmmLit (CmmFloat d F64)) = do +getRegister (CmmLit (CmmFloat d W64)) = do lbl <- getNewLabelNat 
let code dst = toOL [ LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmFloat d F64)], + CmmStaticLit (CmmFloat d W64)], SETHI (HI (ImmCLbl lbl)) dst, - LD F64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] - return (Any F64 code) + LD FF64 (AddrRegImm dst (LO (ImmCLbl lbl))) dst] + return (Any FF64 code) getRegister (CmmMachOp mop [x]) -- unary MachOps = case mop of - MO_S_Neg F32 -> trivialUFCode F32 (FNEG F32) x - MO_S_Neg F64 -> trivialUFCode F64 (FNEG F64) x + MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x + MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x - MO_S_Neg rep -> trivialUCode rep (SUB False False g0) x - MO_Not rep -> trivialUCode rep (XNOR False g0) x + MO_S_Neg rep -> trivialUCode (intSize rep) (SUB False False g0) x + MO_Not rep -> trivialUCode (intSize rep) (XNOR False g0) x - MO_U_Conv I32 I8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8)) + MO_FF_Conv W64 W32-> coerceDbl2Flt x + MO_FF_Conv W32 W64-> coerceFlt2Dbl x - MO_U_Conv F64 F32-> coerceDbl2Flt x - MO_U_Conv F32 F64-> coerceFlt2Dbl x - - MO_S_Conv F32 I32-> coerceFP2Int F32 I32 x - MO_S_Conv I32 F32-> coerceInt2FP I32 F32 x - MO_S_Conv F64 I32-> coerceFP2Int F64 I32 x - MO_S_Conv I32 F64-> coerceInt2FP I32 F64 x + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x -- Conversions which are a nop on sparc - MO_U_Conv from to - | from == to -> conversionNop to x - MO_U_Conv I32 to -> conversionNop to x - MO_S_Conv I32 to -> conversionNop to x + MO_UU_Conv from to + | from == to -> conversionNop to x + MO_UU_Conv W32 W8 -> trivialCode I8 (AND False) x (CmmLit (CmmInt 255 I8)) + MO_UU_Conv W32 to -> conversionNop to x + MO_SS_Conv W32 to -> conversionNop to x -- widenings - MO_U_Conv I8 I32 -> integerExtend False I8 I32 x - MO_U_Conv I16 I32 -> integerExtend False I16 I32 x - MO_U_Conv I8 I16 -> integerExtend False I8 I16 x - MO_S_Conv I16 I32 -> integerExtend True I16 I32 x + MO_UU_Conv W8 W32 -> integerExtend False W8 W32 x + MO_UU_Conv 
W16 W32 -> integerExtend False W16 W32 x + MO_UU_Conv W8 W16 -> integerExtend False W8 W16 x + MO_SS_Conv W16 W32 -> integerExtend True W16 W32 x other_op -> panic "Unknown unary mach op" where @@ -1454,29 +1450,13 @@ getRegister (CmmMachOp mop [x]) -- unary MachOps e_code `snocOL` ((if signed then SRA else SRL) reg (RIImm (ImmInt 0)) dst) - return (Any to code) + return (Any (intSize to) code) conversionNop new_rep expr = do e_code <- getRegister expr return (swizzleRegisterRep e_code new_rep) getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps = case mop of - MO_Eq F32 -> condFltReg EQQ x y - MO_Ne F32 -> condFltReg NE x y - - MO_S_Gt F32 -> condFltReg GTT x y - MO_S_Ge F32 -> condFltReg GE x y - MO_S_Lt F32 -> condFltReg LTT x y - MO_S_Le F32 -> condFltReg LE x y - - MO_Eq F64 -> condFltReg EQQ x y - MO_Ne F64 -> condFltReg NE x y - - MO_S_Gt F64 -> condFltReg GTT x y - MO_S_Ge F64 -> condFltReg GE x y - MO_S_Lt F64 -> condFltReg LTT x y - MO_S_Le F64 -> condFltReg LE x y - MO_Eq rep -> condIntReg EQQ x y MO_Ne rep -> condIntReg NE x y @@ -1485,36 +1465,40 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_S_Lt rep -> condIntReg LTT x y MO_S_Le rep -> condIntReg LE x y - MO_U_Gt I32 -> condIntReg GTT x y - MO_U_Ge I32 -> condIntReg GE x y - MO_U_Lt I32 -> condIntReg LTT x y - MO_U_Le I32 -> condIntReg LE x y + MO_U_Gt W32 -> condIntReg GTT x y + MO_U_Ge W32 -> condIntReg GE x y + MO_U_Lt W32 -> condIntReg LTT x y + MO_U_Le W32 -> condIntReg LE x y - MO_U_Gt I16 -> condIntReg GU x y - MO_U_Ge I16 -> condIntReg GEU x y - MO_U_Lt I16 -> condIntReg LU x y - MO_U_Le I16 -> condIntReg LEU x y + MO_U_Gt W16 -> condIntReg GU x y + MO_U_Ge W16 -> condIntReg GEU x y + MO_U_Lt W16 -> condIntReg LU x y + MO_U_Le W16 -> condIntReg LEU x y - MO_Add I32 -> trivialCode I32 (ADD False False) x y - MO_Sub I32 -> trivialCode I32 (SUB False False) x y + MO_Add W32 -> trivialCode W32 (ADD False False) x y + MO_Sub W32 -> trivialCode W32 (SUB False False) x y MO_S_MulMayOflo 
rep -> imulMayOflo rep x y {- -- ToDo: teach about V8+ SPARC div instructions - MO_S_Quot I32 -> idiv (fsLit ".div") x y - MO_S_Rem I32 -> idiv (fsLit ".rem") x y - MO_U_Quot I32 -> idiv (fsLit ".udiv") x y - MO_U_Rem I32 -> idiv (fsLit ".urem") x y + MO_S_Quot W32 -> idiv FSLIT(".div") x y + MO_S_Rem W32 -> idiv FSLIT(".rem") x y + MO_U_Quot W32 -> idiv FSLIT(".udiv") x y + MO_U_Rem W32 -> idiv FSLIT(".urem") x y -} - MO_Add F32 -> trivialFCode F32 FADD x y - MO_Sub F32 -> trivialFCode F32 FSUB x y - MO_Mul F32 -> trivialFCode F32 FMUL x y - MO_S_Quot F32 -> trivialFCode F32 FDIV x y - MO_Add F64 -> trivialFCode F64 FADD x y - MO_Sub F64 -> trivialFCode F64 FSUB x y - MO_Mul F64 -> trivialFCode F64 FMUL x y - MO_S_Quot F64 -> trivialFCode F64 FDIV x y + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y + + MO_F_Add w -> trivialFCode w FADD x y + MO_F_Sub w -> trivialFCode w FSUB x y + MO_F_Mul w -> trivialFCode w FMUL x y + MO_F_Quot w -> trivialFCode w FDIV x y MO_And rep -> trivialCode rep (AND False) x y MO_Or rep -> trivialCode rep (OR False) x y @@ -1527,27 +1511,27 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_S_Shr rep -> trivialCode rep SRA x y {- - MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64 + MO_F32_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 [promote x, promote y]) where promote x = CmmMachOp MO_F32_to_Dbl [x] - MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv F64 + MO_F64_Pwr -> getRegister (StCall (Left (fsLit "pow")) CCallConv FF64 [x, y]) -} other -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) where - --idiv fn x y = getRegister (StCall (Left fn) CCallConv I32 [x, y]) + --idiv fn x y = getRegister (StCall (Left fn) CCallConv II32 [x, y]) -------------------- - imulMayOflo :: MachRep -> CmmExpr -> CmmExpr 
-> NatM Register + imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register imulMayOflo rep a b = do (a_reg, a_code) <- getSomeReg a (b_reg, b_code) <- getSomeReg b - res_lo <- getNewRegNat I32 - res_hi <- getNewRegNat I32 + res_lo <- getNewRegNat b32 + res_hi <- getNewRegNat b32 let shift_amt = case rep of - I32 -> 31 - I64 -> 63 + W32 -> 31 + W64 -> 63 _ -> panic "shift_amt" code dst = a_code `appOL` b_code `appOL` toOL [ @@ -1556,7 +1540,7 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, SUB False False res_lo (RIReg res_hi) dst ] - return (Any I32 code) + return (Any II32 code) getRegister (CmmLoad mem pk) = do Amode src code <- getAmode mem @@ -1570,94 +1554,89 @@ getRegister (CmmLit (CmmInt i _)) src = ImmInt (fromInteger i) code dst = unitOL (OR False g0 (RIImm src) dst) in - return (Any I32 code) + return (Any II32 code) getRegister (CmmLit lit) - = let rep = cmmLitRep lit + = let rep = cmmLitType lit imm = litToImm lit code dst = toOL [ SETHI (HI imm) dst, OR False dst (RIImm (LO imm)) dst] - in return (Any I32 code) + in return (Any II32 code) #endif /* sparc_TARGET_ARCH */ #if powerpc_TARGET_ARCH getRegister (CmmLoad mem pk) - | pk /= I64 + | not (isWord64 pk) = do Amode addr addr_code <- getAmode mem - let code dst = ASSERT((regClass dst == RcDouble) == isFloatingRep pk) - addr_code `snocOL` LD pk dst addr - return (Any pk code) + let code dst = ASSERT((regClass dst == RcDouble) == isFloatType pk) + addr_code `snocOL` LD size dst addr + return (Any size code) + where size = cmmTypeSize pk -- catch simple cases of zero- or sign-extended load -getRegister (CmmMachOp (MO_U_Conv I8 I32) [CmmLoad mem _]) = do +getRegister (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do Amode addr addr_code <- getAmode mem - return (Any I32 (\dst -> addr_code `snocOL` LD I8 dst addr)) + return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) -- Note: there is no Load Byte Arithmetic instruction, so no signed 
case here -getRegister (CmmMachOp (MO_U_Conv I16 I32) [CmmLoad mem _]) = do +getRegister (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do Amode addr addr_code <- getAmode mem - return (Any I32 (\dst -> addr_code `snocOL` LD I16 dst addr)) + return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) -getRegister (CmmMachOp (MO_S_Conv I16 I32) [CmmLoad mem _]) = do +getRegister (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do Amode addr addr_code <- getAmode mem - return (Any I32 (\dst -> addr_code `snocOL` LA I16 dst addr)) + return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) getRegister (CmmMachOp mop [x]) -- unary MachOps = case mop of - MO_Not rep -> trivialUCode rep NOT x + MO_Not rep -> triv_ucode_int rep NOT + + MO_F_Neg w -> triv_ucode_float w FNEG + MO_S_Neg w -> triv_ucode_int w NEG - MO_S_Conv F64 F32 -> trivialUCode F32 FRSP x - MO_S_Conv F32 F64 -> conversionNop F64 x + MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x + MO_FF_Conv W32 W64 -> conversionNop FF64 x - MO_S_Conv from to - | from == to -> conversionNop to x - | isFloatingRep from -> coerceFP2Int from to x - | isFloatingRep to -> coerceInt2FP from to x + MO_FS_Conv from to -> coerceFP2Int from to x + MO_SF_Conv from to -> coerceInt2FP from to x + + MO_SS_Conv from to + | from == to -> conversionNop (intSize to) x -- narrowing is a nop: we treat the high bits as undefined - MO_S_Conv I32 to -> conversionNop to x - MO_S_Conv I16 I8 -> conversionNop I8 x - MO_S_Conv I8 to -> trivialUCode to (EXTS I8) x - MO_S_Conv I16 to -> trivialUCode to (EXTS I16) x + MO_SS_Conv W32 to -> conversionNop (intSize to) x + MO_SS_Conv W16 W8 -> conversionNop II8 x + MO_SS_Conv W8 to -> triv_ucode_int to (EXTS II8) + MO_SS_Conv W16 to -> triv_ucode_int to (EXTS II16) - MO_U_Conv from to - | from == to -> conversionNop to x + MO_UU_Conv from to + | from == to -> conversionNop (intSize to) x -- narrowing is a nop: we treat the high bits as undefined - MO_U_Conv I32 to -> conversionNop to x - 
MO_U_Conv I16 I8 -> conversionNop I8 x - MO_U_Conv I8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 I32)) - MO_U_Conv I16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 I32)) - - MO_S_Neg F32 -> trivialUCode F32 FNEG x - MO_S_Neg F64 -> trivialUCode F64 FNEG x - MO_S_Neg rep -> trivialUCode rep NEG x - + MO_UU_Conv W32 to -> conversionNop (intSize to) x + MO_UU_Conv W16 W8 -> conversionNop II8 x + MO_UU_Conv W8 to -> trivialCode to False AND x (CmmLit (CmmInt 255 W32)) + MO_UU_Conv W16 to -> trivialCode to False AND x (CmmLit (CmmInt 65535 W32)) + where - conversionNop new_rep expr + triv_ucode_int width instr = trivialUCode (intSize width) instr x + triv_ucode_float width instr = trivialUCode (floatSize width) instr x + + conversionNop new_size expr = do e_code <- getRegister expr - return (swizzleRegisterRep e_code new_rep) + return (swizzleRegisterRep e_code new_size) getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps = case mop of - MO_Eq F32 -> condFltReg EQQ x y - MO_Ne F32 -> condFltReg NE x y - - MO_S_Gt F32 -> condFltReg GTT x y - MO_S_Ge F32 -> condFltReg GE x y - MO_S_Lt F32 -> condFltReg LTT x y - MO_S_Le F32 -> condFltReg LE x y - - MO_Eq F64 -> condFltReg EQQ x y - MO_Ne F64 -> condFltReg NE x y - - MO_S_Gt F64 -> condFltReg GTT x y - MO_S_Ge F64 -> condFltReg GE x y - MO_S_Lt F64 -> condFltReg LTT x y - MO_S_Le F64 -> condFltReg LE x y + MO_F_Eq w -> condFltReg EQQ x y + MO_F_Ne w -> condFltReg NE x y + MO_F_Gt w -> condFltReg GTT x y + MO_F_Ge w -> condFltReg GE x y + MO_F_Lt w -> condFltReg LTT x y + MO_F_Le w -> condFltReg LE x y MO_Eq rep -> condIntReg EQQ (extendUExpr rep x) (extendUExpr rep y) MO_Ne rep -> condIntReg NE (extendUExpr rep x) (extendUExpr rep y) @@ -1672,22 +1651,17 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_U_Lt rep -> condIntReg LU (extendUExpr rep x) (extendUExpr rep y) MO_U_Le rep -> condIntReg LEU (extendUExpr rep x) (extendUExpr rep y) - MO_Add F32 -> trivialCodeNoImm F32 (FADD F32) x y - 
MO_Sub F32 -> trivialCodeNoImm F32 (FSUB F32) x y - MO_Mul F32 -> trivialCodeNoImm F32 (FMUL F32) x y - MO_S_Quot F32 -> trivialCodeNoImm F32 (FDIV F32) x y + MO_F_Add w -> triv_float w FADD + MO_F_Sub w -> triv_float w FSUB + MO_F_Mul w -> triv_float w FMUL + MO_F_Quot w -> triv_float w FDIV - MO_Add F64 -> trivialCodeNoImm F64 (FADD F64) x y - MO_Sub F64 -> trivialCodeNoImm F64 (FSUB F64) x y - MO_Mul F64 -> trivialCodeNoImm F64 (FMUL F64) x y - MO_S_Quot F64 -> trivialCodeNoImm F64 (FDIV F64) x y - -- optimize addition with 32-bit immediate -- (needed for PIC) - MO_Add I32 -> + MO_Add W32 -> case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate I32 True (-imm) - -> trivialCode I32 True ADD x (CmmLit $ CmmInt imm immrep) + CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm) + -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) CmmLit lit -> do (src, srcCode) <- getSomeReg x @@ -1696,25 +1670,25 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps ADDIS dst src (HA imm), ADD dst dst (RIImm (LO imm)) ] - return (Any I32 code) - _ -> trivialCode I32 True ADD x y + return (Any II32 code) + _ -> trivialCode W32 True ADD x y MO_Add rep -> trivialCode rep True ADD x y MO_Sub rep -> case y of -- subfi ('substract from' with immediate) doesn't exist CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) - _ -> trivialCodeNoImm rep SUBF y x + _ -> trivialCodeNoImm' (intSize rep) SUBF y x MO_Mul rep -> trivialCode rep True MULLW x y - MO_S_MulMayOflo I32 -> trivialCodeNoImm I32 MULLW_MayOflo x y + MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y - MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= I32): not implemented" + MO_S_MulMayOflo rep -> panic "S_MulMayOflo (rep /= II32): not implemented" MO_U_MulMayOflo rep -> panic "U_MulMayOflo: not implemented" - MO_S_Quot rep -> trivialCodeNoImm rep DIVW (extendSExpr rep x) (extendSExpr rep y) - 
MO_U_Quot rep -> trivialCodeNoImm rep DIVWU (extendUExpr rep x) (extendUExpr rep y) + MO_S_Quot rep -> trivialCodeNoImm' (intSize rep) DIVW (extendSExpr rep x) (extendSExpr rep y) + MO_U_Quot rep -> trivialCodeNoImm' (intSize rep) DIVWU (extendUExpr rep x) (extendUExpr rep y) MO_S_Rem rep -> remainderCode rep DIVW (extendSExpr rep x) (extendSExpr rep y) MO_U_Rem rep -> remainderCode rep DIVWU (extendUExpr rep x) (extendUExpr rep y) @@ -1726,42 +1700,46 @@ getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_Shl rep -> trivialCode rep False SLW x y MO_S_Shr rep -> trivialCode rep False SRAW (extendSExpr rep x) y MO_U_Shr rep -> trivialCode rep False SRW (extendUExpr rep x) y + where + triv_float :: Width -> (Size -> Reg -> Reg -> Reg -> Instr) -> NatM Register + triv_float width instr = trivialCodeNoImm (floatSize width) instr x y getRegister (CmmLit (CmmInt i rep)) | Just imm <- makeImmediate rep True i = let code dst = unitOL (LI dst imm) in - return (Any rep code) + return (Any (intSize rep) code) getRegister (CmmLit (CmmFloat f frep)) = do lbl <- getNewLabelNat dflags <- getDynFlagsNat dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl Amode addr addr_code <- getAmode dynRef - let code dst = + let size = floatSize frep + code dst = LDATA ReadOnlyData [CmmDataLabel lbl, CmmStaticLit (CmmFloat f frep)] - `consOL` (addr_code `snocOL` LD frep dst addr) - return (Any frep code) + `consOL` (addr_code `snocOL` LD size dst addr) + return (Any size code) getRegister (CmmLit lit) - = let rep = cmmLitRep lit + = let rep = cmmLitType lit imm = litToImm lit code dst = toOL [ LIS dst (HA imm), ADD dst dst (RIImm (LO imm)) ] - in return (Any rep code) + in return (Any (cmmTypeSize rep) code) getRegister other = pprPanic "getRegister(ppc)" (pprExpr other) -- extend?Rep: wrap integer expression of type rep - -- in a conversion to I32 -extendSExpr I32 x = x -extendSExpr rep x = CmmMachOp (MO_S_Conv rep I32) [x] -extendUExpr I32 x = x -extendUExpr rep x = 
CmmMachOp (MO_U_Conv rep I32) [x] + -- in a conversion to II32 +extendSExpr W32 x = x +extendSExpr rep x = CmmMachOp (MO_SS_Conv rep W32) [x] +extendUExpr W32 x = x +extendUExpr rep x = CmmMachOp (MO_UU_Conv rep W32) [x] #endif /* powerpc_TARGET_ARCH */ @@ -1838,7 +1816,7 @@ getAmode other #if x86_64_TARGET_ARCH -getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg), +getAmode (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), CmmLit displacement]) = return $ Amode (ripRel (litToImm displacement)) nilOL @@ -1850,14 +1828,14 @@ getAmode (CmmMachOp (MO_Add I64) [CmmReg (CmmGlobal PicBaseReg), -- what mangleIndexTree has just done. getAmode (CmmMachOp (MO_Sub rep) [x, CmmLit lit@(CmmInt i _)]) | is32BitLit lit - -- ASSERT(rep == I32)??? + -- ASSERT(rep == II32)??? = do (x_reg, x_code) <- getSomeReg x let off = ImmInt (-(fromInteger i)) return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) getAmode (CmmMachOp (MO_Add rep) [x, CmmLit lit@(CmmInt i _)]) | is32BitLit lit - -- ASSERT(rep == I32)??? + -- ASSERT(rep == II32)??? = do (x_reg, x_code) <- getSomeReg x let off = ImmInt (fromInteger i) return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) @@ -1938,7 +1916,7 @@ getAmode (CmmMachOp (MO_Add rep) [x, y]) -- XXX Is this same as "leaf" in Stix? 
getAmode (CmmLit lit) = do - tmp <- getNewRegNat I32 + tmp <- getNewRegNat b32 let code = unitOL (SETHI (HI imm__2) tmp) return (Amode (AddrRegImm tmp (LO imm__2)) code) @@ -1955,24 +1933,24 @@ getAmode other #endif /* sparc_TARGET_ARCH */ #ifdef powerpc_TARGET_ARCH -getAmode (CmmMachOp (MO_Sub I32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate I32 True (-i) +getAmode (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W32 True (-i) = do (reg, code) <- getSomeReg x return (Amode (AddrRegImm reg off) code) -getAmode (CmmMachOp (MO_Add I32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate I32 True i +getAmode (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) + | Just off <- makeImmediate W32 True i = do (reg, code) <- getSomeReg x return (Amode (AddrRegImm reg off) code) -- optimize addition with 32-bit immediate -- (needed for PIC) -getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit]) +getAmode (CmmMachOp (MO_Add W32) [x, CmmLit lit]) = do - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 (src, srcCode) <- getSomeReg x let imm = litToImm lit code = srcCode `snocOL` ADDIS tmp src (HA imm) @@ -1980,12 +1958,12 @@ getAmode (CmmMachOp (MO_Add I32) [x, CmmLit lit]) getAmode (CmmLit lit) = do - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 let imm = litToImm lit code = unitOL (LIS tmp (HA imm)) return (Amode (AddrRegImm tmp (LO imm)) code) -getAmode (CmmMachOp (MO_Add I32) [x, y]) +getAmode (CmmMachOp (MO_Add W32) [x, y]) = do (regX, codeX) <- getSomeReg x (regY, codeY) <- getSomeReg y @@ -2019,17 +1997,17 @@ getNonClobberedOperand (CmmLit lit) return (OpAddr (ripRel (ImmCLbl lbl)), code) #endif getNonClobberedOperand (CmmLit lit) - | is32BitLit lit && not (isFloatingRep (cmmLitRep lit)) = + | is32BitLit lit && not (isFloatType (cmmLitType lit)) = return (OpImm (litToImm lit), nilOL) getNonClobberedOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do + | IF_ARCH_i386(not (isFloatType 
pk) && not (isWord64 pk), True) = do Amode src mem_code <- getAmode mem (src',save_code) <- if (amodeCouldBeClobbered src) then do - tmp <- getNewRegNat wordRep + tmp <- getNewRegNat wordSize return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), - unitOL (LEA I32 (OpAddr src) (OpReg tmp))) + unitOL (LEA II32 (OpAddr src) (OpReg tmp))) else return (src, nilOL) return (OpAddr src', save_code `appOL` mem_code) @@ -2055,10 +2033,10 @@ getOperand (CmmLit lit) return (OpAddr (ripRel (ImmCLbl lbl)), code) #endif getOperand (CmmLit lit) - | is32BitLit lit && not (isFloatingRep (cmmLitRep lit)) = do + | is32BitLit lit && not (isFloatType (cmmLitType lit)) = do return (OpImm (litToImm lit), nilOL) getOperand (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do + | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do Amode src mem_code <- getAmode mem return (OpAddr src, mem_code) getOperand e = do @@ -2080,7 +2058,7 @@ isSuitableFloatingPointLit _ = False getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) getRegOrMem (CmmLoad mem pk) - | IF_ARCH_i386(not (isFloatingRep pk) && pk /= I64, True) = do + | IF_ARCH_i386(not (isFloatType pk) && not (isWord64 pk), True) = do Amode src mem_code <- getAmode mem return (OpAddr src, mem_code) getRegOrMem e = do @@ -2088,7 +2066,7 @@ getRegOrMem e = do return (OpReg reg, code) #if x86_64_TARGET_ARCH -is32BitLit (CmmInt i I64) = is32BitInteger i +is32BitLit (CmmInt i W64) = is32BitInteger i -- assume that labels are in the range 0-2^31-1: this assumes the -- small memory model (see gcc docs, -mcmodel=small). 
#endif @@ -2126,21 +2104,19 @@ getCondCode = panic "MachCode.getCondCode: not on Alphas" getCondCode (CmmMachOp mop [x, y]) = case mop of - MO_Eq F32 -> condFltCode EQQ x y - MO_Ne F32 -> condFltCode NE x y - - MO_S_Gt F32 -> condFltCode GTT x y - MO_S_Ge F32 -> condFltCode GE x y - MO_S_Lt F32 -> condFltCode LTT x y - MO_S_Le F32 -> condFltCode LE x y - - MO_Eq F64 -> condFltCode EQQ x y - MO_Ne F64 -> condFltCode NE x y - - MO_S_Gt F64 -> condFltCode GTT x y - MO_S_Ge F64 -> condFltCode GE x y - MO_S_Lt F64 -> condFltCode LTT x y - MO_S_Le F64 -> condFltCode LE x y + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y MO_Eq rep -> condIntCode EQQ x y MO_Ne rep -> condIntCode NE x y @@ -2166,21 +2142,19 @@ getCondCode other = pprPanic "getCondCode(2)(x86,sparc)" (ppr other) getCondCode (CmmMachOp mop [x, y]) = case mop of - MO_Eq F32 -> condFltCode EQQ x y - MO_Ne F32 -> condFltCode NE x y - - MO_S_Gt F32 -> condFltCode GTT x y - MO_S_Ge F32 -> condFltCode GE x y - MO_S_Lt F32 -> condFltCode LTT x y - MO_S_Le F32 -> condFltCode LE x y - - MO_Eq F64 -> condFltCode EQQ x y - MO_Ne F64 -> condFltCode NE x y - - MO_S_Gt F64 -> condFltCode GTT x y - MO_S_Ge F64 -> condFltCode GE x y - MO_S_Lt F64 -> condFltCode LTT x y - MO_S_Le F64 -> condFltCode LE x y + MO_F_Eq W32 -> condFltCode EQQ x y + MO_F_Ne W32 -> condFltCode NE x y + MO_F_Gt W32 -> condFltCode GTT x y + MO_F_Ge W32 -> condFltCode GE x y + MO_F_Lt W32 -> condFltCode LTT x y + MO_F_Le W32 -> condFltCode LE x y + + MO_F_Eq W64 -> condFltCode EQQ x y + MO_F_Ne W64 -> condFltCode NE x y + MO_F_Gt W64 -> condFltCode GTT x y + MO_F_Ge 
W64 -> condFltCode GE x y + MO_F_Lt W64 -> condFltCode LTT x y + MO_F_Le W64 -> condFltCode LE x y MO_Eq rep -> condIntCode EQQ (extendUExpr rep x) (extendUExpr rep y) MO_Ne rep -> condIntCode NE (extendUExpr rep x) (extendUExpr rep y) @@ -2222,7 +2196,7 @@ condIntCode cond (CmmLoad x pk) (CmmLit lit) | is32BitLit lit = do let imm = litToImm lit code = x_code `snocOL` - CMP pk (OpImm imm) (OpAddr x_addr) + CMP (cmmTypeSize pk) (OpImm imm) (OpAddr x_addr) -- return (CondCode False cond code) @@ -2234,7 +2208,7 @@ condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) (x_reg, x_code) <- getSomeReg x let code = x_code `snocOL` - TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg) + TEST (intSize pk) (OpImm (ImmInteger mask)) (OpReg x_reg) -- return (CondCode False cond code) @@ -2243,7 +2217,7 @@ condIntCode cond x (CmmLit (CmmInt 0 pk)) = do (x_reg, x_code) <- getSomeReg x let code = x_code `snocOL` - TEST pk (OpReg x_reg) (OpReg x_reg) + TEST (intSize pk) (OpReg x_reg) (OpReg x_reg) -- return (CondCode False cond code) @@ -2253,7 +2227,7 @@ condIntCode cond x y | isOperand y = do (y_op, y_code) <- getOperand y let code = x_code `appOL` y_code `snocOL` - CMP (cmmExprRep x) y_op (OpReg x_reg) + CMP (cmmTypeSize (cmmExprType x)) y_op (OpReg x_reg) -- in return (CondCode False cond code) @@ -2264,7 +2238,7 @@ condIntCode cond x y = do let code = y_code `appOL` x_code `snocOL` - CMP (cmmExprRep x) (OpReg y_reg) x_op + CMP (cmmTypeSize (cmmExprType x)) (OpReg y_reg) x_op -- in return (CondCode False cond code) #endif @@ -2292,7 +2266,7 @@ condFltCode cond x y = do let code = x_code `appOL` y_code `snocOL` - CMP (cmmExprRep x) y_op (OpReg x_reg) + CMP (floatSize $ cmmExprWidth x) y_op (OpReg x_reg) -- NB(1): we need to use the unsigned comparison operators on the -- result of this comparison. 
-- in @@ -2324,23 +2298,23 @@ condIntCode cond x y = do condFltCode cond x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y - tmp <- getNewRegNat F64 + tmp <- getNewRegNat FF64 let - promote x = FxTOy F32 F64 x tmp + promote x = FxTOy FF32 FF64 x tmp - pk1 = cmmExprRep x - pk2 = cmmExprRep y + pk1 = cmmExprType x + pk2 = cmmExprType y code__2 = if pk1 == pk2 then code1 `appOL` code2 `snocOL` FCMP True pk1 src1 src2 - else if pk1 == F32 then + else if typeWidth pk1 == W32 then code1 `snocOL` promote src1 `appOL` code2 `snocOL` - FCMP True F64 tmp src2 + FCMP True FF64 tmp src2 else code1 `appOL` code2 `snocOL` promote src2 `snocOL` - FCMP True F64 src1 tmp + FCMP True FF64 src1 tmp return (CondCode True cond code__2) #endif /* sparc_TARGET_ARCH */ @@ -2353,7 +2327,7 @@ condIntCode cond x (CmmLit (CmmInt y rep)) (src1, code) <- getSomeReg x let code' = code `snocOL` - (if condUnsigned cond then CMPL else CMP) I32 src1 (RIImm src2) + (if condUnsigned cond then CMPL else CMP) II32 src1 (RIImm src2) return (CondCode False cond code') condIntCode cond x y = do @@ -2361,7 +2335,7 @@ condIntCode cond x y = do (src2, code2) <- getSomeReg y let code' = code1 `appOL` code2 `snocOL` - (if condUnsigned cond then CMPL else CMP) I32 src1 (RIReg src2) + (if condUnsigned cond then CMPL else CMP) II32 src1 (RIReg src2) return (CondCode False cond code') condFltCode cond x y = do @@ -2391,11 +2365,11 @@ condFltCode cond x y = do -- fails when the right hand side is forced into a fixed register -- (e.g. the result of a call). 
-assignMem_IntCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_IntCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock +assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock -assignMem_FltCode :: MachRep -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_FltCode :: MachRep -> CmmReg -> CmmExpr -> NatM InstrBlock +assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -2441,7 +2415,7 @@ assignIntCode pk dst src -- address. assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, CmmLit (CmmInt i _)]) - | addr == addr2, pk /= I64 || is32BitInteger i, + | addr == addr2, pk /= II64 || is32BitInteger i, Just instr <- check op = do Amode amode code_addr <- getAmode addr let code = code_addr `snocOL` @@ -2594,7 +2568,7 @@ assignMem_FltCode pk addr src = do (src__2, code2) <- getSomeReg src tmp1 <- getNewRegNat pk let - pk__2 = cmmExprRep src + pk__2 = cmmExprType src code__2 = code1 `appOL` code2 `appOL` if pk == pk__2 then unitOL (ST pk src__2 dst__2) @@ -2806,7 +2780,7 @@ genCondJump lbl (StPrim op [x, StDouble 0.0]) genCondJump lbl (StPrim op [x, y]) | fltCmpOp op = trivialFCode pr instr x y `thenNat` \ register -> - getNewRegNat F64 `thenNat` \ tmp -> + getNewRegNat FF64 `thenNat` \ tmp -> let code = registerCode register tmp result = registerName register tmp @@ -2967,8 +2941,8 @@ genCondJump id bool = do genCCall :: CmmCallTarget -- function to call - -> CmmFormals -- where to put the result - -> CmmActuals -- arguments (of mixed type) + -> HintedCmmFormals -- where to put the result + -> HintedCmmActuals -- arguments (of mixed type) -> NatM InstrBlock -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3011,13 +2985,13 @@ genCCall fn cconv result_regs args get_arg ((iDst,fDst):dsts, offset) arg = 
getRegister arg `thenNat` \ register -> let - reg = if isFloatingRep pk then fDst else iDst + reg = if isFloatType pk then fDst else iDst code = registerCode register reg src = registerName register reg pk = registerRep register in return ( - if isFloatingRep pk then + if isFloatType pk then ((dsts, offset), if isFixed register then code . mkSeqInstr (FMOV src fDst) else code) @@ -3052,32 +3026,32 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- we keep it this long in order to prevent earlier optimisations. -- we only cope with a single result for foreign calls -genCCall (CmmPrim op) [CmmKinded r _] args = do +genCCall (CmmPrim op) [CmmHinted r _] args = do l1 <- getNewLabelNat l2 <- getNewLabelNat case op of - MO_F32_Sqrt -> actuallyInlineFloatOp F32 (GSQRT F32) args - MO_F64_Sqrt -> actuallyInlineFloatOp F64 (GSQRT F64) args - - MO_F32_Sin -> actuallyInlineFloatOp F32 (GSIN F32 l1 l2) args - MO_F64_Sin -> actuallyInlineFloatOp F64 (GSIN F64 l1 l2) args + MO_F32_Sqrt -> actuallyInlineFloatOp GSQRT FF32 args + MO_F64_Sqrt -> actuallyInlineFloatOp GSQRT FF64 args - MO_F32_Cos -> actuallyInlineFloatOp F32 (GCOS F32 l1 l2) args - MO_F64_Cos -> actuallyInlineFloatOp F64 (GCOS F64 l1 l2) args - - MO_F32_Tan -> actuallyInlineFloatOp F32 (GTAN F32 l1 l2) args - MO_F64_Tan -> actuallyInlineFloatOp F64 (GTAN F64 l1 l2) args + MO_F32_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF32 args + MO_F64_Sin -> actuallyInlineFloatOp (\s -> GSIN s l1 l2) FF64 args + + MO_F32_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF32 args + MO_F64_Cos -> actuallyInlineFloatOp (\s -> GCOS s l1 l2) FF64 args + + MO_F32_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF32 args + MO_F64_Tan -> actuallyInlineFloatOp (\s -> GTAN s l1 l2) FF64 args other_op -> outOfLineFloatOp op r args where - actuallyInlineFloatOp rep instr [CmmKinded x _] - = do res <- trivialUFCode rep instr x + actuallyInlineFloatOp instr size [CmmHinted x _] + = do res <- trivialUFCode size (instr size) 
x any <- anyReg res return (any (getRegisterReg (CmmLocal r))) genCCall target dest_regs args = do let - sizes = map (arg_size . cmmExprRep . kindlessCmm) (reverse args) + sizes = map (arg_size . cmmExprType . hintlessCmm) (reverse args) #if !darwin_TARGET_OS tot_arg_size = sum sizes #else @@ -3101,14 +3075,14 @@ genCCall target dest_regs args = do return (unitOL (CALL (Left fn_imm) []), conv) where fn_imm = ImmCLbl lbl CmmCallee expr conv - -> do (dyn_c, dyn_r, dyn_rep) <- get_op expr - ASSERT(dyn_rep == I32) - return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) + -> do { (dyn_c, dyn_r) <- get_op expr + ; ASSERT( isWord32 (cmmExprType expr) ) + return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } let push_code #if darwin_TARGET_OS | arg_pad_size /= 0 - = toOL [SUB I32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), + = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), DELTA (delta0 - arg_pad_size)] `appOL` concatOL push_codes | otherwise @@ -3119,7 +3093,7 @@ genCCall target dest_regs args = do -- Deallocate parameters after call for ccall; -- but not for stdcall (callee does it) (if cconv == StdCallConv || tot_arg_size==0 then [] else - [ADD I32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) + [ADD II32 (OpImm (ImmInt tot_arg_size)) (OpReg esp)]) ++ [DELTA (delta + tot_arg_size)] ) @@ -3129,17 +3103,16 @@ genCCall target dest_regs args = do let -- assign the results, if necessary assign_code [] = nilOL - assign_code [CmmKinded dest _hint] = - case rep of - I64 -> toOL [MOV I32 (OpReg eax) (OpReg r_dest), - MOV I32 (OpReg edx) (OpReg r_dest_hi)] - F32 -> unitOL (GMOV fake0 r_dest) - F64 -> unitOL (GMOV fake0 r_dest) - rep -> unitOL (MOV rep (OpReg eax) (OpReg r_dest)) + assign_code [CmmHinted dest _hint] + | isFloatType ty = unitOL (GMOV fake0 r_dest) + | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), + MOV II32 (OpReg edx) (OpReg r_dest_hi)] + | otherwise = unitOL (MOV (intSize w) (OpReg eax) (OpReg r_dest)) where + ty = localRegType dest + w = 
typeWidth ty r_dest_hi = getHiVRegFromLo r_dest - rep = localRegRep dest - r_dest = getRegisterReg (CmmLocal dest) + r_dest = getRegisterReg (CmmLocal dest) assign_code many = panic "genCCall.assign_code many" return (push_code `appOL` @@ -3147,20 +3120,18 @@ genCCall target dest_regs args = do assign_code dest_regs) where - arg_size F64 = 8 - arg_size F32 = 4 - arg_size I64 = 8 - arg_size _ = 4 + arg_size :: CmmType -> Int -- Width in bytes + arg_size ty = widthInBytes (typeWidth ty) roundTo a x | x `mod` a == 0 = x | otherwise = x + a - (x `mod` a) - push_arg :: (CmmKinded CmmExpr){-current argument-} + push_arg :: HintedCmmActual {-current argument-} -> NatM InstrBlock -- code - push_arg (CmmKinded arg _hint) -- we don't need the hints on x86 - | arg_rep == I64 = do + push_arg (CmmHinted arg _hint) -- we don't need the hints on x86 + | isWord64 arg_ty = do ChildCode64 code r_lo <- iselExpr64 arg delta <- getDeltaNat setDeltaNat (delta - 8) @@ -3168,42 +3139,43 @@ genCCall target dest_regs args = do r_hi = getHiVRegFromLo r_lo -- in return ( code `appOL` - toOL [PUSH I32 (OpReg r_hi), DELTA (delta - 4), - PUSH I32 (OpReg r_lo), DELTA (delta - 8), + toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), + PUSH II32 (OpReg r_lo), DELTA (delta - 8), DELTA (delta-8)] ) | otherwise = do - (code, reg, sz) <- get_op arg + (code, reg) <- get_op arg delta <- getDeltaNat - let size = arg_size sz + let size = arg_size arg_ty -- Byte size setDeltaNat (delta-size) - if (case sz of F64 -> True; F32 -> True; _ -> False) + if (isFloatType arg_ty) then return (code `appOL` - toOL [SUB I32 (OpImm (ImmInt size)) (OpReg esp), + toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), DELTA (delta-size), - GST sz reg (AddrBaseIndex (EABaseReg esp) + GST (floatSize (typeWidth arg_ty)) + reg (AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt 0))] ) else return (code `snocOL` - PUSH I32 (OpReg reg) `snocOL` + PUSH II32 (OpReg reg) `snocOL` DELTA (delta-size) ) where - arg_rep = cmmExprRep arg + 
arg_ty = cmmExprType arg ------------ - get_op :: CmmExpr -> NatM (InstrBlock, Reg, MachRep) -- code, reg, size + get_op :: CmmExpr -> NatM (InstrBlock, Reg) -- code, reg get_op op = do (reg,code) <- getSomeReg op - return (code, reg, cmmExprRep op) + return (code, reg) #endif /* i386_TARGET_ARCH */ #if i386_TARGET_ARCH || x86_64_TARGET_ARCH -outOfLineFloatOp :: CallishMachOp -> CmmFormalWithoutKind -> CmmActuals +outOfLineFloatOp :: CallishMachOp -> CmmFormal -> HintedCmmActuals -> NatM InstrBlock outOfLineFloatOp mop res args = do @@ -3211,15 +3183,15 @@ outOfLineFloatOp mop res args targetExpr <- cmmMakeDynamicReference dflags addImportNat CallReference lbl let target = CmmCallee targetExpr CCallConv - if localRegRep res == F64 + if isFloat64 (localRegType res) then - stmtToInstrs (CmmCall target [CmmKinded res FloatHint] args CmmUnsafe CmmMayReturn) + stmtToInstrs (CmmCall target [CmmHinted res NoHint] args CmmUnsafe CmmMayReturn) else do uq <- getUniqueNat let - tmp = LocalReg uq F64 GCKindNonPtr + tmp = LocalReg uq f64 -- in - code1 <- stmtToInstrs (CmmCall target [CmmKinded tmp FloatHint] args CmmUnsafe CmmMayReturn) + code1 <- stmtToInstrs (CmmCall target [CmmHinted tmp NoHint] args CmmUnsafe CmmMayReturn) code2 <- stmtToInstrs (CmmAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) return (code1 `appOL` code2) where @@ -3269,7 +3241,7 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- we keep it this long in order to prevent earlier optimisations. 
-genCCall (CmmPrim op) [CmmKinded r _] args = +genCCall (CmmPrim op) [CmmHinted r _] args = outOfLineFloatOp op r args genCCall target dest_regs args = do @@ -3303,7 +3275,7 @@ genCCall target dest_regs args = do delta <- getDeltaNat setDeltaNat (delta-8) return (tot_arg_size+8, toOL [ - SUB I64 (OpImm (ImmInt 8)) (OpReg rsp), + SUB II64 (OpImm (ImmInt 8)) (OpReg rsp), DELTA (delta-8) ]) @@ -3332,14 +3304,14 @@ genCCall target dest_regs args = do -- It's not safe to omit this assignment, even if the number -- of SSE regs in use is zero. If %al is larger than 8 -- on entry to a varargs function, seg faults ensue. - assign_eax n = unitOL (MOV I32 (OpImm (ImmInt n)) (OpReg eax)) + assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax)) let call = callinsns `appOL` toOL ( -- Deallocate parameters after call for ccall; -- but not for stdcall (callee does it) (if cconv == StdCallConv || real_size==0 then [] else - [ADD wordRep (OpImm (ImmInt real_size)) (OpReg esp)]) + [ADD (intSize wordWidth) (OpImm (ImmInt real_size)) (OpReg esp)]) ++ [DELTA (delta + real_size)] ) @@ -3349,13 +3321,13 @@ genCCall target dest_regs args = do let -- assign the results, if necessary assign_code [] = nilOL - assign_code [CmmKinded dest _hint] = - case rep of - F32 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest)) - F64 -> unitOL (MOV rep (OpReg xmm0) (OpReg r_dest)) - rep -> unitOL (MOV rep (OpReg rax) (OpReg r_dest)) + assign_code [CmmHinted dest _hint] = + case typeWidth rep of + W32 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest)) + W64 | isFloatType rep -> unitOL (MOV (floatSize W32) (OpReg xmm0) (OpReg r_dest)) + _ -> unitOL (MOV (cmmTypeSize rep) (OpReg rax) (OpReg r_dest)) where - rep = localRegRep dest + rep = localRegType dest r_dest = getRegisterReg (CmmLocal dest) assign_code many = panic "genCCall.assign_code many" @@ -3369,17 +3341,17 @@ genCCall target dest_regs args = do where arg_size = 8 -- always, at the mo - load_args :: [CmmKinded 
CmmExpr] + load_args :: [CmmHinted CmmExpr] -> [Reg] -- int regs avail for args -> [Reg] -- FP regs avail for args -> InstrBlock - -> NatM ([CmmKinded CmmExpr],[Reg],[Reg],InstrBlock) + -> NatM ([CmmHinted CmmExpr],[Reg],[Reg],InstrBlock) load_args args [] [] code = return (args, [], [], code) -- no more regs to use load_args [] aregs fregs code = return ([], aregs, fregs, code) -- no more args to push - load_args ((CmmKinded arg hint) : rest) aregs fregs code - | isFloatingRep arg_rep = + load_args ((CmmHinted arg hint) : rest) aregs fregs code + | isFloatType arg_rep = case fregs of [] -> push_this_arg (r:rs) -> do @@ -3392,37 +3364,38 @@ genCCall target dest_regs args = do arg_code <- getAnyReg arg load_args rest rs fregs (code `appOL` arg_code r) where - arg_rep = cmmExprRep arg + arg_rep = cmmExprType arg push_this_arg = do (args',ars,frs,code') <- load_args rest aregs fregs code - return ((CmmKinded arg hint):args', ars, frs, code') + return ((CmmHinted arg hint):args', ars, frs, code') push_args [] code = return code - push_args ((CmmKinded arg hint):rest) code - | isFloatingRep arg_rep = do + push_args ((CmmHinted arg hint):rest) code + | isFloatType arg_rep = do (arg_reg, arg_code) <- getSomeReg arg delta <- getDeltaNat setDeltaNat (delta-arg_size) let code' = code `appOL` arg_code `appOL` toOL [ - SUB wordRep (OpImm (ImmInt arg_size)) (OpReg rsp) , + SUB (intSize wordWidth) (OpImm (ImmInt arg_size)) (OpReg rsp) , DELTA (delta-arg_size), - MOV arg_rep (OpReg arg_reg) (OpAddr (spRel 0))] + MOV (floatSize width) (OpReg arg_reg) (OpAddr (spRel 0))] push_args rest code' | otherwise = do -- we only ever generate word-sized function arguments. Promotion -- has already happened: our Int8# type is kept sign-extended -- in an Int#, for example. 
- ASSERT(arg_rep == I64) return () + ASSERT(width == W64) return () (arg_op, arg_code) <- getOperand arg delta <- getDeltaNat setDeltaNat (delta-arg_size) - let code' = code `appOL` toOL [PUSH I64 arg_op, + let code' = code `appOL` toOL [PUSH II64 arg_op, DELTA (delta-arg_size)] push_args rest code' where - arg_rep = cmmExprRep arg + arg_rep = cmmExprType arg + width = typeWidth arg_rep #endif -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3460,7 +3433,7 @@ genCCall target dest_regs args = do genCCall target dest_regs argsAndHints = do let - args = map kindlessCmm argsAndHints + args = map hintlessCmm argsAndHints argcode_and_vregs <- mapM arg_to_int_vregs args let (argcodes, vregss) = unzip argcode_and_vregs @@ -3510,7 +3483,7 @@ genCCall target dest_regs argsAndHints = do = [] move_final (v:vs) [] offset -- out of aregs; move to stack - = ST I32 v (spRel offset) + = ST II32 v (spRel offset) : move_final vs [] (offset+1) move_final (v:vs) (a:az) offset -- move into an arg (%o[0..5]) reg @@ -3521,7 +3494,7 @@ genCCall target dest_regs argsAndHints = do -- or two integer vregs. 
arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) arg_to_int_vregs arg - | (cmmExprRep arg) == I64 + | isWord64 (cmmExprType arg) = do (ChildCode64 code r_lo) <- iselExpr64 arg let @@ -3530,34 +3503,34 @@ genCCall target dest_regs argsAndHints = do | otherwise = do (src, code) <- getSomeReg arg - tmp <- getNewRegNat (cmmExprRep arg) + tmp <- getNewRegNat (cmmExprType arg) let - pk = cmmExprRep arg + pk = cmmExprType arg case pk of - F64 -> do - v1 <- getNewRegNat I32 - v2 <- getNewRegNat I32 + FF64 -> do + v1 <- getNewRegNat II32 + v2 <- getNewRegNat II32 return ( code `snocOL` - FMOV F64 src f0 `snocOL` - ST F32 f0 (spRel 16) `snocOL` - LD I32 (spRel 16) v1 `snocOL` - ST F32 (fPair f0) (spRel 16) `snocOL` - LD I32 (spRel 16) v2 + FMOV FF64 src f0 `snocOL` + ST FF32 f0 (spRel 16) `snocOL` + LD II32 (spRel 16) v1 `snocOL` + ST FF32 (fPair f0) (spRel 16) `snocOL` + LD II32 (spRel 16) v2 , [v1,v2] ) - F32 -> do - v1 <- getNewRegNat I32 + FF32 -> do + v1 <- getNewRegNat II32 return ( code `snocOL` - ST F32 src (spRel 16) `snocOL` - LD I32 (spRel 16) v1 + ST FF32 src (spRel 16) `snocOL` + LD II32 (spRel 16) v1 , [v1] ) other -> do - v1 <- getNewRegNat I32 + v1 <- getNewRegNat II32 return ( code `snocOL` OR False g0 (RIReg src) v1 , @@ -3637,10 +3610,10 @@ outOfLineFloatOp mop = parameter is passed in an FPR. * SysV insists on either passing I64 arguments on the stack, or in two GPRs, starting with an odd-numbered GPR. It may skip a GPR to achieve this. - Darwin just treats an I64 like two separate I32s (high word first). - * I64 and F64 arguments are 8-byte aligned on the stack for SysV, but only + Darwin just treats an I64 like two separate II32s (high word first). + * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only 4-byte aligned like everything else on Darwin. - * The SysV spec claims that F32 is represented as F64 on the stack. GCC on + * The SysV spec claims that FF32 is represented as FF64 on the stack. 
GCC on PowerPC Linux does not agree, so neither do we. According to both conventions, The parameter area should be part of the @@ -3656,7 +3629,7 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return $ unitOL LWSYNC genCCall target dest_regs argsAndHints - = ASSERT (not $ any (`elem` [I8,I16]) argReps) + = ASSERT (not $ any (`elem` [II8,II16]) $ map cmmTypeSize argReps) -- we rely on argument promotion in the codeGen do (finalStack,passArgumentsCode,usedRegs) <- passArguments @@ -3665,13 +3638,13 @@ genCCall target dest_regs argsAndHints initialStackOffset (toOL []) [] - (labelOrExpr, reduceToF32) <- case target of + (labelOrExpr, reduceToFF32) <- case target of CmmCallee (CmmLit (CmmLabel lbl)) conv -> return (Left lbl, False) CmmCallee expr conv -> return (Right expr, False) CmmPrim mop -> outOfLineFloatOp mop let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode - codeAfter = move_sp_up finalStack `appOL` moveResult reduceToF32 + codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 case labelOrExpr of Left lbl -> do @@ -3690,20 +3663,20 @@ genCCall target dest_regs argsAndHints initialStackOffset = 24 -- size of linkage area + size of arguments, in bytes stackDelta _finalStack = roundTo 16 $ (24 +) $ max 32 $ sum $ - map machRepByteWidth argReps + map (widthInBytes . 
typeWidth) argReps #elif linux_TARGET_OS initialStackOffset = 8 stackDelta finalStack = roundTo 16 finalStack #endif - args = map kindlessCmm argsAndHints - argReps = map cmmExprRep args + args = map hintlessCmm argsAndHints + argReps = map cmmExprType args roundTo a x | x `mod` a == 0 = x | otherwise = x + a - (x `mod` a) move_sp_down finalStack | delta > 64 = - toOL [STU I32 sp (AddrRegImm sp (ImmInt (-delta))), + toOL [STU II32 sp (AddrRegImm sp (ImmInt (-delta))), DELTA (-delta)] | otherwise = nilOL where delta = stackDelta finalStack @@ -3716,8 +3689,8 @@ genCCall target dest_regs argsAndHints passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) - passArguments ((arg,I64):args) gprs fprs stackOffset - accumCode accumUsed = + passArguments ((arg,arg_ty):args) gprs fprs stackOffset + accumCode accumUsed | isWord64 arg_ty = do ChildCode64 code vr_lo <- iselExpr64 arg let vr_hi = getHiVRegFromLo vr_lo @@ -3733,13 +3706,13 @@ genCCall target dest_regs argsAndHints ((take 2 gprs) ++ accumUsed) where storeWord vr (gpr:_) offset = MR gpr vr - storeWord vr [] offset = ST I32 vr (AddrRegImm sp (ImmInt offset)) + storeWord vr [] offset = ST II32 vr (AddrRegImm sp (ImmInt offset)) #elif linux_TARGET_OS let stackOffset' = roundTo 8 stackOffset stackCode = accumCode `appOL` code - `snocOL` ST I32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) - `snocOL` ST I32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) + `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) + `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) regCode hireg loreg = accumCode `appOL` code `snocOL` MR hireg vr_hi @@ -3781,7 +3754,7 @@ genCCall target dest_regs argsAndHints (drop nGprs gprs) (drop nFprs fprs) (stackOffset' + stackBytes) - (accumCode `appOL` code `snocOL` ST rep vr stackSlot) + (accumCode `appOL` code `snocOL` ST (cmmTypeSize rep) vr stackSlot) accumUsed where #if darwin_TARGET_OS @@ -3790,33 +3763,34 @@ genCCall target 
dest_regs argsAndHints stackOffset' = stackOffset #else -- ... the SysV ABI requires 8-byte alignment for doubles. - stackOffset' | rep == F64 = roundTo 8 stackOffset + stackOffset' | isFloatType rep && typeWidth rep == W64 = + roundTo 8 stackOffset | otherwise = stackOffset #endif stackSlot = AddrRegImm sp (ImmInt stackOffset') - (nGprs, nFprs, stackBytes, regs) = case rep of - I32 -> (1, 0, 4, gprs) + (nGprs, nFprs, stackBytes, regs) = case cmmTypeSize rep of + II32 -> (1, 0, 4, gprs) #if darwin_TARGET_OS -- The Darwin ABI requires that we skip a corresponding number of GPRs when -- we use the FPRs. - F32 -> (1, 1, 4, fprs) - F64 -> (2, 1, 8, fprs) + FF32 -> (1, 1, 4, fprs) + FF64 -> (2, 1, 8, fprs) #elif linux_TARGET_OS -- ... the SysV ABI doesn't. - F32 -> (0, 1, 4, fprs) - F64 -> (0, 1, 8, fprs) + FF32 -> (0, 1, 4, fprs) + FF64 -> (0, 1, 8, fprs) #endif - moveResult reduceToF32 = + moveResult reduceToFF32 = case dest_regs of [] -> nilOL - [CmmKinded dest _hint] - | reduceToF32 && rep == F32 -> unitOL (FRSP r_dest f1) - | rep == F32 || rep == F64 -> unitOL (MR r_dest f1) - | rep == I64 -> toOL [MR (getHiVRegFromLo r_dest) r3, + [CmmHinted dest _hint] + | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) + | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) + | isWord64 rep -> toOL [MR (getHiVRegFromLo r_dest) r3, MR r_dest r4] | otherwise -> unitOL (MR r_dest r3) - where rep = cmmRegRep (CmmLocal dest) + where rep = cmmRegType (CmmLocal dest) r_dest = getRegisterReg (CmmLocal dest) outOfLineFloatOp mop = @@ -3889,7 +3863,7 @@ genSwitch expr ids jumpTable = map jumpTableEntryRel ids jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordRep) + = CmmStaticLit (CmmInt 0 wordWidth) jumpTableEntryRel (Just (BlockId id)) = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) where blockLabel = mkAsmTempLabel id @@ -3906,7 +3880,7 @@ genSwitch expr ids -- if L0 is not preceded by a non-anonymous label in its section. 
code = e_code `appOL` t_code `appOL` toOL [ - ADD wordRep op (OpReg tableReg), + ADD (intSize wordWidth) op (OpReg tableReg), JMP_TBL (OpReg tableReg) [ id | Just id <- ids ], LDATA Text (CmmDataLabel lbl : jumpTable) ] @@ -3919,18 +3893,18 @@ genSwitch expr ids -- binutils 2.17 is standard. code = e_code `appOL` t_code `appOL` toOL [ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - MOVSxL I32 + MOVSxL II32 (OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg wORD_SIZE) (ImmInt 0))) (OpReg reg), - ADD wordRep (OpReg reg) (OpReg tableReg), + ADD (intSize wordWidth) (OpReg reg) (OpReg tableReg), JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] ] #endif #else code = e_code `appOL` t_code `appOL` toOL [ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), - ADD wordRep op (OpReg tableReg), + ADD (intSize wordWidth) op (OpReg tableReg), JMP_TBL (OpReg tableReg) [ id | Just id <- ids ] ] #endif @@ -3953,7 +3927,7 @@ genSwitch expr ids | opt_PIC = do (reg,e_code) <- getSomeReg expr - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 lbl <- getNewLabelNat dflags <- getDynFlagsNat dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl @@ -3962,7 +3936,7 @@ genSwitch expr ids jumpTable = map jumpTableEntryRel ids jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 wordRep) + = CmmStaticLit (CmmInt 0 wordWidth) jumpTableEntryRel (Just (BlockId id)) = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) where blockLabel = mkAsmTempLabel id @@ -3970,7 +3944,7 @@ genSwitch expr ids code = e_code `appOL` t_code `appOL` toOL [ LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), SLW tmp reg (RIImm (ImmInt 2)), - LD I32 tmp (AddrRegReg tableReg tmp), + LD II32 tmp (AddrRegReg tableReg tmp), ADD tmp tmp (RIReg tableReg), MTCTR tmp, BCTR [ id | Just id <- ids ] @@ -3979,7 +3953,7 @@ genSwitch expr ids | otherwise = do (reg,e_code) <- getSomeReg expr - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 lbl <- getNewLabelNat let jumpTable = map jumpTableEntry ids @@ 
-3988,7 +3962,7 @@ genSwitch expr ids LDATA ReadOnlyData (CmmDataLabel lbl : jumpTable), SLW tmp reg (RIImm (ImmInt 2)), ADDIS tmp tmp (HA (ImmCLbl lbl)), - LD I32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), + LD II32 tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), MTCTR tmp, BCTR [ id | Just id <- ids ] ] @@ -3997,7 +3971,7 @@ genSwitch expr ids #error "ToDo: genSwitch" #endif -jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordRep) +jumpTableEntry Nothing = CmmStaticLit (CmmInt 0 wordWidth) jumpTableEntry (Just (BlockId id)) = CmmStaticLit (CmmLabel blockLabel) where blockLabel = mkAsmTempLabel id @@ -4030,14 +4004,14 @@ condFltReg = panic "MachCode.condFltReg (not on Alpha)" condIntReg cond x y = do CondCode _ cond cond_code <- condIntCode cond x y - tmp <- getNewRegNat I8 + tmp <- getNewRegNat II8 let code dst = cond_code `appOL` toOL [ SETCC cond (OpReg tmp), - MOVZxL I8 (OpReg tmp) (OpReg dst) + MOVZxL II8 (OpReg tmp) (OpReg dst) ] -- in - return (Any I32 code) + return (Any II32 code) #endif @@ -4045,14 +4019,14 @@ condIntReg cond x y = do condFltReg cond x y = do CondCode _ cond cond_code <- condFltCode cond x y - tmp <- getNewRegNat I8 + tmp <- getNewRegNat II8 let code dst = cond_code `appOL` toOL [ SETCC cond (OpReg tmp), - MOVZxL I8 (OpReg tmp) (OpReg dst) + MOVZxL II8 (OpReg tmp) (OpReg dst) ] -- in - return (Any I32 code) + return (Any II32 code) #endif @@ -4060,8 +4034,8 @@ condFltReg cond x y = do condFltReg cond x y = do CondCode _ cond cond_code <- condFltCode cond x y - tmp1 <- getNewRegNat wordRep - tmp2 <- getNewRegNat wordRep + tmp1 <- getNewRegNat wordSize + tmp2 <- getNewRegNat wordSize let -- We have to worry about unordered operands (eg. comparisons -- against NaN). 
If the operands are unordered, the comparison @@ -4087,22 +4061,22 @@ condFltReg cond x y = do plain_test dst = toOL [ SETCC cond (OpReg tmp1), - MOVZxL I8 (OpReg tmp1) (OpReg dst) + MOVZxL II8 (OpReg tmp1) (OpReg dst) ] or_unordered dst = toOL [ SETCC cond (OpReg tmp1), SETCC PARITY (OpReg tmp2), - OR I8 (OpReg tmp1) (OpReg tmp2), - MOVZxL I8 (OpReg tmp2) (OpReg dst) + OR II8 (OpReg tmp1) (OpReg tmp2), + MOVZxL II8 (OpReg tmp2) (OpReg dst) ] and_ordered dst = toOL [ SETCC cond (OpReg tmp1), SETCC NOTPARITY (OpReg tmp2), - AND I8 (OpReg tmp1) (OpReg tmp2), - MOVZxL I8 (OpReg tmp2) (OpReg dst) + AND II8 (OpReg tmp1) (OpReg tmp2), + MOVZxL II8 (OpReg tmp2) (OpReg dst) ] -- in - return (Any I32 code) + return (Any II32 code) #endif @@ -4112,45 +4086,45 @@ condFltReg cond x y = do condIntReg EQQ x (CmmLit (CmmInt 0 d)) = do (src, code) <- getSomeReg x - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 let code__2 dst = code `appOL` toOL [ SUB False True g0 (RIReg src) g0, SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any I32 code__2) + return (Any II32 code__2) condIntReg EQQ x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat I32 - tmp2 <- getNewRegNat I32 + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 let code__2 dst = code1 `appOL` code2 `appOL` toOL [ XOR False src1 (RIReg src2) dst, SUB False True g0 (RIReg dst) g0, SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any I32 code__2) + return (Any II32 code__2) condIntReg NE x (CmmLit (CmmInt 0 d)) = do (src, code) <- getSomeReg x - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 let code__2 dst = code `appOL` toOL [ SUB False True g0 (RIReg src) g0, ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any I32 code__2) + return (Any II32 code__2) condIntReg NE x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat I32 - tmp2 <- getNewRegNat I32 + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 let code__2 
dst = code1 `appOL` code2 `appOL` toOL [ XOR False src1 (RIReg src2) dst, SUB False True g0 (RIReg dst) g0, ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any I32 code__2) + return (Any II32 code__2) condIntReg cond x y = do BlockId lbl1 <- getBlockIdNat @@ -4164,7 +4138,7 @@ condIntReg cond x y = do NEWBLOCK (BlockId lbl1), OR False g0 (RIImm (ImmInt 1)) dst, NEWBLOCK (BlockId lbl2)] - return (Any I32 code__2) + return (Any II32 code__2) condFltReg cond x y = do BlockId lbl1 <- getBlockIdNat @@ -4179,7 +4153,7 @@ condFltReg cond x y = do NEWBLOCK (BlockId lbl1), OR False g0 (RIImm (ImmInt 1)) dst, NEWBLOCK (BlockId lbl2)] - return (Any I32 code__2) + return (Any II32 code__2) #endif /* sparc_TARGET_ARCH */ @@ -4222,7 +4196,7 @@ condReg getCond = do GEU -> (0, True) GU -> (1, False) - return (Any I32 code) + return (Any II32 code) condIntReg cond x y = condReg (condIntCode cond x y) condFltReg cond x y = condReg (condFltCode cond x y) @@ -4242,7 +4216,7 @@ condFltReg cond x y = condReg (condFltCode cond x y) -- have handled the constant-folding. 
trivialCode - :: MachRep + :: Width -- Int only -> IF_ARCH_alpha((Reg -> RI -> Reg -> Instr) ,IF_ARCH_i386 ((Operand -> Operand -> Instr) -> Maybe (Operand -> Operand -> Instr) @@ -4256,18 +4230,18 @@ trivialCode #ifndef powerpc_TARGET_ARCH trivialFCode - :: MachRep + :: Width -- Floating point only -> IF_ARCH_alpha((Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_sparc((MachRep -> Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_i386 ((MachRep -> Reg -> Reg -> Reg -> Instr) - ,IF_ARCH_x86_64 ((MachRep -> Operand -> Operand -> Instr) + ,IF_ARCH_sparc((Size -> Reg -> Reg -> Reg -> Instr) + ,IF_ARCH_i386 ((Size -> Reg -> Reg -> Reg -> Instr) + ,IF_ARCH_x86_64 ((Size -> Operand -> Operand -> Instr) ,)))) -> CmmExpr -> CmmExpr -- the two arguments -> NatM Register #endif trivialUCode - :: MachRep + :: Size -> IF_ARCH_alpha((RI -> Reg -> Instr) ,IF_ARCH_i386 ((Operand -> Instr) ,IF_ARCH_x86_64 ((Operand -> Instr) @@ -4279,7 +4253,7 @@ trivialUCode #ifndef powerpc_TARGET_ARCH trivialUFCode - :: MachRep + :: Size -> IF_ARCH_alpha((Reg -> Reg -> Instr) ,IF_ARCH_i386 ((Reg -> Reg -> Instr) ,IF_ARCH_x86_64 ((Reg -> Reg -> Instr) @@ -4335,8 +4309,8 @@ trivialUCode instr x trivialFCode _ instr x y = getRegister x `thenNat` \ register1 -> getRegister y `thenNat` \ register2 -> - getNewRegNat F64 `thenNat` \ tmp1 -> - getNewRegNat F64 `thenNat` \ tmp2 -> + getNewRegNat FF64 `thenNat` \ tmp1 -> + getNewRegNat FF64 `thenNat` \ tmp2 -> let code1 = registerCode register1 tmp1 src1 = registerName register1 tmp1 @@ -4347,17 +4321,17 @@ trivialFCode _ instr x y code__2 dst = asmSeqThen [code1 [], code2 []] . mkSeqInstr (instr src1 src2 dst) in - return (Any F64 code__2) + return (Any FF64 code__2) trivialUFCode _ instr x = getRegister x `thenNat` \ register -> - getNewRegNat F64 `thenNat` \ tmp -> + getNewRegNat FF64 `thenNat` \ tmp -> let code = registerCode register tmp src = registerName register tmp code__2 dst = code . 
mkSeqInstr (instr src dst) in - return (Any F64 code__2) + return (Any FF64 code__2) #endif /* alpha_TARGET_ARCH */ @@ -4410,7 +4384,7 @@ SDM's version of The Rules: register happens to be the destination register. -} -trivialCode rep instr (Just revinstr) (CmmLit lit_a) b +trivialCode width instr (Just revinstr) (CmmLit lit_a) b | is32BitLit lit_a = do b_code <- getAnyReg b let @@ -4418,9 +4392,10 @@ trivialCode rep instr (Just revinstr) (CmmLit lit_a) b = b_code dst `snocOL` revinstr (OpImm (litToImm lit_a)) (OpReg dst) -- in - return (Any rep code) + return (Any (intSize width) code) -trivialCode rep instr maybe_revinstr a b = genTrivialCode rep instr a b +trivialCode width instr maybe_revinstr a b + = genTrivialCode (intSize width) instr a b -- This is re-used for floating pt instructions too. genTrivialCode rep instr a b = do @@ -4459,42 +4434,41 @@ trivialUCode rep instr x = do code dst = x_code dst `snocOL` instr (OpReg dst) - -- in return (Any rep code) ----------- #if i386_TARGET_ARCH -trivialFCode pk instr x y = do +trivialFCode width instr x y = do (x_reg, x_code) <- getNonClobberedReg x -- these work for float regs too (y_reg, y_code) <- getSomeReg y let + size = floatSize width code dst = x_code `appOL` y_code `snocOL` - instr pk x_reg y_reg dst - -- in - return (Any pk code) + instr size x_reg y_reg dst + return (Any size code) #endif #if x86_64_TARGET_ARCH - -trivialFCode pk instr x y = genTrivialCode pk (instr pk) x y - +trivialFCode pk instr x y + = genTrivialCode size (instr size) x y + where size = floatSize pk #endif ------------- -trivialUFCode rep instr x = do +trivialUFCode size instr x = do (x_reg, x_code) <- getSomeReg x let code dst = x_code `snocOL` instr x_reg dst -- in - return (Any rep code) + return (Any size code) #endif /* i386_TARGET_ARCH */ @@ -4506,54 +4480,54 @@ trivialCode pk instr x (CmmLit (CmmInt y d)) | fits13Bits y = do (src1, code) <- getSomeReg x - tmp <- getNewRegNat I32 + tmp <- getNewRegNat II32 let src2 = ImmInt 
(fromInteger y) code__2 dst = code `snocOL` instr src1 (RIImm src2) dst - return (Any I32 code__2) + return (Any II32 code__2) trivialCode pk instr x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat I32 - tmp2 <- getNewRegNat I32 + tmp1 <- getNewRegNat II32 + tmp2 <- getNewRegNat II32 let code__2 dst = code1 `appOL` code2 `snocOL` instr src1 (RIReg src2) dst - return (Any I32 code__2) + return (Any II32 code__2) ------------ trivialFCode pk instr x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y - tmp1 <- getNewRegNat (cmmExprRep x) - tmp2 <- getNewRegNat (cmmExprRep y) - tmp <- getNewRegNat F64 + tmp1 <- getNewRegNat (cmmExprType x) + tmp2 <- getNewRegNat (cmmExprType y) + tmp <- getNewRegNat FF64 let - promote x = FxTOy F32 F64 x tmp + promote x = FxTOy FF32 FF64 x tmp - pk1 = cmmExprRep x - pk2 = cmmExprRep y + pk1 = cmmExprType x + pk2 = cmmExprType y code__2 dst = if pk1 == pk2 then code1 `appOL` code2 `snocOL` - instr pk src1 src2 dst - else if pk1 == F32 then + instr (floatSize pk) src1 src2 dst + else if typeWidth pk1 == W32 then code1 `snocOL` promote src1 `appOL` code2 `snocOL` - instr F64 tmp src2 dst + instr FF64 tmp src2 dst else code1 `appOL` code2 `snocOL` promote src2 `snocOL` - instr F64 src1 tmp dst - return (Any (if pk1 == pk2 then pk1 else F64) code__2) + instr FF64 src1 tmp dst + return (Any (if pk1 == pk2 then pk1 else cmmFloat W64) code__2) ------------ -trivialUCode pk instr x = do +trivialUCode size instr x = do (src, code) <- getSomeReg x - tmp <- getNewRegNat pk + tmp <- getNewRegNat size let code__2 dst = code `snocOL` instr (RIReg src) dst - return (Any pk code__2) + return (Any size code__2) ------------- trivialUFCode pk instr x = do @@ -4599,21 +4573,25 @@ trivialCode rep signed instr x (CmmLit (CmmInt y _)) = do (src1, code1) <- getSomeReg x let code dst = code1 `snocOL` instr dst src1 (RIImm imm) - return (Any rep code) + return (Any (intSize rep) code) trivialCode rep 
signed instr x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) - return (Any rep code) + return (Any (intSize rep) code) -trivialCodeNoImm :: MachRep -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm rep instr x y = do +trivialCodeNoImm' :: Size -> (Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm' size instr x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 - return (Any rep code) + return (Any size code) + +trivialCodeNoImm :: Size -> (Size -> Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImm size instr x y = trivialCodeNoImm' size (instr size) x y trivialUCode rep instr x = do (src, code) <- getSomeReg x @@ -4624,7 +4602,7 @@ trivialUCode rep instr x = do -- it the hard way. -- The "div" parameter is the division instruction to use (DIVW or DIVWU) -remainderCode :: MachRep -> (Reg -> Reg -> Reg -> Instr) +remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) -> CmmExpr -> CmmExpr -> NatM Register remainderCode rep div x y = do (src1, code1) <- getSomeReg x @@ -4634,7 +4612,7 @@ remainderCode rep div x y = do MULLW dst dst (RIReg src2), SUBF dst dst src1 ] - return (Any rep code) + return (Any (intSize rep) code) #endif /* powerpc_TARGET_ARCH */ @@ -4653,8 +4631,8 @@ remainderCode rep div x y = do -- kinds, so the value has to be computed into one kind before being -- explicitly "converted" to live in the other kind. 
-coerceInt2FP :: MachRep -> MachRep -> CmmExpr -> NatM Register -coerceFP2Int :: MachRep -> MachRep -> CmmExpr -> NatM Register +coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register +coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register #if sparc_TARGET_ARCH coerceDbl2Flt :: CmmExpr -> NatM Register @@ -4677,12 +4655,12 @@ coerceInt2FP _ x LD TF dst (spRel 0), CVTxy Q TF dst dst] in - return (Any F64 code__2) + return (Any FF64 code__2) ------------- coerceFP2Int x = getRegister x `thenNat` \ register -> - getNewRegNat F64 `thenNat` \ tmp -> + getNewRegNat FF64 `thenNat` \ tmp -> let code = registerCode register tmp src = registerName register tmp @@ -4703,22 +4681,21 @@ coerceFP2Int x coerceInt2FP from to x = do (x_reg, x_code) <- getSomeReg x let - opc = case to of F32 -> GITOF; F64 -> GITOD + opc = case to of W32 -> GITOF; W64 -> GITOD code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-I32 reps? - -- in - return (Any to code) + -- ToDo: works for non-II32 reps? + return (Any (floatSize to) code) ------------ coerceFP2Int from to x = do (x_reg, x_code) <- getSomeReg x let - opc = case from of F32 -> GFTOI; F64 -> GDTOI + opc = case from of W32 -> GFTOI; W64 -> GDTOI code dst = x_code `snocOL` opc x_reg dst - -- ToDo: works for non-I32 reps? + -- ToDo: works for non-II32 reps? 
-- in - return (Any to code) + return (Any (intSize to) code) #endif /* i386_TARGET_ARCH */ @@ -4729,28 +4706,27 @@ coerceFP2Int from to x = do coerceFP2Int from to x = do (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand let - opc = case from of F32 -> CVTTSS2SIQ; F64 -> CVTTSD2SIQ + opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ code dst = x_code `snocOL` opc x_op dst -- in - return (Any to code) -- works even if the destination rep is CVTSI2SS; F64 -> CVTSI2SD + opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD code dst = x_code `snocOL` opc x_op dst -- in - return (Any to code) -- works even if the destination rep is CmmExpr -> NatM Register +coerceFP2FP :: Width -> CmmExpr -> NatM Register coerceFP2FP to x = do (x_reg, x_code) <- getSomeReg x let - opc = case to of F32 -> CVTSD2SS; F64 -> CVTSS2SD + opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD code dst = x_code `snocOL` opc x_reg dst -- in - return (Any to code) - + return (Any (floatSize to) code) #endif -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -4772,7 +4748,7 @@ coerceFP2Int pk fprep x = do reg <- getNewRegNat fprep tmp <- getNewRegNat pk let - code__2 dst = ASSERT(fprep == F64 || fprep == F32) + code__2 dst = ASSERT(fprep == FF64 || fprep == FF32) code `appOL` toOL [ FxTOy fprep pk src tmp, ST pk tmp (spRel (-2)), @@ -4782,12 +4758,12 @@ coerceFP2Int pk fprep x = do ------------ coerceDbl2Flt x = do (src, code) <- getSomeReg x - return (Any F32 (\dst -> code `snocOL` FxTOy F64 F32 src dst)) + return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) ------------ coerceFlt2Dbl x = do (src, code) <- getSomeReg x - return (Any F64 (\dst -> code `snocOL` FxTOy F32 F64 src dst)) + return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst)) #endif /* sparc_TARGET_ARCH */ @@ -4795,8 +4771,8 @@ coerceFlt2Dbl x = do coerceInt2FP fromRep toRep x = do (src, code) <- getSomeReg x lbl <- getNewLabelNat - itmp <- getNewRegNat I32 - ftmp <- getNewRegNat 
F64 + itmp <- getNewRegNat II32 + ftmp <- getNewRegNat FF64 dflags <- getDynFlagsNat dynRef <- cmmMakeDynamicReference dflags addImportNat DataReference lbl Amode addr addr_code <- getAmode dynRef @@ -4804,40 +4780,40 @@ coerceInt2FP fromRep toRep x = do code' dst = code `appOL` maybe_exts `appOL` toOL [ LDATA ReadOnlyData [CmmDataLabel lbl, - CmmStaticLit (CmmInt 0x43300000 I32), - CmmStaticLit (CmmInt 0x80000000 I32)], + CmmStaticLit (CmmInt 0x43300000 W32), + CmmStaticLit (CmmInt 0x80000000 W32)], XORIS itmp src (ImmInt 0x8000), - ST I32 itmp (spRel 3), + ST II32 itmp (spRel 3), LIS itmp (ImmInt 0x4330), - ST I32 itmp (spRel 2), - LD F64 ftmp (spRel 2) + ST II32 itmp (spRel 2), + LD FF64 ftmp (spRel 2) ] `appOL` addr_code `appOL` toOL [ - LD F64 dst addr, - FSUB F64 dst ftmp dst + LD FF64 dst addr, + FSUB FF64 dst ftmp dst ] `appOL` maybe_frsp dst maybe_exts = case fromRep of - I8 -> unitOL $ EXTS I8 src src - I16 -> unitOL $ EXTS I16 src src - I32 -> nilOL + W8 -> unitOL $ EXTS II8 src src + W16 -> unitOL $ EXTS II16 src src + W32 -> nilOL maybe_frsp dst = case toRep of - F32 -> unitOL $ FRSP dst dst - F64 -> nilOL - return (Any toRep code') + W32 -> unitOL $ FRSP dst dst + W64 -> nilOL + return (Any (floatSize toRep) code') coerceFP2Int fromRep toRep x = do - -- the reps don't really matter: F*->F64 and I32->I* are no-ops + -- the reps don't really matter: F*->FF64 and II32->I* are no-ops (src, code) <- getSomeReg x - tmp <- getNewRegNat F64 + tmp <- getNewRegNat FF64 let code' dst = code `appOL` toOL [ -- convert to int in FP reg FCTIWZ tmp src, -- store value (64bit) from FP to stack - ST F64 tmp (spRel 2), + ST FF64 tmp (spRel 2), -- read low word of value (high word is undefined) - LD I32 dst (spRel 3)] - return (Any toRep code') + LD II32 dst (spRel 3)] + return (Any (intSize toRep) code') #endif /* powerpc_TARGET_ARCH */ @@ -4854,4 +4830,3 @@ eXTRA_STK_ARGS_HERE :: Int eXTRA_STK_ARGS_HERE = IF_ARCH_alpha(0, IF_ARCH_sparc(23, ???)) #endif - diff --git 
a/compiler/nativeGen/MachInstrs.hs b/compiler/nativeGen/MachInstrs.hs index dc7731c..2ae4474 100644 --- a/compiler/nativeGen/MachInstrs.hs +++ b/compiler/nativeGen/MachInstrs.hs @@ -25,9 +25,6 @@ module MachInstrs ( #if powerpc_TARGET_ARCH condNegate, #endif -#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH - Size(..), machRepSize, -#endif RI(..), #if i386_TARGET_ARCH || x86_64_TARGET_ARCH @@ -46,7 +43,6 @@ module MachInstrs ( import BlockId import MachRegs import Cmm -import MachOp ( MachRep(..) ) import CLabel ( CLabel, pprCLabel ) import Panic ( panic ) import Outputable @@ -165,48 +161,6 @@ condNegate NE = EQQ #endif -- ----------------------------------------------------------------------------- --- Sizes on this architecture - --- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes --- here. I've removed them from the x86 version, we'll see what happens --SDM - -#if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH -data Size -#if alpha_TARGET_ARCH - = B -- byte - | Bu --- | W -- word (2 bytes): UNUSED --- | Wu -- : UNUSED - | L -- longword (4 bytes) - | Q -- quadword (8 bytes) --- | FF -- VAX F-style floating pt: UNUSED --- | GF -- VAX G-style floating pt: UNUSED --- | DF -- VAX D-style floating pt: UNUSED --- | SF -- IEEE single-precision floating pt: UNUSED - | TF -- IEEE double-precision floating pt -#endif -#if sparc_TARGET_ARCH || powerpc_TARGET_ARCH - = B -- byte (signed) - | Bu -- byte (unsigned) - | H -- halfword (signed, 2 bytes) - | Hu -- halfword (unsigned, 2 bytes) - | W -- word (4 bytes) - | F -- IEEE single-precision floating pt - | DF -- IEEE single-precision floating pt -#endif - deriving Eq - -machRepSize :: MachRep -> Size -machRepSize I8 = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, )) -machRepSize I16 = IF_ARCH_alpha(err,IF_ARCH_sparc(Hu, )) -machRepSize I32 = IF_ARCH_alpha(L, IF_ARCH_sparc(W, )) -machRepSize I64 = panic "machRepSize: I64" -machRepSize I128 = panic "machRepSize: I128" 
-machRepSize F32 = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, )) -machRepSize F64 = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,)) -#endif - --- ----------------------------------------------------------------------------- -- Register or immediate (a handy type on some platforms) data RI = RIReg Reg @@ -412,41 +366,41 @@ bit or 64 bit precision. -- data Instr continues... -- Moves. - | MOV MachRep Operand Operand - | MOVZxL MachRep Operand Operand -- size is the size of operand 1 - | MOVSxL MachRep Operand Operand -- size is the size of operand 1 + | MOV Size Operand Operand + | MOVZxL Size Operand Operand -- size is the size of operand 1 + | MOVSxL Size Operand Operand -- size is the size of operand 1 -- x86_64 note: plain mov into a 32-bit register always zero-extends -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which -- don't affect the high bits of the register. -- Load effective address (also a very useful three-operand add instruction :-) - | LEA MachRep Operand Operand + | LEA Size Operand Operand -- Int Arithmetic. - | ADD MachRep Operand Operand - | ADC MachRep Operand Operand - | SUB MachRep Operand Operand + | ADD Size Operand Operand + | ADC Size Operand Operand + | SUB Size Operand Operand - | MUL MachRep Operand Operand - | IMUL MachRep Operand Operand -- signed int mul - | IMUL2 MachRep Operand -- %edx:%eax = operand * %eax + | MUL Size Operand Operand + | IMUL Size Operand Operand -- signed int mul + | IMUL2 Size Operand -- %edx:%eax = operand * %eax - | DIV MachRep Operand -- eax := eax:edx/op, edx := eax:edx%op - | IDIV MachRep Operand -- ditto, but signed + | DIV Size Operand -- eax := eax:edx/op, edx := eax:edx%op + | IDIV Size Operand -- ditto, but signed -- Simple bit-twiddling. 
- | AND MachRep Operand Operand - | OR MachRep Operand Operand - | XOR MachRep Operand Operand - | NOT MachRep Operand - | NEGI MachRep Operand -- NEG instruction (name clash with Cond) + | AND Size Operand Operand + | OR Size Operand Operand + | XOR Size Operand Operand + | NOT Size Operand + | NEGI Size Operand -- NEG instruction (name clash with Cond) -- Shifts (amount may be immediate or %cl only) - | SHL MachRep Operand{-amount-} Operand - | SAR MachRep Operand{-amount-} Operand - | SHR MachRep Operand{-amount-} Operand + | SHL Size Operand{-amount-} Operand + | SAR Size Operand{-amount-} Operand + | SHR Size Operand{-amount-} Operand - | BT MachRep Imm Operand + | BT Size Imm Operand | NOP #if i386_TARGET_ARCH @@ -458,8 +412,8 @@ bit or 64 bit precision. -- and furthermore are constrained to be fp regs only. -- IMPORTANT: keep is_G_insn up to date with any changes here | GMOV Reg Reg -- src(fpreg), dst(fpreg) - | GLD MachRep AddrMode Reg -- src, dst(fpreg) - | GST MachRep Reg AddrMode -- src(fpreg), dst + | GLD Size AddrMode Reg -- src, dst(fpreg) + | GST Size Reg AddrMode -- src(fpreg), dst | GLDZ Reg -- dst(fpreg) | GLD1 Reg -- dst(fpreg) @@ -470,10 +424,10 @@ bit or 64 bit precision. | GITOF Reg Reg -- src(intreg), dst(fpreg) | GITOD Reg Reg -- src(intreg), dst(fpreg) - | GADD MachRep Reg Reg Reg -- src1, src2, dst - | GDIV MachRep Reg Reg Reg -- src1, src2, dst - | GSUB MachRep Reg Reg Reg -- src1, src2, dst - | GMUL MachRep Reg Reg Reg -- src1, src2, dst + | GADD Size Reg Reg Reg -- src1, src2, dst + | GDIV Size Reg Reg Reg -- src1, src2, dst + | GSUB Size Reg Reg Reg -- src1, src2, dst + | GMUL Size Reg Reg Reg -- src1, src2, dst -- FP compare. Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT] -- Compare src1 with src2; set the Zero flag iff the numbers are @@ -481,12 +435,12 @@ bit or 64 bit precision. -- test the %eflags zero flag regardless of the supplied Cond. 
| GCMP Cond Reg Reg -- src1, src2 - | GABS MachRep Reg Reg -- src, dst - | GNEG MachRep Reg Reg -- src, dst - | GSQRT MachRep Reg Reg -- src, dst - | GSIN MachRep CLabel CLabel Reg Reg -- src, dst - | GCOS MachRep CLabel CLabel Reg Reg -- src, dst - | GTAN MachRep CLabel CLabel Reg Reg -- src, dst + | GABS Size Reg Reg -- src, dst + | GNEG Size Reg Reg -- src, dst + | GSQRT Size Reg Reg -- src, dst + | GSIN Size CLabel CLabel Reg Reg -- src, dst + | GCOS Size CLabel CLabel Reg Reg -- src, dst + | GTAN Size CLabel CLabel Reg Reg -- src, dst | GFREE -- do ffree on all x86 regs; an ugly hack #endif @@ -508,22 +462,22 @@ bit or 64 bit precision. -- are Operand Reg. -- SSE2 floating-point division: - | FDIV MachRep Operand Operand -- divisor, dividend(dst) + | FDIV Size Operand Operand -- divisor, dividend(dst) -- use CMP for comparisons. ucomiss and ucomisd instructions -- compare single/double prec floating point respectively. - | SQRT MachRep Operand Reg -- src, dst + | SQRT Size Operand Reg -- src, dst #endif -- Comparison - | TEST MachRep Operand Operand - | CMP MachRep Operand Operand + | TEST Size Operand Operand + | CMP Size Operand Operand | SETCC Cond Operand -- Stack Operations. - | PUSH MachRep Operand - | POP MachRep Operand + | PUSH Size Operand + | POP Size Operand -- both unused (SDM): -- | PUSHA -- | POPA @@ -536,7 +490,7 @@ bit or 64 bit precision. | CALL (Either Imm Reg) [Reg] -- Other things. - | CLTD MachRep -- sign extend %eax into %edx:%eax + | CLTD Size -- sign extend %eax into %edx:%eax | FETCHGOT Reg -- pseudo-insn for ELF position-independent code -- pretty-prints as @@ -598,8 +552,8 @@ is_G_instr instr -- data Instr continues... -- Loads and stores. - | LD MachRep AddrMode Reg -- size, src, dst - | ST MachRep Reg AddrMode -- size, src, dst + | LD Size AddrMode Reg -- size, src, dst + | ST Size Reg AddrMode -- size, src, dst -- Int Arithmetic. 
| ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst @@ -625,16 +579,16 @@ is_G_instr instr -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single -- instructions right up until we spit them out. - | FABS MachRep Reg Reg -- src dst - | FADD MachRep Reg Reg Reg -- src1, src2, dst - | FCMP Bool MachRep Reg Reg -- exception?, src1, src2, dst - | FDIV MachRep Reg Reg Reg -- src1, src2, dst - | FMOV MachRep Reg Reg -- src, dst - | FMUL MachRep Reg Reg Reg -- src1, src2, dst - | FNEG MachRep Reg Reg -- src, dst - | FSQRT MachRep Reg Reg -- src, dst - | FSUB MachRep Reg Reg Reg -- src1, src2, dst - | FxTOy MachRep MachRep Reg Reg -- src, dst + | FABS Size Reg Reg -- src dst + | FADD Size Reg Reg Reg -- src1, src2, dst + | FCMP Bool Size Reg Reg -- exception?, src1, src2, dst + | FDIV Size Reg Reg Reg -- src1, src2, dst + | FMOV Size Reg Reg -- src, dst + | FMUL Size Reg Reg Reg -- src1, src2, dst + | FNEG Size Reg Reg -- src, dst + | FSQRT Size Reg Reg -- src, dst + | FSUB Size Reg Reg Reg -- src1, src2, dst + | FxTOy Size Size Reg Reg -- src, dst -- Jumping around. | BI Cond Bool Imm -- cond, annul?, target @@ -676,16 +630,16 @@ fPair other = pprPanic "fPair(sparc NCG)" (ppr other) -- data Instr continues... -- Loads and stores. 
- | LD MachRep Reg AddrMode -- Load size, dst, src - | LA MachRep Reg AddrMode -- Load arithmetic size, dst, src - | ST MachRep Reg AddrMode -- Store size, src, dst - | STU MachRep Reg AddrMode -- Store with Update size, src, dst + | LD Size Reg AddrMode -- Load size, dst, src + | LA Size Reg AddrMode -- Load arithmetic size, dst, src + | ST Size Reg AddrMode -- Store size, src, dst + | STU Size Reg AddrMode -- Store with Update size, src, dst | LIS Reg Imm -- Load Immediate Shifted dst, src | LI Reg Imm -- Load Immediate dst, src | MR Reg Reg -- Move Register dst, src -- also for fmr - | CMP MachRep Reg RI --- size, src1, src2 - | CMPL MachRep Reg RI --- size, src1, src2 + | CMP Size Reg RI --- size, src1, src2 + | CMPL Size Reg RI --- size, src1, src2 | BCC Cond BlockId | BCCFAR Cond BlockId @@ -717,7 +671,7 @@ fPair other = pprPanic "fPair(sparc NCG)" (ppr other) | XOR Reg Reg RI -- dst, src1, src2 | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2 - | EXTS MachRep Reg Reg + | EXTS Size Reg Reg | NEG Reg Reg | NOT Reg Reg @@ -729,10 +683,10 @@ fPair other = pprPanic "fPair(sparc NCG)" (ppr other) -- Rotate Left Word Immediate then AND with Mask | RLWINM Reg Reg Int Int Int - | FADD MachRep Reg Reg Reg - | FSUB MachRep Reg Reg Reg - | FMUL MachRep Reg Reg Reg - | FDIV MachRep Reg Reg Reg + | FADD Size Reg Reg Reg + | FSUB Size Reg Reg Reg + | FMUL Size Reg Reg Reg + | FDIV Size Reg Reg Reg | FNEG Reg Reg -- negate is the same for single and double prec. | FCMP Reg Reg diff --git a/compiler/nativeGen/MachRegs.lhs b/compiler/nativeGen/MachRegs.lhs index 5832abe..2e578c0 100644 --- a/compiler/nativeGen/MachRegs.lhs +++ b/compiler/nativeGen/MachRegs.lhs @@ -24,6 +24,10 @@ module MachRegs ( + -- * Sizes + Size(..), intSize, floatSize, isFloatSize, + wordSize, cmmTypeSize, sizeToWidth, + -- * Immediate values Imm(..), strImmLit, litToImm, @@ -93,9 +97,7 @@ module MachRegs ( #include "../includes/MachRegs.h" import Cmm -import MachOp ( MachRep(..) 
) import CgUtils ( get_GlobalReg_addr ) - import CLabel ( CLabel, mkMainCapabilityLabel ) import Pretty import Outputable ( Outputable(..), pprPanic, panic ) @@ -113,6 +115,95 @@ import Data.Int ( Int8, Int16, Int32 ) #endif -- ----------------------------------------------------------------------------- +-- Sizes on this architecture +-- +-- A Size is usually a combination of width and class + +-- It looks very like the old MachRep, but it's now of purely local +-- significance, here in the native code generator. You can change it +-- without global consequences. +-- +-- A major use is as an opcode qualifier; thus the opcode +-- mov.l a b +-- might be encoded +-- MOV II32 a b +-- where the Size field encodes the ".l" part. + +-- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes +-- here. I've removed them from the x86 version, we'll see what happens --SDM + +-- ToDo: quite a few occurrences of Size could usefully be replaced by Width + +#if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH +data Size -- For these three, the "size" also gives the int/float + -- distinction, because the instructions for int/float + -- differ only in their suffices + = II8 | II16 | II32 | II64 | FF32 | FF64 | FF80 + deriving Eq + +intSize, floatSize :: Width -> Size +intSize W8 = II8 +intSize W16 = II16 +intSize W32 = II32 +intSize W64 = II64 +intSize other = pprPanic "MachInstrs.intSize" (ppr other) + +floatSize W32 = FF32 +floatSize W64 = FF64 +floatSize other = pprPanic "MachInstrs.intSize" (ppr other) + +wordSize :: Size +wordSize = intSize wordWidth + +sizeToWidth :: Size -> Width +sizeToWidth II8 = W8 +sizeToWidth II16 = W16 +sizeToWidth II32 = W32 +sizeToWidth II64 = W64 +sizeToWidth FF32 = W32 +sizeToWidth FF64 = W64 +sizeToWidth _ = panic "MachInstrs.sizeToWidth" + +cmmTypeSize :: CmmType -> Size +cmmTypeSize ty | isFloatType ty = floatSize (typeWidth ty) + | otherwise = intSize (typeWidth ty) + +isFloatSize :: Size -> Bool +isFloatSize 
FF32 = True +isFloatSize FF64 = True +isFloatSize FF80 = True +isFloatSize other = False +#endif + +#if alpha_TARGET_ARCH +data Size + = B -- byte + | Bu +-- | W -- word (2 bytes): UNUSED +-- | Wu -- : UNUSED + | L -- longword (4 bytes) + | Q -- quadword (8 bytes) +-- | FF -- VAX F-style floating pt: UNUSED +-- | GF -- VAX G-style floating pt: UNUSED +-- | DF -- VAX D-style floating pt: UNUSED +-- | SF -- IEEE single-precision floating pt: UNUSED + | TF -- IEEE double-precision floating pt + deriving Eq +#endif + +#if sparc_TARGET_ARCH /* || powerpc_TARGET_ARCH */ +data Size + = B -- byte (signed) + | Bu -- byte (unsigned) + | H -- halfword (signed, 2 bytes) + | Hu -- halfword (unsigned, 2 bytes) + | W -- word (4 bytes) + | F -- IEEE single-precision floating pt + | DF -- IEEE single-precision floating pt + deriving Eq +#endif + +-- ----------------------------------------------------------------------------- -- Immediates data Imm @@ -138,8 +229,8 @@ strImmLit s = ImmLit (text s) litToImm :: CmmLit -> Imm litToImm (CmmInt i _) = ImmInteger i -litToImm (CmmFloat f F32) = ImmFloat f -litToImm (CmmFloat f F64) = ImmDouble f +litToImm (CmmFloat f W32) = ImmFloat f +litToImm (CmmFloat f W64) = ImmDouble f litToImm (CmmLabel l) = ImmCLbl l litToImm (CmmLabelOff l off) = ImmIndex l off litToImm (CmmLabelDiffOff l1 l2 off) @@ -265,23 +356,22 @@ largeOffsetError i fits16Bits :: Integral a => a -> Bool fits16Bits x = x >= -32768 && x < 32768 -makeImmediate :: Integral a => MachRep -> Bool -> a -> Maybe Imm - +makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm makeImmediate rep signed x = fmap ImmInt (toI16 rep signed) where - narrow I32 False = fromIntegral (fromIntegral x :: Word32) - narrow I16 False = fromIntegral (fromIntegral x :: Word16) - narrow I8 False = fromIntegral (fromIntegral x :: Word8) - narrow I32 True = fromIntegral (fromIntegral x :: Int32) - narrow I16 True = fromIntegral (fromIntegral x :: Int16) - narrow I8 True = fromIntegral (fromIntegral 
x :: Int8) + narrow W32 False = fromIntegral (fromIntegral x :: Word32) + narrow W16 False = fromIntegral (fromIntegral x :: Word16) + narrow W8 False = fromIntegral (fromIntegral x :: Word8) + narrow W32 True = fromIntegral (fromIntegral x :: Int32) + narrow W16 True = fromIntegral (fromIntegral x :: Int16) + narrow W8 True = fromIntegral (fromIntegral x :: Int8) narrowed = narrow rep signed - toI16 I32 True + toI16 W32 True | narrowed >= -32768 && narrowed < 32768 = Just narrowed | otherwise = Nothing - toI16 I32 False + toI16 W32 False | narrowed >= 0 && narrowed < 65536 = Just narrowed | otherwise = Nothing toI16 _ _ = Just narrowed @@ -392,16 +482,18 @@ instance Uniquable Reg where unRealReg (RealReg i) = i unRealReg vreg = pprPanic "unRealReg on VirtualReg" (ppr vreg) -mkVReg :: Unique -> MachRep -> Reg -mkVReg u rep - = case rep of +mkVReg :: Unique -> Size -> Reg +mkVReg u size + | not (isFloatSize size) = VirtualRegI u + | otherwise + = case size of #if sparc_TARGET_ARCH - F32 -> VirtualRegF u + FF32 -> VirtualRegF u #else - F32 -> VirtualRegD u + FF32 -> VirtualRegD u #endif - F64 -> VirtualRegD u - other -> VirtualRegI u + FF64 -> VirtualRegD u + _other -> panic "mkVReg" isVirtualReg :: Reg -> Bool isVirtualReg (RealReg _) = False @@ -1358,34 +1450,34 @@ globalRegMaybe :: GlobalReg -> Maybe Reg globalRegMaybe BaseReg = Just (RealReg REG_Base) #endif #ifdef REG_R1 -globalRegMaybe (VanillaReg 1) = Just (RealReg REG_R1) +globalRegMaybe (VanillaReg 1 _) = Just (RealReg REG_R1) #endif #ifdef REG_R2 -globalRegMaybe (VanillaReg 2) = Just (RealReg REG_R2) +globalRegMaybe (VanillaReg 2 _) = Just (RealReg REG_R2) #endif #ifdef REG_R3 -globalRegMaybe (VanillaReg 3) = Just (RealReg REG_R3) +globalRegMaybe (VanillaReg 3 _) = Just (RealReg REG_R3) #endif #ifdef REG_R4 -globalRegMaybe (VanillaReg 4) = Just (RealReg REG_R4) +globalRegMaybe (VanillaReg 4 _) = Just (RealReg REG_R4) #endif #ifdef REG_R5 -globalRegMaybe (VanillaReg 5) = Just (RealReg REG_R5) +globalRegMaybe 
(VanillaReg 5 _) = Just (RealReg REG_R5) #endif #ifdef REG_R6 -globalRegMaybe (VanillaReg 6) = Just (RealReg REG_R6) +globalRegMaybe (VanillaReg 6 _) = Just (RealReg REG_R6) #endif #ifdef REG_R7 -globalRegMaybe (VanillaReg 7) = Just (RealReg REG_R7) +globalRegMaybe (VanillaReg 7 _) = Just (RealReg REG_R7) #endif #ifdef REG_R8 -globalRegMaybe (VanillaReg 8) = Just (RealReg REG_R8) +globalRegMaybe (VanillaReg 8 _) = Just (RealReg REG_R8) #endif #ifdef REG_R9 -globalRegMaybe (VanillaReg 9) = Just (RealReg REG_R9) +globalRegMaybe (VanillaReg 9 _) = Just (RealReg REG_R9) #endif #ifdef REG_R10 -globalRegMaybe (VanillaReg 10) = Just (RealReg REG_R10) +globalRegMaybe (VanillaReg 10 _) = Just (RealReg REG_R10) #endif #ifdef REG_F1 globalRegMaybe (FloatReg 1) = Just (RealReg REG_F1) diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index e5da5a5..a8283ea 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -28,7 +28,6 @@ module NCGMonad ( import BlockId import CLabel ( CLabel, mkAsmTempLabel ) import MachRegs -import MachOp ( MachRep ) import UniqSupply import Unique ( Unique ) import DynFlags @@ -102,10 +101,10 @@ getBlockIdNat = do u <- getUniqueNat; return (BlockId u) getNewLabelNat :: NatM CLabel getNewLabelNat = do u <- getUniqueNat; return (mkAsmTempLabel u) -getNewRegNat :: MachRep -> NatM Reg +getNewRegNat :: Size -> NatM Reg getNewRegNat rep = do u <- getUniqueNat; return (mkVReg u rep) -getNewRegPairNat :: MachRep -> NatM (Reg,Reg) +getNewRegPairNat :: Size -> NatM (Reg,Reg) getNewRegPairNat rep = do u <- getUniqueNat let lo = mkVReg u rep; hi = getHiVRegFromLo lo @@ -114,7 +113,7 @@ getNewRegPairNat rep = do getPicBaseMaybeNat :: NatM (Maybe Reg) getPicBaseMaybeNat = NatM (\state -> (natm_pic state, state)) -getPicBaseNat :: MachRep -> NatM Reg +getPicBaseNat :: Size -> NatM Reg getPicBaseNat rep = do mbPicBase <- getPicBaseMaybeNat case mbPicBase of diff --git 
a/compiler/nativeGen/PositionIndependentCode.hs b/compiler/nativeGen/PositionIndependentCode.hs index 0473d91..edb8846 100644 --- a/compiler/nativeGen/PositionIndependentCode.hs +++ b/compiler/nativeGen/PositionIndependentCode.hs @@ -56,7 +56,6 @@ module PositionIndependentCode ( #include "nativeGen/NCG.h" import Cmm -import MachOp ( MachOp(MO_Add), wordRep, MachRep(..) ) import CLabel ( CLabel, pprCLabel, mkDynamicLinkerLabel, DynamicLinkerLabelInfo(..), dynamicLinkerLabelInfo, mkPicBaseLabel, @@ -118,7 +117,7 @@ cmmMakeDynamicReference dflags addImport referenceKind lbl AccessViaSymbolPtr -> do let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl addImport symbolPtr - return $ CmmLoad (cmmMakePicReference symbolPtr) wordRep + return $ CmmLoad (cmmMakePicReference symbolPtr) bWord AccessDirectly -> case referenceKind of -- for data, we might have to make some calculations: DataReference -> return $ cmmMakePicReference lbl @@ -142,7 +141,7 @@ cmmMakePicReference :: CLabel -> CmmExpr -- everything gets relocated at runtime cmmMakePicReference lbl - | (opt_PIC || not opt_Static) && absoluteLabel lbl = CmmMachOp (MO_Add wordRep) [ + | (opt_PIC || not opt_Static) && absoluteLabel lbl = CmmMachOp (MO_Add wordWidth) [ CmmReg (CmmGlobal PicBaseReg), CmmLit $ picRelative lbl ] @@ -552,12 +551,12 @@ pprImportedSymbol importedLbl ptext symbolSize <+> pprCLabel_asm lbl ] --- PLT code stubs are generated automatically be the dynamic linker. +-- PLT code stubs are generated automatically by the dynamic linker. 
| otherwise = empty where - symbolSize = case wordRep of - I32 -> sLit "\t.long" - I64 -> sLit "\t.quad" + symbolSize = case wordWidth of + W32 -> sLit "\t.long" + W64 -> sLit "\t.quad" _ -> panic "Unknown wordRep in pprImportedSymbol" #else @@ -616,7 +615,7 @@ initializePicBase picReg (CmmProc info lab params (ListGraph blocks) : statics) = do gotOffLabel <- getNewLabelNat - tmp <- getNewRegNat wordRep + tmp <- getNewRegNat $ intSize wordWidth let gotOffset = CmmData Text [ CmmDataLabel gotOffLabel, @@ -628,7 +627,7 @@ initializePicBase picReg (ImmCLbl mkPicBaseLabel) BasicBlock bID insns = head blocks b' = BasicBlock bID (FETCHPC picReg - : LD wordRep tmp + : LD wordSize tmp (AddrRegImm picReg offsetToOffset) : ADD picReg picReg (RIReg tmp) : insns) diff --git a/compiler/nativeGen/PprMach.hs b/compiler/nativeGen/PprMach.hs index 694e487..bb04287 100644 --- a/compiler/nativeGen/PprMach.hs +++ b/compiler/nativeGen/PprMach.hs @@ -28,7 +28,6 @@ module PprMach ( import BlockId import Cmm -import MachOp ( MachRep(..), wordRep, isFloatingRep ) import MachRegs -- may differ per-platform import MachInstrs @@ -113,9 +112,9 @@ pprBasicBlock (BasicBlock (BlockId id) instrs) = -- on which bit of it we care about. Yurgh. 
pprUserReg :: Reg -> Doc -pprUserReg = pprReg IF_ARCH_i386(I32,) IF_ARCH_x86_64(I64,) +pprUserReg = pprReg IF_ARCH_i386(II32,) IF_ARCH_x86_64(II64,) -pprReg :: IF_ARCH_i386(MachRep ->,) IF_ARCH_x86_64(MachRep ->,) Reg -> Doc +pprReg :: IF_ARCH_i386(Size ->,) IF_ARCH_x86_64(Size ->,) Reg -> Doc pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r = case r of @@ -165,9 +164,9 @@ pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r }) #endif #if i386_TARGET_ARCH - ppr_reg_no :: MachRep -> Int -> Doc - ppr_reg_no I8 = ppr_reg_byte - ppr_reg_no I16 = ppr_reg_word + ppr_reg_no :: Size -> Int -> Doc + ppr_reg_no II8 = ppr_reg_byte + ppr_reg_no II16 = ppr_reg_word ppr_reg_no _ = ppr_reg_long ppr_reg_byte i = ptext @@ -200,10 +199,10 @@ pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r #endif #if x86_64_TARGET_ARCH - ppr_reg_no :: MachRep -> Int -> Doc - ppr_reg_no I8 = ppr_reg_byte - ppr_reg_no I16 = ppr_reg_word - ppr_reg_no I32 = ppr_reg_long + ppr_reg_no :: Size -> Int -> Doc + ppr_reg_no II8 = ppr_reg_byte + ppr_reg_no II16 = ppr_reg_word + ppr_reg_no II32 = ppr_reg_long ppr_reg_no _ = ppr_reg_quad ppr_reg_byte i = ptext @@ -358,7 +357,7 @@ pprReg IF_ARCH_i386(s,) IF_ARCH_x86_64(s,) r -- pprSize: print a 'Size' #if powerpc_TARGET_ARCH || i386_TARGET_ARCH || x86_64_TARGET_ARCH || sparc_TARGET_ARCH -pprSize :: MachRep -> Doc +pprSize :: Size -> Doc #else pprSize :: Size -> Doc #endif @@ -378,41 +377,41 @@ pprSize x = ptext (case x of TF -> sLit "t" #endif #if i386_TARGET_ARCH || x86_64_TARGET_ARCH - I8 -> sLit "b" - I16 -> sLit "w" - I32 -> sLit "l" - I64 -> sLit "q" + II8 -> sLit "b" + II16 -> sLit "w" + II32 -> sLit "l" + II64 -> sLit "q" #endif #if i386_TARGET_ARCH - F32 -> sLit "s" - F64 -> sLit "l" - F80 -> sLit "t" + FF32 -> sLit "s" + FF64 -> sLit "l" + FF80 -> sLit "t" #endif #if x86_64_TARGET_ARCH - F32 -> sLit "ss" -- "scalar single-precision float" (SSE2) - F64 -> sLit "sd" -- "scalar double-precision float" (SSE2) + FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2) + 
FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2) #endif #if sparc_TARGET_ARCH - I8 -> sLit "sb" - I16 -> sLit "sh" - I32 -> sLit "" - F32 -> sLit "" - F64 -> sLit "d" + II8 -> sLit "sb" + II16 -> sLit "sh" + II32 -> sLit "" + FF32 -> sLit "" + FF64 -> sLit "d" ) -pprStSize :: MachRep -> Doc +pprStSize :: Size -> Doc pprStSize x = ptext (case x of - I8 -> sLit "b" - I16 -> sLit "h" - I32 -> sLit "" - F32 -> sLit "" - F64 -> sLit "d" + II8 -> sLit "b" + II16 -> sLit "h" + II32 -> sLit "" + FF32 -> sLit "" + FF64 -> sLit "d" #endif #if powerpc_TARGET_ARCH - I8 -> sLit "b" - I16 -> sLit "h" - I32 -> sLit "w" - F32 -> sLit "fs" - F64 -> sLit "fd" + II8 -> sLit "b" + II16 -> sLit "h" + II32 -> sLit "w" + FF32 -> sLit "fs" + FF64 -> sLit "fd" #endif ) @@ -558,7 +557,7 @@ pprAddr (AddrBaseIndex base index displacement) = let pp_disp = ppr_disp displacement pp_off p = pp_disp <> char '(' <> p <> char ')' - pp_reg r = pprReg wordRep r + pp_reg r = pprReg wordSize r in case (base,index) of (EABaseNone, EAIndexNone) -> pp_disp @@ -735,30 +734,30 @@ pprAlign bytes = pprDataItem :: CmmLit -> Doc pprDataItem lit - = vcat (ppr_item (cmmLitRep lit) lit) + = vcat (ppr_item (cmmTypeSize $ cmmLitType lit) lit) where imm = litToImm lit -- These seem to be common: - ppr_item I8 x = [ptext (sLit "\t.byte\t") <> pprImm imm] - ppr_item I32 x = [ptext (sLit "\t.long\t") <> pprImm imm] - ppr_item F32 (CmmFloat r _) + ppr_item II8 x = [ptext (sLit "\t.byte\t") <> pprImm imm] + ppr_item II32 x = [ptext (sLit "\t.long\t") <> pprImm imm] + ppr_item FF32 (CmmFloat r _) = let bs = floatToBytes (fromRational r) in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs - ppr_item F64 (CmmFloat r _) + ppr_item FF64 (CmmFloat r _) = let bs = doubleToBytes (fromRational r) in map (\b -> ptext (sLit "\t.byte\t") <> pprImm (ImmInt b)) bs #if sparc_TARGET_ARCH -- copy n paste of x86 version - ppr_item I16 x = [ptext (sLit "\t.short\t") <> pprImm imm] - ppr_item I64 x = [ptext (sLit 
"\t.quad\t") <> pprImm imm] + ppr_item II16 x = [ptext (sLit "\t.short\t") <> pprImm imm] + ppr_item II64 x = [ptext (sLit "\t.quad\t") <> pprImm imm] #endif #if i386_TARGET_ARCH || x86_64_TARGET_ARCH - ppr_item I16 x = [ptext (sLit "\t.word\t") <> pprImm imm] + ppr_item II16 x = [ptext (sLit "\t.word\t") <> pprImm imm] #endif #if i386_TARGET_ARCH && darwin_TARGET_OS - ppr_item I64 (CmmInt x _) = + ppr_item II64 (CmmInt x _) = [ptext (sLit "\t.long\t") <> int (fromIntegral (fromIntegral x :: Word32)), ptext (sLit "\t.long\t") @@ -766,7 +765,7 @@ pprDataItem lit (fromIntegral (x `shiftR` 32) :: Word32))] #endif #if i386_TARGET_ARCH || (darwin_TARGET_OS && x86_64_TARGET_ARCH) - ppr_item I64 x = [ptext (sLit "\t.quad\t") <> pprImm imm] + ppr_item II64 x = [ptext (sLit "\t.quad\t") <> pprImm imm] #endif #if x86_64_TARGET_ARCH && !darwin_TARGET_OS -- x86_64: binutils can't handle the R_X86_64_PC64 relocation @@ -777,7 +776,7 @@ pprDataItem lit -- -- See Note [x86-64-relative] in includes/InfoTables.h -- - ppr_item I64 x + ppr_item II64 x | isRelativeReloc x = [ptext (sLit "\t.long\t") <> pprImm imm, ptext (sLit "\t.long\t0")] @@ -788,8 +787,8 @@ pprDataItem lit isRelativeReloc _ = False #endif #if powerpc_TARGET_ARCH - ppr_item I16 x = [ptext (sLit "\t.short\t") <> pprImm imm] - ppr_item I64 (CmmInt x _) = + ppr_item II16 x = [ptext (sLit "\t.short\t") <> pprImm imm] + ppr_item II64 (CmmInt x _) = [ptext (sLit "\t.long\t") <> int (fromIntegral (fromIntegral (x `shiftR` 32) :: Word32)), @@ -1249,18 +1248,18 @@ pprInstr (RELOAD slot reg) pprInstr (MOV size src dst) = pprSizeOpOp (sLit "mov") size src dst -pprInstr (MOVZxL I32 src dst) = pprSizeOpOp (sLit "mov") I32 src dst +pprInstr (MOVZxL II32 src dst) = pprSizeOpOp (sLit "mov") II32 src dst -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple -- movl. 
But we represent it as a MOVZxL instruction, because -- the reg alloc would tend to throw away a plain reg-to-reg -- move, and we still want it to do that. -pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes I32 src dst +pprInstr (MOVZxL sizes src dst) = pprSizeOpOpCoerce (sLit "movz") sizes II32 src dst -- zero-extension only needs to extend to 32 bits: on x86_64, -- the remaining zero-extension to 64 bits is automatic, and the 32-bit -- instruction is shorter. -pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordRep src dst +pprInstr (MOVSxL sizes src dst) = pprSizeOpOpCoerce (sLit "movs") sizes wordSize src dst -- here we do some patching, since the physical registers are only set late -- in the code generation. @@ -1296,8 +1295,8 @@ pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2 pprInstr (AND size src dst) = pprSizeOpOp (sLit "and") size src dst pprInstr (OR size src dst) = pprSizeOpOp (sLit "or") size src dst -pprInstr (XOR F32 src dst) = pprOpOp (sLit "xorps") F32 src dst -pprInstr (XOR F64 src dst) = pprOpOp (sLit "xorpd") F64 src dst +pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst +pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst pprInstr (NOT size op) = pprSizeOp (sLit "not") size op @@ -1310,8 +1309,14 @@ pprInstr (SHR size src dst) = pprShift (sLit "shr") size src dst pprInstr (BT size imm src) = pprSizeImmOp (sLit "bt") size imm src pprInstr (CMP size src dst) - | isFloatingRep size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2 - | otherwise = pprSizeOpOp (sLit "cmp") size src dst + | is_float size = pprSizeOpOp (sLit "ucomi") size src dst -- SSE2 + | otherwise = pprSizeOpOp (sLit "cmp") size src dst + where + -- This predicate is needed here and nowhere else + is_float FF32 = True + is_float FF64 = True + is_float FF80 = True + is_float other = False pprInstr (TEST size src dst) 
= pprSizeOpOp (sLit "test") size src dst pprInstr (PUSH size op) = pprSizeOp (sLit "push") size op @@ -1322,10 +1327,10 @@ pprInstr (POP size op) = pprSizeOp (sLit "pop") size op -- pprInstr POPA = ptext (sLit "\tpopal") pprInstr NOP = ptext (sLit "\tnop") -pprInstr (CLTD I32) = ptext (sLit "\tcltd") -pprInstr (CLTD I64) = ptext (sLit "\tcqto") +pprInstr (CLTD II32) = ptext (sLit "\tcltd") +pprInstr (CLTD II64) = ptext (sLit "\tcqto") -pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand I8 op) +pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op) pprInstr (JXX cond (BlockId id)) = pprCondInstr (sLit "j") cond (pprCLabel_asm lab) @@ -1334,10 +1339,10 @@ pprInstr (JXX cond (BlockId id)) pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm) pprInstr (JMP (OpImm imm)) = (<>) (ptext (sLit "\tjmp ")) (pprImm imm) -pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordRep op) +pprInstr (JMP op) = (<>) (ptext (sLit "\tjmp *")) (pprOperand wordSize op) pprInstr (JMP_TBL op ids) = pprInstr (JMP op) pprInstr (CALL (Left imm) _) = (<>) (ptext (sLit "\tcall ")) (pprImm imm) -pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg wordRep reg) +pprInstr (CALL (Right reg) _) = (<>) (ptext (sLit "\tcall *")) (pprReg wordSize reg) pprInstr (IDIV sz op) = pprSizeOp (sLit "idiv") sz op pprInstr (DIV sz op) = pprSizeOp (sLit "div") sz op @@ -1359,9 +1364,9 @@ pprInstr (CVTSI2SD from to) = pprOpReg (sLit "cvtsi2sdq") from to -- FETCHGOT for PIC on ELF platforms pprInstr (FETCHGOT reg) = vcat [ ptext (sLit "\tcall 1f"), - hcat [ ptext (sLit "1:\tpopl\t"), pprReg I32 reg ], + hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ], hcat [ ptext (sLit "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), "), - pprReg I32 reg ] + pprReg II32 reg ] ] -- FETCHPC for PIC on Darwin/x86 @@ -1370,7 +1375,7 @@ pprInstr (FETCHGOT reg) -- and it's a good thing to use the same name on both platforms) pprInstr (FETCHPC reg) = vcat 
[ ptext (sLit "\tcall 1f"), - hcat [ ptext (sLit "1:\tpopl\t"), pprReg I32 reg ] + hcat [ ptext (sLit "1:\tpopl\t"), pprReg II32 reg ] ] @@ -1419,12 +1424,12 @@ pprInstr g@(GDTOI src dst) hcat [gtab, text "addl $8, %esp"] ]) where - reg = pprReg I32 dst + reg = pprReg II32 dst pprInstr g@(GITOF src dst) = pprInstr (GITOD src dst) pprInstr g@(GITOD src dst) - = pprG g (hcat [gtab, text "pushl ", pprReg I32 src, + = pprG g (hcat [gtab, text "pushl ", pprReg II32 src, text " ; ffree %st(7); fildl (%esp) ; ", gpop dst 1, text " ; addl $4,%esp"]) @@ -1581,7 +1586,7 @@ pprInstr GFREE ptext (sLit "\tffree %st(4) ;ffree %st(5) ;ffree %st(6) ;ffree %st(7)") ] -pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> MachRep -> Doc +pprTrigOp :: String -> Bool -> CLabel -> CLabel -> Reg -> Reg -> Size -> Doc pprTrigOp op -- fsin, fcos or fptan isTan -- we need a couple of extra steps if we're doing tan l1 l2 -- internal labels for us to use @@ -1626,8 +1631,8 @@ pprTrigOp op -- fsin, fcos or fptan -------------------------- -- coerce %st(0) to the specified size -gcoerceto F64 = empty -gcoerceto F32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; " +gcoerceto FF64 = empty +gcoerceto FF32 = empty --text "subl $4,%esp ; fstps (%esp) ; flds (%esp) ; addl $4,%esp ; " gpush reg offset = hcat [text "ffree %st(7) ; fld ", greg reg offset] @@ -1647,20 +1652,20 @@ pprG :: Instr -> Doc -> Doc pprG fake actual = (char '#' <> pprGInstr fake) $$ actual -pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") F64 src dst +pprGInstr (GMOV src dst) = pprSizeRegReg (sLit "gmov") FF64 src dst pprGInstr (GLD sz src dst) = pprSizeAddrReg (sLit "gld") sz src dst pprGInstr (GST sz src dst) = pprSizeRegAddr (sLit "gst") sz src dst -pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") F64 dst -pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") F64 dst +pprGInstr (GLDZ dst) = pprSizeReg (sLit "gldz") FF64 dst +pprGInstr (GLD1 dst) = pprSizeReg (sLit "gld1") FF64 dst 
-pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") F32 I32 src dst -pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") F64 I32 src dst +pprGInstr (GFTOI src dst) = pprSizeSizeRegReg (sLit "gftoi") FF32 II32 src dst +pprGInstr (GDTOI src dst) = pprSizeSizeRegReg (sLit "gdtoi") FF64 II32 src dst -pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") I32 F32 src dst -pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") I32 F64 src dst +pprGInstr (GITOF src dst) = pprSizeSizeRegReg (sLit "gitof") II32 FF32 src dst +pprGInstr (GITOD src dst) = pprSizeSizeRegReg (sLit "gitod") II32 FF64 src dst -pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") F64 co src dst +pprGInstr (GCMP co src dst) = pprCondRegReg (sLit "gcmp_") FF64 co src dst pprGInstr (GABS sz src dst) = pprSizeRegReg (sLit "gabs") sz src dst pprGInstr (GNEG sz src dst) = pprSizeRegReg (sLit "gneg") sz src dst pprGInstr (GSQRT sz src dst) = pprSizeRegReg (sLit "gsqrt") sz src dst @@ -1682,7 +1687,7 @@ pprDollImm :: Imm -> Doc pprDollImm i = ptext (sLit "$") <> pprImm i -pprOperand :: MachRep -> Operand -> Doc +pprOperand :: Size -> Operand -> Doc pprOperand s (OpReg r) = pprReg s r pprOperand s (OpImm i) = pprDollImm i pprOperand s (OpAddr ea) = pprAddr ea @@ -1691,11 +1696,11 @@ pprMnemonic_ :: LitString -> Doc pprMnemonic_ name = char '\t' <> ptext name <> space -pprMnemonic :: LitString -> MachRep -> Doc +pprMnemonic :: LitString -> Size -> Doc pprMnemonic name size = char '\t' <> ptext name <> pprSize size <> space -pprSizeImmOp :: LitString -> MachRep -> Imm -> Operand -> Doc +pprSizeImmOp :: LitString -> Size -> Imm -> Operand -> Doc pprSizeImmOp name size imm op1 = hcat [ pprMnemonic name size, @@ -1705,14 +1710,14 @@ pprSizeImmOp name size imm op1 pprOperand size op1 ] -pprSizeOp :: LitString -> MachRep -> Operand -> Doc +pprSizeOp :: LitString -> Size -> Operand -> Doc pprSizeOp name size op1 = hcat [ pprMnemonic name size, pprOperand size op1 ] -pprSizeOpOp 
:: LitString -> MachRep -> Operand -> Operand -> Doc +pprSizeOpOp :: LitString -> Size -> Operand -> Operand -> Doc pprSizeOpOp name size op1 op2 = hcat [ pprMnemonic name size, @@ -1721,7 +1726,7 @@ pprSizeOpOp name size op1 op2 pprOperand size op2 ] -pprOpOp :: LitString -> MachRep -> Operand -> Operand -> Doc +pprOpOp :: LitString -> Size -> Operand -> Operand -> Doc pprOpOp name size op1 op2 = hcat [ pprMnemonic_ name, @@ -1730,14 +1735,14 @@ pprOpOp name size op1 op2 pprOperand size op2 ] -pprSizeReg :: LitString -> MachRep -> Reg -> Doc +pprSizeReg :: LitString -> Size -> Reg -> Doc pprSizeReg name size reg1 = hcat [ pprMnemonic name size, pprReg size reg1 ] -pprSizeRegReg :: LitString -> MachRep -> Reg -> Reg -> Doc +pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc pprSizeRegReg name size reg1 reg2 = hcat [ pprMnemonic name size, @@ -1750,21 +1755,21 @@ pprRegReg :: LitString -> Reg -> Reg -> Doc pprRegReg name reg1 reg2 = hcat [ pprMnemonic_ name, - pprReg wordRep reg1, + pprReg wordSize reg1, comma, - pprReg wordRep reg2 + pprReg wordSize reg2 ] pprOpReg :: LitString -> Operand -> Reg -> Doc pprOpReg name op1 reg2 = hcat [ pprMnemonic_ name, - pprOperand wordRep op1, + pprOperand wordSize op1, comma, - pprReg wordRep reg2 + pprReg wordSize reg2 ] -pprCondRegReg :: LitString -> MachRep -> Cond -> Reg -> Reg -> Doc +pprCondRegReg :: LitString -> Size -> Cond -> Reg -> Reg -> Doc pprCondRegReg name size cond reg1 reg2 = hcat [ char '\t', @@ -1776,7 +1781,7 @@ pprCondRegReg name size cond reg1 reg2 pprReg size reg2 ] -pprSizeSizeRegReg :: LitString -> MachRep -> MachRep -> Reg -> Reg -> Doc +pprSizeSizeRegReg :: LitString -> Size -> Size -> Reg -> Reg -> Doc pprSizeSizeRegReg name size1 size2 reg1 reg2 = hcat [ char '\t', @@ -1790,7 +1795,7 @@ pprSizeSizeRegReg name size1 size2 reg1 reg2 pprReg size2 reg2 ] -pprSizeRegRegReg :: LitString -> MachRep -> Reg -> Reg -> Reg -> Doc +pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc 
pprSizeRegRegReg name size reg1 reg2 reg3 = hcat [ pprMnemonic name size, @@ -1801,7 +1806,7 @@ pprSizeRegRegReg name size reg1 reg2 reg3 pprReg size reg3 ] -pprSizeAddrReg :: LitString -> MachRep -> AddrMode -> Reg -> Doc +pprSizeAddrReg :: LitString -> Size -> AddrMode -> Reg -> Doc pprSizeAddrReg name size op dst = hcat [ pprMnemonic name size, @@ -1810,7 +1815,7 @@ pprSizeAddrReg name size op dst pprReg size dst ] -pprSizeRegAddr :: LitString -> MachRep -> Reg -> AddrMode -> Doc +pprSizeRegAddr :: LitString -> Size -> Reg -> AddrMode -> Doc pprSizeRegAddr name size src op = hcat [ pprMnemonic name size, @@ -1819,16 +1824,16 @@ pprSizeRegAddr name size src op pprAddr op ] -pprShift :: LitString -> MachRep -> Operand -> Operand -> Doc +pprShift :: LitString -> Size -> Operand -> Operand -> Doc pprShift name size src dest = hcat [ pprMnemonic name size, - pprOperand I8 src, -- src is 8-bit sized + pprOperand II8 src, -- src is 8-bit sized comma, pprOperand size dest ] -pprSizeOpOpCoerce :: LitString -> MachRep -> MachRep -> Operand -> Operand -> Doc +pprSizeOpOpCoerce :: LitString -> Size -> Size -> Operand -> Operand -> Doc pprSizeOpOpCoerce name size1 size2 op1 op2 = hcat [ char '\t', ptext name, pprSize size1, pprSize size2, space, pprOperand size1 op1, @@ -1875,7 +1880,7 @@ pprInstr (RELOAD slot reg) -- ld [g1+4],%f(n+1) -- sub g1,g2,g1 -- to restore g1 -pprInstr (LD F64 (AddrRegReg g1 g2) reg) +pprInstr (LD FF64 (AddrRegReg g1 g2) reg) = vcat [ hcat [ptext (sLit "\tadd\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1], hcat [pp_ld_lbracket, pprReg g1, pp_rbracket_comma, pprReg reg], @@ -1886,7 +1891,7 @@ pprInstr (LD F64 (AddrRegReg g1 g2) reg) -- Translate to -- ld [addr],%fn -- ld [addr+4],%f(n+1) -pprInstr (LD F64 addr reg) | isJust off_addr +pprInstr (LD FF64 addr reg) | isJust off_addr = vcat [ hcat [pp_ld_lbracket, pprAddr addr, pp_rbracket_comma, pprReg reg], hcat [pp_ld_lbracket, pprAddr addr2, pp_rbracket_comma,pprReg (fPair reg)] @@ -1914,7 +1919,7 @@ 
pprInstr (LD size addr reg) -- st %fn,[g1] -- st %f(n+1),[g1+4] -- sub g1,g2,g1 -- to restore g1 -pprInstr (ST F64 reg (AddrRegReg g1 g2)) +pprInstr (ST FF64 reg (AddrRegReg g1 g2)) = vcat [ hcat [ptext (sLit "\tadd\t"), pprReg g1,comma,pprReg g2,comma,pprReg g1], hcat [ptext (sLit "\tst\t"), pprReg reg, pp_comma_lbracket, @@ -1927,7 +1932,7 @@ pprInstr (ST F64 reg (AddrRegReg g1 g2)) -- Translate to -- st %fn,[addr] -- st %f(n+1),[addr+4] -pprInstr (ST F64 reg addr) | isJust off_addr +pprInstr (ST FF64 reg addr) | isJust off_addr = vcat [ hcat [ptext (sLit "\tst\t"), pprReg reg, pp_comma_lbracket, pprAddr addr, rbrack], @@ -2002,12 +2007,12 @@ pprInstr (SETHI imm reg) pprInstr NOP = ptext (sLit "\tnop") -pprInstr (FABS F32 reg1 reg2) = pprSizeRegReg (sLit "fabs") F32 reg1 reg2 -pprInstr (FABS F64 reg1 reg2) - = (<>) (pprSizeRegReg (sLit "fabs") F32 reg1 reg2) +pprInstr (FABS FF32 reg1 reg2) = pprSizeRegReg (sLit "fabs") FF32 reg1 reg2 +pprInstr (FABS FF64 reg1 reg2) + = (<>) (pprSizeRegReg (sLit "fabs") FF32 reg1 reg2) (if (reg1 == reg2) then empty else (<>) (char '\n') - (pprSizeRegReg (sLit "fmov") F32 (fPair reg1) (fPair reg2))) + (pprSizeRegReg (sLit "fmov") FF32 (fPair reg1) (fPair reg2))) pprInstr (FADD size reg1 reg2 reg3) = pprSizeRegRegReg (sLit "fadd") size reg1 reg2 reg3 @@ -2016,22 +2021,22 @@ pprInstr (FCMP e size reg1 reg2) pprInstr (FDIV size reg1 reg2 reg3) = pprSizeRegRegReg (sLit "fdiv") size reg1 reg2 reg3 -pprInstr (FMOV F32 reg1 reg2) = pprSizeRegReg (sLit "fmov") F32 reg1 reg2 -pprInstr (FMOV F64 reg1 reg2) - = (<>) (pprSizeRegReg (sLit "fmov") F32 reg1 reg2) +pprInstr (FMOV FF32 reg1 reg2) = pprSizeRegReg (sLit "fmov") FF32 reg1 reg2 +pprInstr (FMOV FF64 reg1 reg2) + = (<>) (pprSizeRegReg (sLit "fmov") FF32 reg1 reg2) (if (reg1 == reg2) then empty else (<>) (char '\n') - (pprSizeRegReg (sLit "fmov") F32 (fPair reg1) (fPair reg2))) + (pprSizeRegReg (sLit "fmov") FF32 (fPair reg1) (fPair reg2))) pprInstr (FMUL size reg1 reg2 reg3) = 
pprSizeRegRegReg (sLit "fmul") size reg1 reg2 reg3 -pprInstr (FNEG F32 reg1 reg2) = pprSizeRegReg (sLit "fneg") F32 reg1 reg2 -pprInstr (FNEG F64 reg1 reg2) - = (<>) (pprSizeRegReg (sLit "fneg") F32 reg1 reg2) +pprInstr (FNEG FF32 reg1 reg2) = pprSizeRegReg (sLit "fneg") FF32 reg1 reg2 +pprInstr (FNEG FF64 reg1 reg2) + = (<>) (pprSizeRegReg (sLit "fneg") FF32 reg1 reg2) (if (reg1 == reg2) then empty else (<>) (char '\n') - (pprSizeRegReg (sLit "fmov") F32 (fPair reg1) (fPair reg2))) + (pprSizeRegReg (sLit "fmov") FF32 (fPair reg1) (fPair reg2))) pprInstr (FSQRT size reg1 reg2) = pprSizeRegReg (sLit "fsqrt") size reg1 reg2 pprInstr (FSUB size reg1 reg2 reg3) = pprSizeRegRegReg (sLit "fsub") size reg1 reg2 reg3 @@ -2040,14 +2045,14 @@ pprInstr (FxTOy size1 size2 reg1 reg2) ptext (sLit "\tf"), ptext (case size1 of - I32 -> sLit "ito" - F32 -> sLit "sto" - F64 -> sLit "dto"), + II32 -> sLit "ito" + FF32 -> sLit "sto" + FF64 -> sLit "dto"), ptext (case size2 of - I32 -> sLit "i\t" - F32 -> sLit "s\t" - F64 -> sLit "d\t"), + II32 -> sLit "i\t" + FF32 -> sLit "s\t" + FF64 -> sLit "d\t"), pprReg reg1, comma, pprReg reg2 ] @@ -2079,27 +2084,27 @@ pprRI :: RI -> Doc pprRI (RIReg r) = pprReg r pprRI (RIImm r) = pprImm r -pprSizeRegReg :: LitString -> MachRep -> Reg -> Reg -> Doc +pprSizeRegReg :: LitString -> Size -> Reg -> Reg -> Doc pprSizeRegReg name size reg1 reg2 = hcat [ char '\t', ptext name, (case size of - F32 -> ptext (sLit "s\t") - F64 -> ptext (sLit "d\t")), + FF32 -> ptext (sLit "s\t") + FF64 -> ptext (sLit "d\t")), pprReg reg1, comma, pprReg reg2 ] -pprSizeRegRegReg :: LitString -> MachRep -> Reg -> Reg -> Reg -> Doc +pprSizeRegRegReg :: LitString -> Size -> Reg -> Reg -> Reg -> Doc pprSizeRegRegReg name size reg1 reg2 reg3 = hcat [ char '\t', ptext name, (case size of - F32 -> ptext (sLit "s\t") - F64 -> ptext (sLit "d\t")), + FF32 -> ptext (sLit "s\t") + FF64 -> ptext (sLit "d\t")), pprReg reg1, comma, pprReg reg2, @@ -2164,11 +2169,11 @@ pprInstr (LD sz reg 
addr) = hcat [ char '\t', ptext (sLit "l"), ptext (case sz of - I8 -> sLit "bz" - I16 -> sLit "hz" - I32 -> sLit "wz" - F32 -> sLit "fs" - F64 -> sLit "fd"), + II8 -> sLit "bz" + II16 -> sLit "hz" + II32 -> sLit "wz" + FF32 -> sLit "fs" + FF64 -> sLit "fd"), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', char '\t', @@ -2180,11 +2185,11 @@ pprInstr (LA sz reg addr) = hcat [ char '\t', ptext (sLit "l"), ptext (case sz of - I8 -> sLit "ba" - I16 -> sLit "ha" - I32 -> sLit "wa" - F32 -> sLit "fs" - F64 -> sLit "fd"), + II8 -> sLit "ba" + II16 -> sLit "ha" + II32 -> sLit "wa" + FF32 -> sLit "fs" + FF64 -> sLit "fd"), case addr of AddrRegImm _ _ -> empty AddrRegReg _ _ -> char 'x', char '\t', @@ -2499,8 +2504,8 @@ pprRI :: RI -> Doc pprRI (RIReg r) = pprReg r pprRI (RIImm r) = pprImm r -pprFSize F64 = empty -pprFSize F32 = char 's' +pprFSize FF64 = empty +pprFSize FF32 = char 's' -- limit immediate argument for shift instruction to range 0..32 -- (yes, the maximum is really 32, not 31) diff --git a/compiler/nativeGen/RegAllocInfo.hs b/compiler/nativeGen/RegAllocInfo.hs index 0328b95..80702bd 100644 --- a/compiler/nativeGen/RegAllocInfo.hs +++ b/compiler/nativeGen/RegAllocInfo.hs @@ -38,7 +38,6 @@ module RegAllocInfo ( import BlockId import Cmm import CLabel -import MachOp ( MachRep(..), wordRep ) import MachInstrs import MachRegs import Outputable @@ -212,13 +211,13 @@ regUsage instr = case instr of GMUL sz s1 s2 dst -> mkRU [s1,s2] [dst] GDIV sz s1 s2 dst -> mkRU [s1,s2] [dst] - GCMP sz src1 src2 -> mkRUR [src1,src2] - GABS sz src dst -> mkRU [src] [dst] - GNEG sz src dst -> mkRU [src] [dst] - GSQRT sz src dst -> mkRU [src] [dst] - GSIN sz _ _ src dst -> mkRU [src] [dst] - GCOS sz _ _ src dst -> mkRU [src] [dst] - GTAN sz _ _ src dst -> mkRU [src] [dst] + GCMP sz src1 src2 -> mkRUR [src1,src2] + GABS sz src dst -> mkRU [src] [dst] + GNEG sz src dst -> mkRU [src] [dst] + GSQRT sz src dst -> mkRU [src] [dst] + GSIN sz _ _ src dst -> mkRU [src] [dst] + 
GCOS sz _ _ src dst -> mkRU [src] [dst] + GTAN sz _ _ src dst -> mkRU [src] [dst] #endif #if x86_64_TARGET_ARCH @@ -797,14 +796,14 @@ mkSpillInstr reg delta slot #ifdef i386_TARGET_ARCH let off_w = (off-delta) `div` 4 in case regClass reg of - RcInteger -> MOV I32 (OpReg reg) (OpAddr (spRel off_w)) - _ -> GST F80 reg (spRel off_w) {- RcFloat/RcDouble -} + RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w)) + _ -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -} #endif #ifdef x86_64_TARGET_ARCH let off_w = (off-delta) `div` 8 in case regClass reg of - RcInteger -> MOV I64 (OpReg reg) (OpAddr (spRel off_w)) - RcDouble -> MOV F64 (OpReg reg) (OpAddr (spRel off_w)) + RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w)) + RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w)) -- ToDo: will it work to always spill as a double? -- does that cause a stall if the data was a float? #endif @@ -819,8 +818,8 @@ mkSpillInstr reg delta slot #endif #ifdef powerpc_TARGET_ARCH let sz = case regClass reg of - RcInteger -> I32 - RcDouble -> F64 + RcInteger -> II32 + RcDouble -> FF64 in ST sz reg (AddrRegImm sp (ImmInt (off-delta))) #endif @@ -839,27 +838,27 @@ mkLoadInstr reg delta slot #if i386_TARGET_ARCH let off_w = (off-delta) `div` 4 in case regClass reg of { - RcInteger -> MOV I32 (OpAddr (spRel off_w)) (OpReg reg); - _ -> GLD F80 (spRel off_w) reg} {- RcFloat/RcDouble -} + RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg); + _ -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -} #endif #if x86_64_TARGET_ARCH let off_w = (off-delta) `div` 8 in case regClass reg of - RcInteger -> MOV I64 (OpAddr (spRel off_w)) (OpReg reg) - _ -> MOV F64 (OpAddr (spRel off_w)) (OpReg reg) + RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg) + _ -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg) #endif #if sparc_TARGET_ARCH let{off_w = 1 + (off `div` 4); sz = case regClass reg of { - RcInteger -> I32; - RcFloat -> F32; + RcInteger -> II32; + RcFloat -> FF32; RcDouble -> F64}} 
in LD sz (fpRel (- off_w)) reg #endif #if powerpc_TARGET_ARCH let sz = case regClass reg of - RcInteger -> I32 - RcDouble -> F64 + RcInteger -> II32 + RcDouble -> FF64 in LD sz reg (AddrRegImm sp (ImmInt (off-delta))) #endif @@ -870,11 +869,11 @@ mkRegRegMoveInstr mkRegRegMoveInstr src dst #if i386_TARGET_ARCH || x86_64_TARGET_ARCH = case regClass src of - RcInteger -> MOV wordRep (OpReg src) (OpReg dst) + RcInteger -> MOV wordSize (OpReg src) (OpReg dst) #if i386_TARGET_ARCH RcDouble -> GMOV src dst #else - RcDouble -> MOV F64 (OpReg src) (OpReg dst) + RcDouble -> MOV FF64 (OpReg src) (OpReg dst) #endif #elif powerpc_TARGET_ARCH = MR dst src diff --git a/compiler/prelude/ForeignCall.lhs b/compiler/prelude/ForeignCall.lhs index a80900e..27f5b4f 100644 --- a/compiler/prelude/ForeignCall.lhs +++ b/compiler/prelude/ForeignCall.lhs @@ -131,6 +131,8 @@ stdcall: Caller allocates parameters, callee deallocates. ToDo: The stdcall calling convention is x86 (win32) specific, so perhaps we should emit a warning if it's being used on other platforms. + +See: http://www.programmersheaven.com/2/Calling-conventions \begin{code} data CCallConv = CCallConv | StdCallConv | CmmCallConv diff --git a/compiler/typecheck/TcForeign.lhs b/compiler/typecheck/TcForeign.lhs index e621337..b1dda2d 100644 --- a/compiler/typecheck/TcForeign.lhs +++ b/compiler/typecheck/TcForeign.lhs @@ -33,7 +33,6 @@ import Id #if alpha_TARGET_ARCH import Type import SMRep -import MachOp #endif import Name import OccName @@ -189,7 +188,7 @@ checkFEDArgs :: [Type] -> TcM () checkFEDArgs arg_tys = check (integral_args <= 32) err where - integral_args = sum [ (machRepByteWidth . argMachRep . primRepToCgRep) prim_rep + integral_args = sum [ (widthInBytes . argMachRep . 
primRepToCgRep) prim_rep | prim_rep <- map typePrimRep arg_tys, primRepHint prim_rep /= FloatHint ] err = ptext (sLit "On Alpha, I can only handle 32 bytes of non-floating-point arguments to foreign export dynamic") diff --git a/compiler/typecheck/TcRnDriver.lhs b/compiler/typecheck/TcRnDriver.lhs index 325b9db..e0d8632 100644 --- a/compiler/typecheck/TcRnDriver.lhs +++ b/compiler/typecheck/TcRnDriver.lhs @@ -320,7 +320,8 @@ tcRnExtCore hsc_env (HsExtCore this_mod decls src_binds) my_exports = map (Avail . idName) bndrs ; -- ToDo: export the data types also? - final_type_env = extendTypeEnvWithIds (tcg_type_env tcg_env) bndrs ; + final_type_env = + extendTypeEnvWithIds (tcg_type_env tcg_env) bndrs ; mod_guts = ModGuts { mg_module = this_mod, mg_boot = False, diff --git a/includes/Cmm.h b/includes/Cmm.h index c0b2fe9..06a66a7 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -88,6 +88,7 @@ #define I16 bits16 #define I32 bits32 #define I64 bits64 +#define P_ gcptr #if SIZEOF_VOID_P == 4 #define W_ bits32 @@ -254,10 +255,10 @@ #define ENTER() \ again: \ W_ info; \ - if (GETTAG(R1) != 0) { \ + if (GETTAG(P1) != 0) { \ jump %ENTRY_CODE(Sp(0)); \ } \ - info = %INFO_PTR(R1); \ + info = %INFO_PTR(P1); \ switch [INVALID_OBJECT .. 
N_CLOSURE_TYPES] \ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \ case \ @@ -267,7 +268,7 @@ IND_OLDGEN_PERM, \ IND_STATIC: \ { \ - R1 = StgInd_indirectee(R1); \ + P1 = StgInd_indirectee(P1); \ goto again; \ } \ case \ @@ -562,7 +563,7 @@ bdescr_free(__bd) = free + WDS(1); #define recordMutable(p, regs) \ - W_ __p; \ + P_ __p; \ W_ __bd; \ W_ __gen; \ __p = p; \ diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index 798c6e6..116b2e9 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -57,10 +57,19 @@ #if defined(GEN_HASKELL) #define field_type_(str, s_type, field) /* nothing */ +#define field_type_gcptr_(str, s_type, field) /* nothing */ #else +/* Defining REP_x to be b32 etc + These are both the C-- types used in a load + e.g. b32[addr] + and the names of the CmmTypes in the compiler + b32 :: CmmType +*/ #define field_type_(str, s_type, field) \ - printf("#define REP_" str " I"); \ + printf("#define REP_" str " b"); \ printf("%d\n", sizeof (__typeof__(((((s_type*)0)->field)))) * 8); +#define field_type_gcptr_(str, s_type, field) \ + printf("#define REP_" str " gcptr\n"); #endif #define field_type(s_type, field) \ @@ -136,17 +145,23 @@ closure_payload_macro(str(s_type,field)); /* Byte offset and MachRep for a closure field, minus the header */ +#define closure_field_(str, s_type, field) \ + closure_field_offset_(str,s_type,field) \ + field_type_(str, s_type, field); \ + closure_field_macro(str) + #define closure_field(s_type, field) \ - closure_field_offset(s_type,field) \ - field_type(s_type, field); \ - closure_field_macro(str(s_type,field)) + closure_field_(str(s_type,field),s_type,field) /* Byte offset and MachRep for a closure field, minus the header */ -#define closure_field_(str, s_type, field) \ +#define closure_field_gcptr_(str, s_type, field) \ closure_field_offset_(str,s_type,field) \ - field_type_(str, s_type, field); \ + field_type_gcptr_(str, s_type, field); \ closure_field_macro(str) +#define 
closure_field_gcptr(s_type, field) \ + closure_field_gcptr_(str(s_type,field),s_type,field) + /* Byte offset for a TSO field, minus the header and variable prof bit. */ #define tso_payload_offset(s_type, field) \ def_offset(str(s_type,field), OFFSET(s_type,field) - sizeof(StgHeader) - sizeof(StgTSOProfInfo)); @@ -310,23 +325,23 @@ main(int argc, char *argv[]) closure_size(StgPAP); closure_field(StgPAP, n_args); - closure_field(StgPAP, fun); + closure_field_gcptr(StgPAP, fun); closure_field(StgPAP, arity); closure_payload(StgPAP, payload); thunk_size(StgAP); closure_field(StgAP, n_args); - closure_field(StgAP, fun); + closure_field_gcptr(StgAP, fun); closure_payload(StgAP, payload); thunk_size(StgAP_STACK); closure_field(StgAP_STACK, size); - closure_field(StgAP_STACK, fun); + closure_field_gcptr(StgAP_STACK, fun); closure_payload(StgAP_STACK, payload); thunk_size(StgSelector); - closure_field(StgInd, indirectee); + closure_field_gcptr(StgInd, indirectee); closure_size(StgMutVar); closure_field(StgMutVar, var); diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 479c9c9..43f53c7 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -253,7 +253,7 @@ INFO_TABLE_RET(stg_catch_frame, CATCH_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - W_ unused3, "ptr" W_ unused4) + W_ unused3, P_ unused4) { Sp = Sp + SIZEOF_StgCatchFrame; jump %ENTRY_CODE(Sp(SP_OFF)); @@ -315,7 +315,7 @@ section "data" { no_break_on_exception: W_[1]; } -INFO_TABLE_RET(stg_raise_ret, RET_SMALL, "ptr" W_ arg1) +INFO_TABLE_RET(stg_raise_ret, RET_SMALL, P_ arg1) { R1 = Sp(1); Sp = Sp + WDS(2); diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index 3980ca2..94cec38 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -114,7 +114,7 @@ import LeaveCriticalSection; There are canned sequences for 'n' pointer values in registers. 
-------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_enter, RET_SMALL, "ptr" W_ unused) +INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); @@ -444,7 +444,7 @@ INFO_TABLE_RET( stg_gc_void, RET_SMALL) /*-- R1 is boxed/unpointed -------------------------------------------------- */ -INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, "ptr" W_ unused) +INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused) { R1 = Sp(1); Sp_adj(2); @@ -531,7 +531,7 @@ stg_gc_l1 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */ -INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused ) +INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused ) { Sp_adj(1); // one ptr is on the stack (Sp(0)) @@ -816,7 +816,7 @@ stg_block_1 * * -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, "ptr" W_ unused ) +INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused ) { R1 = Sp(1); Sp_adj(2); @@ -843,7 +843,7 @@ stg_block_takemvar BLOCK_BUT_FIRST(stg_block_takemvar_finally); } -INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, "ptr" W_ unused1, "ptr" W_ unused2 ) +INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 ) { R2 = Sp(2); R1 = Sp(1); @@ -892,7 +892,7 @@ stg_block_blackhole BLOCK_BUT_FIRST(stg_block_blackhole_finally); } -INFO_TABLE_RET( stg_block_throwto, RET_SMALL, "ptr" W_ unused, "ptr" W_ unused ) +INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused ) { R2 = Sp(2); R1 = Sp(1); diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 72090c8..f75b8aa 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1084,7 +1084,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5) + W_ unused3, P_ unused4, P_ unused5) { W_ r, frame, trec, outer; @@ -1118,7 +1118,7 @@ INFO_TABLE_RET(stg_atomically_frame, 
ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ frame, trec, valid, next_invariant, q, outer; @@ -1180,7 +1180,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ frame, trec, valid; @@ -1214,7 +1214,7 @@ INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, #if defined(PROFILING) W_ unused1, W_ unused2, #endif - "ptr" W_ unused3, "ptr" W_ unused4) + P_ unused3, P_ unused4) { W_ r, frame, trec, outer; frame = Sp; diff --git a/rts/Updates.cmm b/rts/Updates.cmm index 7ebade0..4043da0 100644 --- a/rts/Updates.cmm +++ b/rts/Updates.cmm @@ -45,9 +45,9 @@ } #if defined(PROFILING) -#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, "ptr" W_ unused3 +#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, P_ unused3 #else -#define UPD_FRAME_PARAMS "ptr" W_ unused1 +#define UPD_FRAME_PARAMS P_ unused1 #endif /* this bitmap indicates that the first word of an update frame is a diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index 9d0febb..eb29e2d 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -426,12 +426,12 @@ formalParam V _ = empty formalParam arg n = formalParamType arg <> space <> text "arg" <> int n <> text ", " -formalParamType arg | isPtr arg = text "\"ptr\"" <> space <> argRep arg - | otherwise = argRep arg +formalParamType arg = argRep arg argRep F = text "F_" argRep D = text "D_" argRep L = text "L_" +argRep P = text "gcptr" argRep _ = text "W_" genApply regstatus args = diff --git a/utils/runstdtest/runstdtest.prl b/utils/runstdtest/runstdtest.prl index 555e1ec..039c253 100644 --- a/utils/runstdtest/runstdtest.prl +++ b/utils/runstdtest/runstdtest.prl @@ -249,7 +249,7 @@ else $PostScriptLines hit='NO' for out_file in @PgmStdoutFile ; do - if sed "s/\\r\$//" $TmpPrefix/runtest$$.1 | cmp -s 
\$out_file - ; then + if sed "s/ \$//" $TmpPrefix/runtest$$.1 | cmp -s \$out_file - ; then hit='YES' fi done @@ -275,7 +275,7 @@ fi hit='NO' for out_file in @PgmStderrFile ; do - if sed "s/\\r\$//" $TmpPrefix/runtest$$.2 | cmp -s \$out_file - ; then + if sed "s/ \$//" $TmpPrefix/runtest$$.2 | cmp -s \$out_file - ; then hit='YES' fi done -- 1.7.10.4