From 6015a94f9108a502150565577b66c23650796639 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Fri, 27 Jul 2007 10:41:57 +0000 Subject: [PATCH] Pointer Tagging This patch implements pointer tagging as per our ICFP'07 paper "Faster laziness using dynamic pointer tagging". It improves performance by 10-15% for most workloads, including GHC itself. The original patches were by Alexey Rodriguez Yakushev , with additions and improvements by me. I've re-recorded the development as a single patch. The basic idea is this: we use the low 2 bits of a pointer to a heap object (3 bits on a 64-bit architecture) to encode some information about the object pointed to. For a constructor, we encode the "tag" of the constructor (e.g. True vs. False), for a function closure its arity. This enables some decisions to be made without dereferencing the pointer, which speeds up some common operations. In particular it enables us to avoid costly indirect jumps in many cases. More information in the commentary: http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/HaskellExecution/PointerTagging --- compiler/cmm/CmmLint.hs | 11 +++- compiler/cmm/PprC.hs | 3 +- compiler/codeGen/CgBindery.lhs | 68 +++++++++++++++++---- compiler/codeGen/CgClosure.lhs | 24 +++++++- compiler/codeGen/CgCon.lhs | 27 ++++++--- compiler/codeGen/CgHeapery.lhs | 3 + compiler/codeGen/CgInfoTbls.hs | 25 +++++++- compiler/codeGen/CgPrimOp.hs | 3 +- compiler/codeGen/CgProf.hs | 15 ++++- compiler/codeGen/CgTailCall.lhs | 70 ++++++++++++++++++++-- compiler/codeGen/CgUtils.hs | 62 +++++++++++++++++++ compiler/codeGen/ClosureInfo.lhs | 38 ++++++++++-- compiler/main/Constants.lhs | 10 ++++ compiler/nativeGen/MachCodeGen.hs | 12 ++++ includes/Closures.h | 3 +- includes/Cmm.h | 45 ++++++++++++-- includes/InfoTables.h | 2 +- includes/MachDeps.h | 10 ++++ includes/Rts.h | 40 +++++++++++++ includes/Storage.h | 2 +- includes/mkDerivedConstants.c | 4 ++ rts/Apply.cmm | 34 +++++++---- rts/HeapStackCheck.cmm | 32 +++++++--- rts/Interpreter.c | 11 ++-- rts/PrimOps.cmm | 9 ++- rts/RetainerProfile.c | 22 ++++--- rts/RtsAPI.c | 38 ++++++------ rts/Sanity.c | 17 ++++-- rts/Sparks.c | 6 ++ rts/Stable.c | 11 +++- rts/Stats.c | 55 +++++++++++++++++ rts/StgMiscClosures.cmm | 10 ++-- rts/StgStartup.cmm | 1 + rts/StgStdThunks.cmm | 29 +++++++-- rts/sm/Compact.c | 37 ++++++++---- rts/sm/Evac.c | 82 ++++++++++++++++++------- rts/sm/GC.c | 13 ++-- rts/sm/Scav.c | 4 +- utils/genapply/GenApply.hs | 119 +++++++++++++++++++++++++++++++++---- 39 files changed, 832 insertions(+), 175 deletions(-) diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 130dba0..d8d6c9b 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -88,7 +88,8 @@ cmmCheckMachOp op args = return (resultRepOfMachOp op) isWordOffsetReg (CmmGlobal Sp) = True -isWordOffsetReg (CmmGlobal Hp) = True +-- No warnings for unaligned arithmetic, which is used to tag dynamic constructor closures. +--isWordOffsetReg (CmmGlobal Hp) = True isWordOffsetReg _ = False isOffsetOp (MO_Add _) = True @@ -98,14 +99,18 @@ isOffsetOp _ = False -- This expression should be an address from which a word can be loaded: -- check for funny-looking sub-word offsets. cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) - | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) - | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e cmmCheckWordAddress _ = return () +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True lintCmmStmt :: CmmStmt -> CmmLint () lintCmmStmt stmt@(CmmAssign reg expr) = do diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 77d337d..6032dc2 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -322,8 +322,9 @@ pprExpr e = case e of -> char '*' <> pprAsPtrReg r CmmLoad (CmmRegOff r off) rep - | isPtrReg r && rep == wordRep + | isPtrReg r && rep == wordRep && (off `rem` wORD_SIZE == 0) -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. --ARY) -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) CmmLoad expr rep -> diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index d5a2c69..7447222 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -11,7 +11,8 @@ module CgBindery ( cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, - stableIdInfo, heapIdInfo, + stableIdInfo, heapIdInfo, + taggedStableIdInfo, taggedHeapIdInfo, letNoEscapeIdInfo, idInfoToAmode, addBindC, addBindsC, @@ -22,7 +23,7 @@ module CgBindery ( getLiveStackBindings, bindArgsToStack, rebindToStack, - bindNewToNode, bindNewToReg, bindArgsToRegs, + bindNewToNode, bindNewToUntagNode, bindNewToReg, bindArgsToRegs, bindNewToTemp, getArgAmode, getArgAmodes, getCgIdInfo, @@ -38,11 +39,13 @@ import CgStackery import CgUtils import CLabel import ClosureInfo +import Constants import Cmm import PprCmm ( {- instance Outputable -} ) import SMRep import Id +import DataCon import VarEnv import VarSet import Literal @@ -52,6 +55,7 @@ import StgSyn import Unique import UniqSet import Outputable + \end{code} @@ -80,23 +84,44 @@ data CgIdInfo , cg_rep :: CgRep , cg_vol :: VolatileLoc , cg_stb :: StableLoc - , cg_lf :: LambdaFormInfo } + , cg_lf :: LambdaFormInfo + , cg_tag :: {-# UNPACK #-} !Int -- tag to be added in idInfoToAmode + } mkCgIdInfo id vol stb lf = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id } + cg_lf = lf, cg_rep = idCgRep id, cg_tag = tag } + where + tag + | Just con <- isDataConWorkId_maybe id, + {- Is this an identifier for a static constructor closure? -} + isNullaryRepDataCon con + {- If yes, is this a nullary constructor? + If yes, we assume that the constructor is evaluated and can + be tagged. + -} + = tagForCon con + + | otherwise + = funTagLFInfo lf voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc , cg_stb = VoidLoc, cg_lf = mkLFArgument id - , cg_rep = VoidArg } + , cg_rep = VoidArg, cg_tag = 0 } -- Used just for VoidRep things data VolatileLoc -- These locations die across a call = NoVolatileLoc | RegLoc CmmReg -- In one of the registers (global or local) | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) - | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node - -- ie *(Node+offset) + | VirNodeLoc ByteOff -- Cts of offset indirect from Node + -- ie *(Node+offset). + -- NB. Byte offset, because we subtract R1's + -- tag from the offset. + +mkTaggedCgIdInfo id vol stb lf con + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id, cg_tag = tagForCon con } \end{code} @StableLoc@ encodes where an Id can be found, used by @@ -121,7 +146,7 @@ data StableLoc \begin{code} instance Outputable CgIdInfo where - ppr (CgIdInfo id rep vol stb lf) + ppr (CgIdInfo id rep vol stb lf _) -- TODO, pretty pring the tag info = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] instance Outputable VolatileLoc where @@ -149,19 +174,29 @@ stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info -nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info +taggedStableIdInfo id amode lf_info con + = mkTaggedCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info con +taggedHeapIdInfo id offset lf_info con + = mkTaggedCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info con +untagNodeIdInfo id offset lf_info tag + = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info + + idInfoToAmode :: CgIdInfo -> FCode CmmExpr idInfoToAmode info = case cg_vol info of { RegLoc reg -> returnFC (CmmReg reg) ; - VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; - VirHpLoc hp_off -> getHpRelOffset hp_off ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetB (CmmReg nodeReg) nd_off) + mach_rep) ; + VirHpLoc hp_off -> do { off <- getHpRelOffset hp_off + ; return $! maybeTag off }; NoVolatileLoc -> case cg_stb info of - StableLoc amode -> returnFC amode + StableLoc amode -> returnFC $! maybeTag amode VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off ; return (CmmLoad sp_rel mach_rep) } @@ -177,6 +212,11 @@ idInfoToAmode info where mach_rep = argMachRep (cg_rep info) + maybeTag amode -- add the tag, if we have one + | tag == 0 = amode + | otherwise = cmmOffsetB amode tag + where tag = cg_tag info + cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id @@ -389,6 +429,10 @@ bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code bindNewToNode id offset lf_info = addBindC id (nodeIdInfo id offset lf_info) +bindNewToUntagNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Int -> Code +bindNewToUntagNode id offset lf_info tag + = addBindC id (untagNodeIdInfo id offset lf_info tag) + -- Create a new temporary whose unique is that in the id, -- bind the id to it, and return the addressing mode for the -- temporary. diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index fabf434..86e13ab 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -177,7 +177,14 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody (do { -- Bind the fvs - let bind_fv (info, offset) + let + -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + mbtag = tagForArity (length args) + bind_fv (info, offset) + | Just tag <- mbtag + = bindNewToUntagNode (cgIdInfoId info) offset (cgIdInfoLF info) tag + | otherwise = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) ; mapCs bind_fv bind_details @@ -236,7 +243,7 @@ NB: Thunks cannot have a primitive type! closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do { body_absC <- getCgStmts $ do { tickyEnterThunk cl_info - ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling + ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; thunkWrapper cl_info $ do -- We only enter cc after setting up update so -- that cc of enclosing scope will be recorded @@ -400,8 +407,19 @@ funWrapper :: ClosureInfo -- Closure whose code body this is funWrapper closure_info arg_regs reg_save_code fun_body = do { let node_points = nodeMustPointToIt (closureLFInfo closure_info) + {- + -- Debugging: check that R1 has the correct tag + ; let tag = funTag closure_info + ; whenC (tag /= 0 && node_points) $ do + l <- newLabelC + stmtC (CmmCondBranch (CmmMachOp mo_wordEq [cmmGetTag (CmmReg nodeReg), + CmmLit (mkIntCLit tag)]) l) + stmtC (CmmStore (CmmLit (mkWordCLit 0)) (CmmLit (mkWordCLit 0))) + labelC l + -} + -- Enter for Ldv profiling - ; whenC node_points (ldvEnter (CmmReg nodeReg)) + ; whenC node_points (ldvEnterClosure closure_info) -- GranSim yeild poin ; granYield arg_regs node_points diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index a2c8578..91d7098 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -43,8 +43,10 @@ import Id import Type import PrelInfo import Outputable -import Util import ListSetOps +#ifdef DEBUG +import Util ( lengthIs ) +#endif \end{code} @@ -93,7 +95,7 @@ cgTopRhsCon id con args ; emitDataLits closure_label closure_rep -- RETURN - ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) } + ; returnFC (id, taggedStableIdInfo id (mkLblExpr closure_label) lf_info con) } \end{code} %************************************************************************ @@ -134,9 +136,10 @@ at all. \begin{code} buildDynCon binder cc con [] = do this_pkg <- getThisPackage - returnFC (stableIdInfo binder + returnFC (taggedStableIdInfo binder (mkLblExpr (mkClosureLabel this_pkg (dataConName con))) - (mkConLFInfo con)) + (mkConLFInfo con) + con) \end{code} The following three paragraphs about @Char@-like and @Int@-like @@ -170,7 +173,7 @@ buildDynCon binder cc con [arg_amode] offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) - ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } + ; returnFC (taggedStableIdInfo binder intlike_amode (mkConLFInfo con) con) } buildDynCon binder cc con [arg_amode] | maybeCharLikeCon con @@ -181,7 +184,7 @@ buildDynCon binder cc con [arg_amode] offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) - ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) } + ; returnFC (taggedStableIdInfo binder charlike_amode (mkConLFInfo con) con) } \end{code} Now the general case. @@ -194,7 +197,7 @@ buildDynCon binder ccs con args (closure_info, amodes_w_offsets) = layOutDynConstr this_pkg con args ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets - ; returnFC (heapIdInfo binder hp_off lf_info) } + ; returnFC (taggedHeapIdInfo binder hp_off lf_info con) } where lf_info = mkConLFInfo con @@ -223,7 +226,9 @@ bindConArgs :: DataCon -> [Id] -> Code bindConArgs con args = do this_pkg <- getThisPackage let - bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con) (_, args_w_offsets) = layOutDynConstr this_pkg con (addIdReps args) -- ASSERT(not (isUnboxedTupleCon con)) return () @@ -386,11 +391,12 @@ cgTyCon tycon -- Put the table after the data constructor decls, because the -- datatype closure table (for enumeration types) -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + -- Note that the closure pointers are tagged. ; extra <- if isEnumerationTyCon tycon then do tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel (tyConName tycon)) - [ CmmLabel (mkLocalClosureLabel (dataConName con)) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con) | con <- tyConDataCons tycon]) return [tbl] else @@ -434,6 +440,9 @@ cgDataCon data_con body_code = do { -- NB: We don't set CC when entering data (WDP 94/06) tickyReturnOldCon (length arg_things) + -- The case continuation code is expecting a tagged pointer + ; stmtC (CmmAssign nodeReg + (tagCons data_con (CmmReg nodeReg))) ; performReturn emitReturnInstr } -- noStmts: Ptr to thing already in Node diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 3bba211..b89452e 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -288,6 +288,9 @@ hpStkCheck cl_info is_fun reg_save_code code = noStmts | otherwise = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + -- Strictly speaking, we should tag node here. But if + -- node doesn't point to the closure, the code for the closure + -- cannot depend on the value of R1 anyway, so we're safe. closure_lbl = closureLabelFromCI cl_info full_save_code = node_asst `plusStmts` reg_save_code diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 4e384854..e9751fa 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -15,6 +15,7 @@ module CgInfoTbls ( stdInfoTableSizeB, entryCode, closureInfoPtr, getConstrTag, + cmmGetClosureType, infoTable, infoTableClosureType, infoTablePtrs, infoTableNonPtrs, funInfoTable, makeRelativeRefTo @@ -273,14 +274,24 @@ emitAlgReturnTarget emitAlgReturnTarget name branches mb_deflt fam_sz = do { blks <- getCgStmts $ - emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) - -- NB: tag_expr is zero-based + -- is the constructor tag in the node reg? + if isSmallFamily fam_sz + then do -- yes, node has constr. tag + let tag_expr = cmmConstrTag1 (CmmReg nodeReg) + branches' = [(tag+1,branch)|(tag,branch)<-branches] + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + else do -- no, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB nodeReg (-1) + tag_expr = getConstrTag (untagged_ptr) + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) ; lbl <- emitReturnTarget name blks ; return (lbl, Nothing) } -- Nothing: the internal branches in the switch don't have -- global labels, so we can't use them at the 'call site' where - tag_expr = getConstrTag (CmmReg nodeReg) + uniq = getUnique name -------------------------------- emitReturnInstr :: Code @@ -346,6 +357,14 @@ getConstrTag closure_ptr where info_table = infoTable (closureInfoPtr closure_ptr) +cmmGetClosureType :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + infoTable :: CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) -- and returns a pointer to the first word of the standard-form diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index d26d9c6..e489d73 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -183,8 +183,9 @@ emitPrimOp [res] AddrToHValueOp [arg] live = stmtC (CmmAssign (CmmLocal res) arg) -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +-- Note: argument may be tagged! emitPrimOp [res] DataToTagOp [arg] live - = stmtC (CmmAssign (CmmLocal res) (getConstrTag arg)) + = stmtC (CmmAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) {- Freezing arrays-of-ptrs requires changing an info table, for the benefit of the generational collector. It needs to scavenge mutable diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index 27ee54c..651f0ea 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -20,7 +20,7 @@ module CgProf ( emitSetCCC, emitCCS, -- Lag/drag/void stuff - ldvEnter, ldvRecordCreate + ldvEnter, ldvEnterClosure, ldvRecordCreate ) where #include "HsVersions.h" @@ -242,9 +242,12 @@ enter_cost_centre closure_info ccs body where enc_ccs = CmmLit (mkCCostCentreStack ccs) re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (CmmReg nodeReg) + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) is_box = isBox body + -- if this is a function, then node will be tagged; we must subract the tag + node_tag = funTag closure_info + -- set the current CCS when entering a PAP enterCostCentrePAP :: CmmExpr -> Code enterCostCentrePAP closure = @@ -448,9 +451,14 @@ ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit -- The closure is not IND or IND_OLDGEN because neither is considered for LDV -- profiling. -- +ldvEnterClosure :: ClosureInfo -> Code +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to substract node's tag + ldvEnter :: CmmExpr -> Code -- Argument is a closure pointer -ldvEnter cl_ptr +ldvEnter cl_ptr = ifProfiling $ -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | @@ -458,6 +466,7 @@ ldvEnter cl_ptr emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) (stmtC (CmmStore ldv_wd new_ldv_wd)) where + -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) (CmmLit (mkWordCLit lDV_CREATE_MASK))) diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 22cecb7..9527026 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -27,6 +27,7 @@ import CgUtils import CgTicky import ClosureInfo import SMRep +import MachOp import Cmm import CmmUtils import CLabel @@ -102,7 +103,8 @@ performTailCall fun_info arg_amodes pending_assts | otherwise = do { fun_amode <- idInfoToAmode fun_info - ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + ; let assignSt = CmmAssign nodeReg fun_amode + node_asst = oneStmt assignSt opt_node_asst | nodeMustPointToIt lf_info = node_asst | otherwise = noStmts ; EndOfBlockInfo sp _ <- getEndOfBlockInfo @@ -113,8 +115,15 @@ performTailCall fun_info arg_amodes pending_assts -- Node must always point to things we enter EnterIt -> do { emitSimultaneously (node_asst `plusStmts` pending_assts) - ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) - ; doFinalJump sp False (stmtC (CmmJump target [])) } + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + enterClosure = stmtC (CmmJump target []) + -- If this is a scrutinee + -- let's check if the closure is a constructor + -- so we can directly jump to the alternatives switch + -- statement. + jumpInstr = getEndOfBlockInfo >>= + maybeSwitchOnCons enterClosure + ; doFinalJump sp False jumpInstr } -- A function, but we have zero arguments. It is already in WHNF, -- so we can just return it. @@ -149,6 +158,7 @@ performTailCall fun_info arg_amodes pending_assts ; directCall sp apply_lbl args extra_args (node_asst `plusStmts` pending_assts) + } -- A direct function call (possibly with some left-over arguments) @@ -169,8 +179,58 @@ performTailCall fun_info arg_amodes pending_assts where fun_name = idName (cgIdInfoId fun_info) lf_info = cgIdInfoLF fun_info - - + untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) + -- Test if closure is a constructor + maybeSwitchOnCons enterClosure eob + | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob + = do { is_constr <- newLabelC + -- Is the pointer tagged? + -- Yes, jump to switch statement + ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) + is_constr) + -- No, enter the closure. + ; enterClosure + ; labelC is_constr + ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + } +{- + -- This is a scrutinee for a case expression + -- so let's see if we can directly inspect the closure + | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob + = do { no_cons <- newLabelC + -- Both the NCG and gcc optimize away the temp + ; z <- newTemp wordRep + ; stmtC (CmmAssign z tag_expr) + ; let tag = CmmReg z + -- Is the closure a cons? + ; stmtC (CmmCondBranch (cond1 tag) no_cons) + ; stmtC (CmmCondBranch (cond2 tag) no_cons) + -- Yes, jump to switch statement + ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + ; labelC no_cons + -- No, enter the closure. + ; enterClosure + } +-} + -- No case expression involved, enter the closure. + | otherwise + = do { stmtC untag_node + ; enterClosure + } + where + --cond1 tag = cmmULtWord tag lowCons + -- More efficient than the above? + tag_expr = cmmGetClosureType (CmmReg nodeReg) + cond1 tag = cmmEqWord tag (CmmLit (mkIntCLit 0)) + cond2 tag = cmmUGtWord tag highCons + lowCons = CmmLit (mkIntCLit 1) + -- CONSTR + highCons = CmmLit (mkIntCLit 8) + -- CONSTR_NOCAF_STATIC (from ClosureType.h) + + +untagCmmAssign (CmmAssign r cmmExpr) = CmmAssign r (cmmUntag cmmExpr) +untagCmmAssign stmt = stmt directCall sp lbl args extra_args assts = do let diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index c66fc9e..8d3578e 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -22,12 +22,17 @@ module CgUtils ( callerSaveVolatileRegs, get_GlobalReg_addr, cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmUGtWord, cmmOffsetExprW, cmmOffsetExprB, cmmRegOffW, cmmRegOffB, cmmLabelOffW, cmmLabelOffB, cmmOffsetW, cmmOffsetB, cmmOffsetLitW, cmmOffsetLitB, cmmLoadIndexW, + cmmConstrTag, cmmConstrTag1, + + tagForCon, tagCons, isSmallFamily, + cmmUntag, cmmIsTagged, cmmGetTag, addToMem, addToMemE, mkWordCLit, @@ -43,6 +48,7 @@ module CgUtils ( import CgMonad import TyCon +import DataCon import Id import Constants import SMRep @@ -61,7 +67,9 @@ import Util import DynFlags import FastString import PackageConfig +#ifdef DEBUG import Outputable +#endif import Data.Char import Data.Bits @@ -164,6 +172,9 @@ cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] +--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2] +--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2] +cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] cmmNegate :: CmmExpr -> CmmExpr cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) @@ -172,6 +183,57 @@ cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] blankWord :: CmmStatic blankWord = CmmUninitialised wORD_SIZE +-- Tagging -- +-- Tag bits mask +--cmmTagBits = CmmLit (mkIntCLit tAG_BITS) +cmmTagMask = CmmLit (mkIntCLit tAG_MASK) +cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag e = (e `cmmAndWord` cmmPointerMask) + +cmmGetTag e = (e `cmmAndWord` cmmTagMask) + +-- Test if a closure pointer is untagged +cmmIsTagged e = (e `cmmAndWord` cmmTagMask) + `cmmNeWord` CmmLit zeroCLit + +cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1)) +-- Get constructor tag, but one based. +cmmConstrTag1 e = e `cmmAndWord` cmmTagMask + +{- + The family size of a data type (the number of constructors) + can be either: + * small, if the family size < 2**tag_bits + * big, otherwise. + + Small families can have the constructor tag in the tag + bits. + Big families only use the tag value 1 to represent + evaluatedness. +-} +isSmallFamily fam_size = fam_size <= mAX_PTR_TAG + +tagForCon con = tag + where + con_tag = dataConTagZ con + fam_size = tyConFamilySize (dataConTyCon con) + tag | isSmallFamily fam_size = con_tag + 1 + | otherwise = 1 + +--Tag an expression, to do: refactor, this appears in some other module. +tagCons con expr = cmmOffsetB expr (tagForCon con) + +-- Copied from CgInfoTbls.hs +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + ----------------------- -- Making literals diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index d0d2ed9..d537a7b 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -23,7 +23,7 @@ module ClosureInfo ( mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, - mkClosureInfo, mkConInfo, + mkClosureInfo, mkConInfo, maybeIsLFCon, closureSize, closureNonHdrSize, closureGoodStuffSize, closurePtrsSize, @@ -35,6 +35,7 @@ module ClosureInfo ( closureNeedsUpdSpace, closureIsThunk, closureSingleEntry, closureReEntrant, isConstrClosure_maybe, closureFunInfo, isStandardFormThunk, isKnownFun, + funTag, funTagLFInfo, tagForArity, enterIdLabel, enterLocalIdLabel, enterReturnPtLabel, @@ -58,6 +59,7 @@ module ClosureInfo ( #include "../includes/MachDeps.h" #include "HsVersions.h" +--import CgUtils import StgSyn import SMRep @@ -277,6 +279,10 @@ might_be_a_function ty mkConLFInfo :: DataCon -> LambdaFormInfo mkConLFInfo con = LFCon con +maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon +maybeIsLFCon (LFCon con) = Just con +maybeIsLFCon _ = Nothing + mkSelectorLFInfo id offset updatable = LFThunk NotTopLevel False updatable (SelectorThunk offset) (might_be_a_function (idType id)) @@ -804,10 +810,32 @@ isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con isConstrClosure_maybe _ = Nothing closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) -closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) - = Just (arity, arg_desc) -closureFunInfo _ - = Nothing +closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info +closureFunInfo _ = Nothing + +lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) +lfFunInfo _ = Nothing + +funTag :: ClosureInfo -> Int +funTag (ClosureInfo { closureLFInfo = lf_info }) = funTagLFInfo lf_info +funTag _ = 0 + +-- maybe this should do constructor tags too? +funTagLFInfo :: LambdaFormInfo -> Int +funTagLFInfo lf + -- A function is tagged with its arity + | Just (arity,_) <- lfFunInfo lf, + Just tag <- tagForArity arity + = tag + + -- other closures (and unknown ones) are not tagged + | otherwise + = 0 + +tagForArity :: Int -> Maybe Int +tagForArity i | i <= mAX_PTR_TAG = Just i + | otherwise = Nothing \end{code} \begin{code} diff --git a/compiler/main/Constants.lhs b/compiler/main/Constants.lhs index 4f13af8..2e0c4d4 100644 --- a/compiler/main/Constants.lhs +++ b/compiler/main/Constants.lhs @@ -6,6 +6,8 @@ \begin{code} module Constants (module Constants) where +import Data.Bits (shiftL) + -- This magical #include brings in all the everybody-knows-these magic -- constants unfortunately, we need to be *explicit* about which one -- we want; if we just hope a -I... will get the right one, we could @@ -108,6 +110,14 @@ wORD_SIZE = (SIZEOF_HSWORD :: Int) wORD_SIZE_IN_BITS = wORD_SIZE * 8 :: Int \end{code} +Amount of pointer bits used for semi-tagging constructor closures + +\begin{code} +tAG_BITS = (TAG_BITS :: Int) +tAG_MASK = ((1 `shiftL` tAG_BITS) - 1) :: Int +mAX_PTR_TAG = tAG_MASK :: Int +\end{code} + Size of a C int, in bytes. May be smaller than wORD_SIZE. \begin{code} diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 2c07016..cc94074 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -2216,6 +2216,18 @@ condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do -- return (CondCode False cond code) +-- anything vs zero, using a mask +-- TODO: Add some sanity checking!!!! +condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) + | (CmmLit (CmmInt mask pk2)) <- o2 + = do + (x_reg, x_code) <- getSomeReg x + let + code = x_code `snocOL` + TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg) + -- + return (CondCode False cond code) + -- anything vs zero condIntCode cond x (CmmLit (CmmInt 0 pk)) = do (x_reg, x_code) <- getSomeReg x diff --git a/includes/Closures.h b/includes/Closures.h index 64582ba..df53cee 100644 --- a/includes/Closures.h +++ b/includes/Closures.h @@ -306,7 +306,8 @@ typedef struct { */ typedef struct { const struct _StgInfoTable* info; - StgWord size; + StgHalfWord size; + StgHalfWord tag; StgClosure * fun; StgClosure * payload[FLEXIBLE_ARRAY]; } StgRetFun; diff --git a/includes/Cmm.h b/includes/Cmm.h index b23a37b..cecf926 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -91,12 +91,34 @@ #if SIZEOF_VOID_P == 4 #define W_ bits32 +/* Maybe it's better to include MachDeps.h */ +#define TAG_BITS 2 #elif SIZEOF_VOID_P == 8 #define W_ bits64 +/* Maybe it's better to include MachDeps.h */ +#define TAG_BITS 3 #else #error Unknown word size #endif +/* + * The RTS must UNTAG a pointer before dereferencing it. + * The use of UNTAG follows the following rules of thumb: + * + * - Any pointer might be tagged. + * - Except the pointers that are passed in R1 to RTS functions. + * - R1 is also untagged when entering constructor code. + * + * TODO: + * + * - Remove redundancies of tagging and untagging in code generation. + * - Optimize getTag or dataToTag# ? + * + */ +#define TAG_MASK ((1 << TAG_BITS) - 1) +#define UNTAG(p) (p & ~TAG_MASK) +#define GETTAG(p) (p & TAG_MASK) + #if SIZEOF_INT == 4 #define CInt bits32 #elif SIZEOF_INT == 8 @@ -228,11 +250,23 @@ ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES, but switch doesn't allow us to use exprs there yet. + + If R1 points to a tagged object it points either to + * A constructor. + * A function with arity <= TAG_MASK. + In both cases the right thing to do is to return. + Note: it is rather lucky that we can use the tag bits to do this + for both objects. Maybe it points to a brittle design? + + Indirections can contain tagged pointers, so their tag is checked. -------------------------------------------------------------------------- */ #define ENTER() \ again: \ W_ info; \ + if (GETTAG(R1) != 0) { \ + jump %ENTRY_CODE(Sp(0)); \ + } \ info = %INFO_PTR(R1); \ switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \ @@ -247,14 +281,13 @@ goto again; \ } \ case \ - BCO, \ FUN, \ FUN_1_0, \ FUN_0_1, \ FUN_2_0, \ FUN_1_1, \ - FUN_0_2, \ - FUN_STATIC, \ + FUN_STATIC, \ + BCO, \ PAP: \ { \ jump %ENTRY_CODE(Sp(0)); \ @@ -265,6 +298,10 @@ } \ } +// The FUN cases almost never happen: a pointer to a non-static FUN +// should always be tagged. This unfortunately isn't true for the +// interpreter right now, which leaves untagged FUNs on the stack. + /* ----------------------------------------------------------------------------- Constants. -------------------------------------------------------------------------- */ @@ -375,7 +412,7 @@ (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \ (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES)) -#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(p))) +#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p)))) /* * The layout of the StgFunInfoExtra part of an info table changes diff --git a/includes/InfoTables.h b/includes/InfoTables.h index a8e76b0..bbffea6 100644 --- a/includes/InfoTables.h +++ b/includes/InfoTables.h @@ -164,7 +164,7 @@ typedef struct { extern StgWord16 closure_flags[]; -#define closureFlags(c) (closure_flags[get_itbl(c)->type]) +#define closureFlags(c) (closure_flags[get_itbl(UNTAG_CLOSURE(c))->type]) #define closure_HNF(c) ( closureFlags(c) & _HNF) #define closure_BITMAP(c) ( closureFlags(c) & _BTM) diff --git a/includes/MachDeps.h b/includes/MachDeps.h index abe4405..7b71f7c 100644 --- a/includes/MachDeps.h +++ b/includes/MachDeps.h @@ -105,4 +105,14 @@ #endif #endif +#ifndef TAG_BITS +#if SIZEOF_HSWORD == 4 +#define TAG_BITS 2 +#else +#define TAG_BITS 3 +#endif +#endif + +#define TAG_MASK ((1 << TAG_BITS) - 1) + #endif /* MACHDEPS_H */ diff --git a/includes/Rts.h b/includes/Rts.h index d009618..eba8146 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -107,6 +107,29 @@ extern void _assertFail (const char *, unsigned int); #define FMT_Int64 "lld" #endif +/* + * Macros for untagging and retagging closure pointers + * For more information look at the comments in Cmm.h + */ + +static inline StgWord +GET_CLOSURE_TAG(StgClosure * p) +{ + return (StgWord)p & TAG_MASK; +} + +static inline StgClosure * +UNTAG_CLOSURE(StgClosure * p) +{ + return (StgClosure*)((StgWord)p & ~TAG_MASK); +} + +static inline StgClosure * +TAG_CLOSURE(StgWord tag,StgClosure * p) +{ + return (StgClosure*)((StgWord)p | tag); +} + /* ----------------------------------------------------------------------------- Include everything STG-ish -------------------------------------------------------------------------- */ @@ -207,6 +230,23 @@ extern void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__); /* declarations for runtime flags/values */ #define MAX_RTS_ARGS 32 +#ifdef DEBUG +#define TICK_VAR(arity) \ + extern StgInt SLOW_CALLS_##arity; \ + extern StgInt RIGHT_ARITY_##arity; \ + extern StgInt TAGGED_PTR_##arity; + +#define TICK_VAR_INI(arity) \ + StgInt SLOW_CALLS_##arity = 1; \ + StgInt RIGHT_ARITY_##arity = 1; \ + StgInt TAGGED_PTR_##arity = 0; + +extern StgInt TOTAL_CALLS; + +TICK_VAR(1) +TICK_VAR(2) +#endif + /* ----------------------------------------------------------------------------- Assertions and Debuggery -------------------------------------------------------------------------- */ diff --git a/includes/Storage.h b/includes/Storage.h index 604e49e..92a856c 100644 --- a/includes/Storage.h +++ b/includes/Storage.h @@ -303,7 +303,7 @@ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p); ((StgInfoTable *)(INFO_PTR_TO_STRUCT(p)))->type < N_CLOSURE_TYPES) #define LOOKS_LIKE_CLOSURE_PTR(p) \ - (LOOKS_LIKE_INFO_PTR(((StgClosure *)(p))->header.info)) + (LOOKS_LIKE_INFO_PTR((UNTAG_CLOSURE((StgClosure *)(p)))->header.info)) /* ----------------------------------------------------------------------------- Macros for calculating how big a closure will be (used during allocation) diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index 2fe99b6..aa3c673 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -403,6 +403,10 @@ main(int argc, char *argv[]) struct_field(StgLargeBitmap, size); field_offset(StgLargeBitmap, bitmap); + struct_field(StgRetFun, size); + struct_field(StgRetFun, tag); + struct_field(StgRetFun, fun); + struct_size(snEntry); struct_field(snEntry,sn_obj); struct_field(snEntry,addr); diff --git a/rts/Apply.cmm b/rts/Apply.cmm index e0ca039..cf8a108 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -90,8 +90,6 @@ stg_PAP_apply // Enter PAP cost centre ENTER_CCS_PAP_CL(pap); - R1 = StgPAP_fun(pap); - // Reload the stack W_ i; W_ p; @@ -105,14 +103,30 @@ for: goto for; } + R1 = StgPAP_fun(pap); + +/* DEBUGGING CODE, ensures that arity 1 and 2 functions are entered tagged + if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 1 ) { + if (GETTAG(R1)!=1) { + W_[0]=1; + } + } + + if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 2 ) { + if (GETTAG(R1)!=2) { + W_[0]=1; + } + } +*/ + // Off we go! TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { @@ -167,8 +181,6 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") // Enter PAP cost centre ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - R1 = StgAP_fun(ap); - // Reload the stack W_ i; W_ p; @@ -182,14 +194,16 @@ for: goto for; } + R1 = StgAP_fun(ap); + // Off we go! TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { @@ -246,8 +260,6 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") // Enter PAP cost centre ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - R1 = StgAP_STACK_fun(ap); - // Reload the stack W_ i; W_ p; @@ -264,5 +276,7 @@ for: // Off we go! TICK_ENT_VIA_NODE(); + R1 = StgAP_STACK_fun(ap); + ENTER(); } diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index f40fbf5..3c66e78 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -551,6 +551,8 @@ INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused ) +---------------------+ | f_closure | +---------------------+ + | tag | + +- - - - - - - - - - -+ | size | +---------------------+ | stg_gc_fun_info | @@ -567,8 +569,11 @@ __stg_gc_fun W_ size; W_ info; W_ type; + W_ tag; + W_ ret_fun; - info = %GET_FUN_INFO(R1); + tag = GETTAG(R1); + info = %GET_FUN_INFO(UNTAG(R1)); // cache the size type = TO_W_(StgFunInfoExtra_fun_type(info)); @@ -579,7 +584,7 @@ __stg_gc_fun #ifdef TABLES_NEXT_TO_CODE // bitmap field holds an offset size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) - + %GET_ENTRY(R1) /* ### */ ); + + %GET_ENTRY(UNTAG(R1)) /* ### */ ); #else size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) ); #endif @@ -591,9 +596,11 @@ __stg_gc_fun #ifdef NO_ARG_REGS // we don't have to save any registers away Sp_adj(-3); - Sp(2) = R1; - Sp(1) = size; Sp(0) = stg_gc_fun_info; + ret_fun = Sp; + StgRetFun_size(ret_fun) = HALF_W_(size); + StgRetFun_tag(ret_fun) = HALF_W_(tag); + StgRetFun_fun(ret_fun) = R1; GC_GENERIC #else W_ type; @@ -602,9 +609,11 @@ __stg_gc_fun if (type == ARG_GEN || type == ARG_GEN_BIG) { // regs already saved by the heap check code Sp_adj(-3); - Sp(2) = R1; - Sp(1) = size; Sp(0) = stg_gc_fun_info; + ret_fun = Sp; + StgRetFun_size(ret_fun) = HALF_W_(size); + StgRetFun_tag(ret_fun) = HALF_W_(tag); + StgRetFun_fun(ret_fun) = R1; // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)");); GC_GENERIC } else { @@ -624,17 +633,22 @@ __stg_gc_fun INFO_TABLE_RET( stg_gc_fun, RET_FUN ) { - R1 = Sp(2); + // Grab the fun, but remember to add in the tag. The GC doesn't + // guarantee to retain the tag on the pointer, so we have to do + // it manually, because the function entry code assumes it. + W_ ret_fun; + ret_fun = Sp; + R1 = StgRetFun_fun(ret_fun) | TO_W_(StgRetFun_tag(ret_fun)); Sp_adj(3); #ifdef NO_ARG_REGS // Minor optimisation: there are no argument registers to load up, // so we can just jump straight to the function's entry point. - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; W_ type; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN || type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info); diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 6663445..527ebde 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -224,7 +224,7 @@ interpretBCO (Capability* cap) // +---------------+ // else if (Sp[0] == (W_)&stg_apply_interp_info) { - obj = (StgClosure *)Sp[1]; + obj = UNTAG_CLOSURE((StgClosure *)Sp[1]); Sp += 2; goto run_BCO_fun; } @@ -244,6 +244,7 @@ eval: obj = (StgClosure*)Sp[0]; Sp++; eval_obj: + obj = UNTAG_CLOSURE(obj); INTERP_TICK(it_total_evals); IF_DEBUG(interpreter, @@ -327,7 +328,7 @@ eval_obj: Sp[i] = (W_)ap->payload[i]; } - obj = (StgClosure*)ap->fun; + obj = UNTAG_CLOSURE((StgClosure*)ap->fun); ASSERT(get_itbl(obj)->type == BCO); goto run_BCO_fun; } @@ -531,7 +532,7 @@ do_apply: pap = (StgPAP *)obj; // we only cope with PAPs whose function is a BCO - if (get_itbl(pap->fun)->type != BCO) { + if (get_itbl(UNTAG_CLOSURE(pap->fun))->type != BCO) { goto defer_apply_to_sched; } @@ -556,7 +557,7 @@ do_apply: for (i = 0; i < pap->n_args; i++) { Sp[i] = (W_)pap->payload[i]; } - obj = pap->fun; + obj = UNTAG_CLOSURE(pap->fun); goto run_BCO_fun; } else if (arity == n) { @@ -564,7 +565,7 @@ do_apply: for (i = 0; i < pap->n_args; i++) { Sp[i] = (W_)pap->payload[i]; } - obj = pap->fun; + obj = UNTAG_CLOSURE(pap->fun); goto run_BCO_fun; } else /* arity > n */ { diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 7c75fca..cb8626e 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1869,7 +1869,7 @@ unpackClosurezh_fast // TODO: Consider the absence of ptrs or nonptrs as a special case ? W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr; - info = %GET_STD_INFO(R1); + info = %GET_STD_INFO(UNTAG(R1)); // Some closures have non-standard layout, so we omit those here. W_ type; @@ -1899,6 +1899,9 @@ out: ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast); + W_ clos; + clos = UNTAG(R1); + ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1); nptrs_arr = Hp - nptrs_arr_sz + WDS(1); @@ -1907,7 +1910,7 @@ out: p = 0; for: if(p < ptrs) { - W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p); + W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p); p = p + 1; goto for; } @@ -1917,7 +1920,7 @@ for: p = 0; for2: if(p < nptrs) { - W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs); + W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs); p = p + 1; goto for2; } diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 9f29aca..2613b9e 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -1486,7 +1486,9 @@ retainStack( StgClosure *c, retainer c_child_r, * ------------------------------------------------------------------------- */ static INLINE StgPtr -retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, +retain_PAP_payload (StgClosure *pap, /* NOT tagged */ + retainer c_child_r, /* NOT tagged */ + StgClosure *fun, /* tagged */ StgClosure** payload, StgWord n_args) { StgPtr p; @@ -1494,6 +1496,7 @@ retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, StgFunInfoTable *fun_info; retainClosure(fun, pap, c_child_r); + fun = UNTAG_CLOSURE(fun); fun_info = get_fun_itbl(fun); ASSERT(fun_info->i.type != PAP); @@ -1542,9 +1545,9 @@ retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, static void retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 ) { - // c = Current closure - // cp = Current closure's Parent - // r = current closures' most recent Retainer + // c = Current closure (possibly tagged) + // cp = Current closure's Parent (NOT tagged) + // r = current closures' most recent Retainer (NOT tagged) // c_child_r = current closure's children's most recent retainer // first_child = first child of c StgClosure *c, *cp, *first_child; @@ -1582,6 +1585,8 @@ loop: //debugBelch("inner_loop"); inner_loop: + c = UNTAG_CLOSURE(c); + // c = current closure under consideration, // cp = current closure's parent, // r = current closure's most recent retainer @@ -1794,16 +1799,19 @@ inner_loop: static void retainRoot( StgClosure **tl ) { + StgClosure *c; + // We no longer assume that only TSOs and WEAKs are roots; any closure can // be a root. ASSERT(isEmptyRetainerStack()); currentStackBoundary = stackTop; - if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) { - retainClosure(*tl, *tl, getRetainerFrom(*tl)); + c = UNTAG_CLOSURE(*tl); + if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) { + retainClosure(c, c, getRetainerFrom(c)); } else { - retainClosure(*tl, *tl, CCS_SYSTEM); + retainClosure(c, c, CCS_SYSTEM); } // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl))); diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c index 69fac8d..716b4a2 100644 --- a/rts/RtsAPI.c +++ b/rts/RtsAPI.c @@ -22,6 +22,10 @@ /* ---------------------------------------------------------------------------- Building Haskell objects from C datatypes. + + TODO: Currently this code does not tag created pointers, + however it is not unsafe (the contructor code will do it) + just inefficient. ------------------------------------------------------------------------- */ HaskellObj rts_mkChar (Capability *cap, HsChar c) @@ -221,7 +225,7 @@ rts_getChar (HaskellObj p) // See comment above: // ASSERT(p->header.info == Czh_con_info || // p->header.info == Czh_static_info); - return (StgChar)(StgWord)(p->payload[0]); + return (StgChar)(StgWord)(UNTAG_CLOSURE(p)->payload[0]); } HsInt @@ -230,7 +234,7 @@ rts_getInt (HaskellObj p) // See comment above: // ASSERT(p->header.info == Izh_con_info || // p->header.info == Izh_static_info); - return (HsInt)(p->payload[0]); + return (HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt8 @@ -239,7 +243,7 @@ rts_getInt8 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I8zh_con_info || // p->header.info == I8zh_static_info); - return (HsInt8)(HsInt)(p->payload[0]); + return (HsInt8)(HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt16 @@ -248,7 +252,7 @@ rts_getInt16 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I16zh_con_info || // p->header.info == I16zh_static_info); - return (HsInt16)(HsInt)(p->payload[0]); + return (HsInt16)(HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt32 @@ -257,7 +261,7 @@ rts_getInt32 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I32zh_con_info || // p->header.info == I32zh_static_info); - return (HsInt32)(HsInt)(p->payload[0]); + return (HsInt32)(HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt64 @@ -267,7 +271,7 @@ rts_getInt64 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I64zh_con_info || // p->header.info == I64zh_static_info); - tmp = (HsInt64*)&(p->payload[0]); + tmp = (HsInt64*)&(UNTAG_CLOSURE(p)->payload[0]); return *tmp; } HsWord @@ -276,7 +280,7 @@ rts_getWord (HaskellObj p) // See comment above: // ASSERT(p->header.info == Wzh_con_info || // p->header.info == Wzh_static_info); - return (HsWord)(p->payload[0]); + return (HsWord)(UNTAG_CLOSURE(p)->payload[0]); } HsWord8 @@ -285,7 +289,7 @@ rts_getWord8 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W8zh_con_info || // p->header.info == W8zh_static_info); - return (HsWord8)(HsWord)(p->payload[0]); + return (HsWord8)(HsWord)(UNTAG_CLOSURE(p)->payload[0]); } HsWord16 @@ -294,7 +298,7 @@ rts_getWord16 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W16zh_con_info || // p->header.info == W16zh_static_info); - return (HsWord16)(HsWord)(p->payload[0]); + return (HsWord16)(HsWord)(UNTAG_CLOSURE(p)->payload[0]); } HsWord32 @@ -303,7 +307,7 @@ rts_getWord32 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W32zh_con_info || // p->header.info == W32zh_static_info); - return (HsWord32)(HsWord)(p->payload[0]); + return (HsWord32)(HsWord)(UNTAG_CLOSURE(p)->payload[0]); } @@ -314,7 +318,7 @@ rts_getWord64 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W64zh_con_info || // p->header.info == W64zh_static_info); - tmp = (HsWord64*)&(p->payload[0]); + tmp = (HsWord64*)&(UNTAG_CLOSURE(p)->payload[0]); return *tmp; } @@ -324,7 +328,7 @@ rts_getFloat (HaskellObj p) // See comment above: // ASSERT(p->header.info == Fzh_con_info || // p->header.info == Fzh_static_info); - return (float)(PK_FLT((P_)p->payload)); + return (float)(PK_FLT((P_)UNTAG_CLOSURE(p)->payload)); } HsDouble @@ -333,7 +337,7 @@ rts_getDouble (HaskellObj p) // See comment above: // ASSERT(p->header.info == Dzh_con_info || // p->header.info == Dzh_static_info); - return (double)(PK_DBL((P_)p->payload)); + return (double)(PK_DBL((P_)UNTAG_CLOSURE(p)->payload)); } HsStablePtr @@ -342,7 +346,7 @@ rts_getStablePtr (HaskellObj p) // See comment above: // ASSERT(p->header.info == StablePtr_con_info || // p->header.info == StablePtr_static_info); - return (StgStablePtr)(p->payload[0]); + return (StgStablePtr)(UNTAG_CLOSURE(p)->payload[0]); } HsPtr @@ -351,7 +355,7 @@ rts_getPtr (HaskellObj p) // See comment above: // ASSERT(p->header.info == Ptr_con_info || // p->header.info == Ptr_static_info); - return (Capability *)(p->payload[0]); + return (Capability *)(UNTAG_CLOSURE(p)->payload[0]); } HsFunPtr @@ -360,7 +364,7 @@ rts_getFunPtr (HaskellObj p) // See comment above: // ASSERT(p->header.info == FunPtr_con_info || // p->header.info == FunPtr_static_info); - return (void *)(p->payload[0]); + return (void *)(UNTAG_CLOSURE(p)->payload[0]); } HsBool @@ -368,7 +372,7 @@ rts_getBool (HaskellObj p) { StgInfoTable *info; - info = get_itbl((StgClosure *)p); + info = get_itbl((StgClosure *)UNTAG_CLOSURE(p)); if (info->srt_bitmap == 0) { // srt_bitmap is the constructor tag return 0; } else { diff --git a/rts/Sanity.c b/rts/Sanity.c index 7de8ec7..a2ddff8 100644 --- a/rts/Sanity.c +++ b/rts/Sanity.c @@ -80,13 +80,16 @@ checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap, nat size ) static void checkClosureShallow( StgClosure* p ) { - ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + StgClosure *q; + + q = UNTAG_CLOSURE(p); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); /* Is it a static closure? */ - if (!HEAP_ALLOCED(p)) { - ASSERT(closure_STATIC(p)); + if (!HEAP_ALLOCED(q)) { + ASSERT(closure_STATIC(q)); } else { - ASSERT(!closure_STATIC(p)); + ASSERT(!closure_STATIC(q)); } } @@ -162,7 +165,7 @@ checkStackFrame( StgPtr c ) StgRetFun *ret_fun; ret_fun = (StgRetFun *)c; - fun_info = get_fun_itbl(ret_fun->fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); size = ret_fun->size; switch (fun_info->f.fun_type) { case ARG_GEN: @@ -206,6 +209,7 @@ checkPAP (StgClosure *fun, StgClosure** payload, StgWord n_args) StgClosure *p; StgFunInfoTable *fun_info; + fun = UNTAG_CLOSURE(fun); ASSERT(LOOKS_LIKE_CLOSURE_PTR(fun)); fun_info = get_fun_itbl(fun); @@ -241,6 +245,7 @@ checkClosure( StgClosure* p ) ASSERT(LOOKS_LIKE_INFO_PTR(p->header.info)); + p = UNTAG_CLOSURE(p); /* Is it a static closure (i.e. in the data segment)? */ if (!HEAP_ALLOCED(p)) { ASSERT(closure_STATIC(p)); @@ -815,7 +820,7 @@ checkStaticObjects ( StgClosure* static_objects ) switch (info->type) { case IND_STATIC: { - StgClosure *indirectee = ((StgIndStatic *)p)->indirectee; + StgClosure *indirectee = UNTAG_CLOSURE(((StgIndStatic *)p)->indirectee); ASSERT(LOOKS_LIKE_CLOSURE_PTR(indirectee)); ASSERT(LOOKS_LIKE_INFO_PTR(indirectee->header.info)); diff --git a/rts/Sparks.c b/rts/Sparks.c index ca60e13..0ff4ee4 100644 --- a/rts/Sparks.c +++ b/rts/Sparks.c @@ -200,6 +200,12 @@ newSpark (StgRegTable *reg, StgClosure *p) { StgSparkPool *pool = &(reg->rSparks); + /* I am not sure whether this is the right thing to do. + * Maybe it is better to exploit the tag information + * instead of throwing it away? + */ + p = UNTAG_CLOSURE(p); + ASSERT_SPARK_POOL_INVARIANTS(pool); if (closure_SHOULD_SPARK(p)) { diff --git a/rts/Stable.c b/rts/Stable.c index e5e8dfb..0ed18bc 100644 --- a/rts/Stable.c +++ b/rts/Stable.c @@ -177,6 +177,9 @@ exitStablePtrTable(void) /* * get at the real stuff...remove indirections. + * It untags pointers before dereferencing and + * retags the real stuff with its tag (if there + * is any) when returning. * * ToDo: move to a better home. */ @@ -184,16 +187,18 @@ static StgClosure* removeIndirections(StgClosure* p) { - StgClosure* q = p; + StgWord tag = GET_CLOSURE_TAG(p); + StgClosure* q = UNTAG_CLOSURE(p); while (get_itbl(q)->type == IND || get_itbl(q)->type == IND_STATIC || get_itbl(q)->type == IND_OLDGEN || get_itbl(q)->type == IND_PERM || get_itbl(q)->type == IND_OLDGEN_PERM ) { - q = ((StgInd *)q)->indirectee; + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(((StgInd *)q)->indirectee); } - return q; + return TAG_CLOSURE(tag,q); } static StgWord diff --git a/rts/Stats.c b/rts/Stats.c index 9342118..f18e26f 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -441,6 +441,52 @@ stat_endHeapCensus(void) were left unused when the heap-check failed. -------------------------------------------------------------------------- */ +#ifdef DEBUG +#define TICK_VAR(arity) \ + extern StgInt SLOW_CALLS_##arity; \ + extern StgInt RIGHT_ARITY_##arity; \ + extern StgInt TAGGED_PTR_##arity; + +#define TICK_VAR_INI(arity) \ + StgInt SLOW_CALLS_##arity = 1; \ + StgInt RIGHT_ARITY_##arity = 1; \ + StgInt TAGGED_PTR_##arity = 0; + +extern StgInt TOTAL_CALLS; + +TICK_VAR(1) +TICK_VAR(2) + +TICK_VAR_INI(1) +TICK_VAR_INI(2) + +StgInt TOTAL_CALLS=1; +#endif + +/* Report the value of a counter */ +#define REPORT(counter) \ + { \ + ullong_format_string(counter,temp,rtsTrue/*commas*/); \ + statsPrintf(" (" #counter ") : %s\n",temp); \ + } + +/* Report the value of a counter as a percentage of another counter */ +#define REPORT_PCT(counter,countertot) \ + statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \ + counter*100.0/countertot) + +#define TICK_PRINT(arity) \ + REPORT(SLOW_CALLS_##arity); \ + REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \ + REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \ + REPORT(RIGHT_ARITY_##arity); \ + REPORT(TAGGED_PTR_##arity) + +#define TICK_PRINT_TOT(arity) \ + statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ + SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) + + void stat_exit(int alloc) { @@ -557,6 +603,15 @@ stat_exit(int alloc) TICK_TO_DBL(time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100 / TICK_TO_DBL(etime)); + + /* + TICK_PRINT(1); + TICK_PRINT(2); + REPORT(TOTAL_CALLS); + TICK_PRINT_TOT(1); + TICK_PRINT_TOT(2); + */ + #if USE_PAPI /* PAPI reporting, should put somewhere else? * Note that the cycles are counted _after_ the initialization of the RTS -- AR */ diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index e092e3f..58cbaf9 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -167,7 +167,7 @@ INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO ) INFO_TABLE(stg_IND,1,0,IND,"IND","IND") { TICK_ENT_DYN_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); } @@ -183,7 +183,7 @@ INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND") INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC") { TICK_ENT_STATIC_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); } @@ -220,7 +220,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") StgHeader_info(R1) = stg_IND_info; #endif /* TICKY_TICKY */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); #if defined(TICKY_TICKY) && !defined(PROFILING) TICK_ENT_VIA_NODE(); @@ -233,7 +233,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") INFO_TABLE(stg_IND_OLDGEN,1,0,IND_OLDGEN,"IND_OLDGEN","IND_OLDGEN") { TICK_ENT_STATIC_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); } @@ -262,7 +262,7 @@ INFO_TABLE(stg_IND_OLDGEN_PERM,1,0,IND_OLDGEN_PERM,"IND_OLDGEN_PERM","IND_OLDGEN StgHeader_info(R1) = stg_IND_OLDGEN_info; #endif /* TICKY_TICKY */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm index 5b0f7e2..b5a5cdc 100644 --- a/rts/StgStartup.cmm +++ b/rts/StgStartup.cmm @@ -142,6 +142,7 @@ stg_threadFinished forceIO takes care of this, performing the IO action and entering the results that comes back. + ------------------------------------------------------------------------- */ INFO_TABLE_RET( stg_forceIO, RET_SMALL) diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm index db9c254..20ceb6a 100644 --- a/rts/StgStdThunks.cmm +++ b/rts/StgStdThunks.cmm @@ -39,10 +39,23 @@ #define RET_PARAMS #endif +/* + * TODO: On return, we can use a more efficient + * untagging (we know the constructor tag). + * + * When entering stg_sel_#_upd, we know R1 points to its closure, + * so it's untagged. + * The payload might be a thunk or a constructor, + * so we enter it. + * + * When returning, we know for sure it is a constructor, + * so we untag it before accessing the field. + * + */ #define SELECTOR_CODE_UPD(offset) \ INFO_TABLE_RET(stg_sel_ret_##offset##_upd, RET_SMALL, RET_PARAMS) \ { \ - R1 = StgClosure_payload(R1,offset); \ + R1 = StgClosure_payload(UNTAG(R1),offset); \ GET_SAVED_CCCS; \ Sp = Sp + SIZEOF_StgHeader; \ ENTER(); \ @@ -58,8 +71,11 @@ ENTER_CCS_THUNK(R1); \ SAVE_CCCS(WITHUPD_FRAME_SIZE); \ W_[Sp-WITHUPD_FRAME_SIZE] = stg_sel_ret_##offset##_upd_info; \ - R1 = StgThunk_payload(R1,0); \ Sp = Sp - WITHUPD_FRAME_SIZE; \ + R1 = StgThunk_payload(R1,0); \ + if (GETTAG(R1) != 0) { \ + jump RET_LBL(stg_sel_ret_##offset##_upd); \ + } \ jump %GET_ENTRY(R1); \ } /* NOTE: no need to ENTER() here, we know the closure cannot evaluate to a function, @@ -85,10 +101,10 @@ SELECTOR_CODE_UPD(15) #define SELECTOR_CODE_NOUPD(offset) \ INFO_TABLE_RET(stg_sel_ret_##offset##_noupd, RET_SMALL, RET_PARAMS) \ { \ - R1 = StgClosure_payload(R1,offset); \ + R1 = StgClosure_payload(UNTAG(R1),offset); \ GET_SAVED_CCCS; \ Sp = Sp + SIZEOF_StgHeader; \ - jump %GET_ENTRY(R1); \ + ENTER(); \ } \ \ INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd")\ @@ -101,8 +117,11 @@ SELECTOR_CODE_UPD(15) ENTER_CCS_THUNK(R1); \ SAVE_CCCS(NOUPD_FRAME_SIZE); \ W_[Sp-NOUPD_FRAME_SIZE] = stg_sel_ret_##offset##_noupd_info; \ - R1 = StgThunk_payload(R1,0); \ Sp = Sp - NOUPD_FRAME_SIZE; \ + R1 = StgThunk_payload(R1,0); \ + if (GETTAG(R1) != 0) { \ + jump RET_LBL(stg_sel_ret_##offset##_noupd); \ + } \ jump %GET_ENTRY(R1); \ } diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index feebef8..e8d1540 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -55,23 +55,32 @@ STATIC_INLINE void thread (StgClosure **p) { - StgPtr q = *(StgPtr *)p; + StgClosure *q0 = *p; + StgPtr q = (StgPtr)UNTAG_CLOSURE(q0); + nat tag = GET_CLOSURE_TAG(q0); bdescr *bd; // It doesn't look like a closure at the moment, because the info // ptr is possibly threaded: // ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + + // We need one tag value here, because we a non-zero tag to + // indicate "not an info pointer". So we add one to the existing + // tag. If this would overflow the tag bits, we throw away the + // original tag (which is safe but pessimistic; tags are optional). + if (tag == TAG_MASK) tag = 0; - if (HEAP_ALLOCED(q)) { + if (HEAP_ALLOCED(q)) + { bd = Bdescr(q); // a handy way to discover whether the ptr is into the // compacted area of the old gen, is that the EVACUATED flag // is zero (it's non-zero for all the other areas of live // memory). - if ((bd->flags & BF_EVACUATED) == 0) { - + if ((bd->flags & BF_EVACUATED) == 0) + { *(StgPtr)p = (StgWord)*q; - *q = (StgWord)p + 1; // set the low bit + *q = (StgWord)p + tag + 1; // set the low bit } } } @@ -84,11 +93,15 @@ STATIC_INLINE void unthread( StgPtr p, StgPtr free ) { StgWord q = *p, r; + nat tag; + StgPtr q1; - while ((q & 1) != 0) { - q -= 1; // unset the low bit again - r = *((StgPtr)q); - *((StgPtr)q) = (StgWord)free; + while (GET_CLOSURE_TAG((StgClosure *)q) != 0) { + q -= 1; // restore the original tag + tag = GET_CLOSURE_TAG((StgClosure *)q); + q1 = (StgPtr)UNTAG_CLOSURE((StgClosure *)q); + r = *q1; + *q1 = (StgWord)free + tag; q = r; } *p = q; @@ -97,10 +110,10 @@ unthread( StgPtr p, StgPtr free ) STATIC_INLINE StgInfoTable * get_threaded_info( StgPtr p ) { - StgPtr q = (P_)GET_INFO((StgClosure *)p); + StgPtr q = (P_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p)); - while (((StgWord)q & 1) != 0) { - q = (P_)*((StgPtr)((StgWord)q-1)); + while (GET_CLOSURE_TAG((StgClosure *)q) != 0) { + q = (P_)*((StgPtr)((StgWord)(UNTAG_CLOSURE((StgClosure *)q)))); } ASSERT(LOOKS_LIKE_INFO_PTR(q)); diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index dda5659..d437e3f 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -39,7 +39,7 @@ upd_evacuee(StgClosure *p, StgClosure *dest) STATIC_INLINE StgClosure * -copy(StgClosure *src, nat size, step *stp) +copy_tag(StgClosure *src, nat size, step *stp,StgWord tag) { StgPtr to, from; nat i; @@ -75,6 +75,10 @@ copy(StgClosure *src, nat size, step *stp) for (i = 0; i < size; i++) { // unroll for small i to[i] = from[i]; } + + /* retag pointer before updating EVACUATE closure and returning */ + to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to); + upd_evacuee((StgClosure *)from,(StgClosure *)to); #ifdef PROFILING @@ -89,7 +93,7 @@ copy(StgClosure *src, nat size, step *stp) // that will not be scavenged. Used for object that have no pointer // fields. STATIC_INLINE StgClosure * -copy_noscav(StgClosure *src, nat size, step *stp) +copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag) { StgPtr to, from; nat i; @@ -125,6 +129,10 @@ copy_noscav(StgClosure *src, nat size, step *stp) for (i = 0; i < size; i++) { // unroll for small i to[i] = from[i]; } + + /* retag pointer before updating EVACUATE closure and returning */ + to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to); + upd_evacuee((StgClosure *)from,(StgClosure *)to); #ifdef PROFILING @@ -184,6 +192,19 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) } +/* Copy wrappers that don't tag the closure after copying */ +STATIC_INLINE StgClosure * +copy(StgClosure *src, nat size, step *stp) +{ + return copy_tag(src,size,stp,0); +} + +STATIC_INLINE StgClosure * +copy_noscav(StgClosure *src, nat size, step *stp) +{ + return copy_noscav_tag(src,size,stp,0); +} + /* ----------------------------------------------------------------------------- Evacuate a large object @@ -295,13 +316,18 @@ evacuate(StgClosure *q) bdescr *bd = NULL; step *stp; const StgInfoTable *info; + StgWord tag; loop: + /* The tag and the pointer are split, to be merged after evacing */ + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(q); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); if (!HEAP_ALLOCED(q)) { - if (!major_gc) return q; + if (!major_gc) return TAG_CLOSURE(tag,q); info = get_itbl(q); switch (info->type) { @@ -338,14 +364,16 @@ loop: if (*STATIC_LINK(info,(StgClosure *)q) == NULL) { *STATIC_LINK(info,(StgClosure *)q) = static_objects; static_objects = (StgClosure *)q; + /* I am assuming that static_objects pointers are not + * written to other objects, and thus, no need to retag. */ } - return q; + return TAG_CLOSURE(tag,q); case CONSTR_NOCAF_STATIC: /* no need to put these on the static linked list, they don't need * to be scavenged. */ - return q; + return TAG_CLOSURE(tag,q); default: barf("evacuate(static): strange closure type %d", (int)(info->type)); @@ -365,7 +393,7 @@ loop: failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - return q; + return TAG_CLOSURE(tag,q); } if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) { @@ -380,7 +408,7 @@ loop: failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - return q; + return TAG_CLOSURE(tag,q); } /* evacuate large objects by re-linking them onto a different list. @@ -393,7 +421,7 @@ loop: goto loop; } evacuate_large((P_)q); - return q; + return TAG_CLOSURE(tag,q); } /* If the object is in a step that we're compacting, then we @@ -408,7 +436,7 @@ loop: } push_mark_stack((P_)q); } - return q; + return TAG_CLOSURE(tag,q); } } @@ -429,20 +457,24 @@ loop: if (q->header.info == Czh_con_info && // unsigned, so always true: (StgChar)w >= MIN_CHARLIKE && (StgChar)w <= MAX_CHARLIKE) { - return (StgClosure *)CHARLIKE_CLOSURE((StgChar)w); + return TAG_CLOSURE(tag, + (StgClosure *)CHARLIKE_CLOSURE((StgChar)w) + ); } if (q->header.info == Izh_con_info && (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { - return (StgClosure *)INTLIKE_CLOSURE((StgInt)w); + return TAG_CLOSURE(tag, + (StgClosure *)INTLIKE_CLOSURE((StgInt)w) + ); } // else - return copy_noscav(q,sizeofW(StgHeader)+1,stp); + return copy_noscav_tag(q,sizeofW(StgHeader)+1,stp,tag); } case FUN_0_1: case FUN_1_0: case CONSTR_1_0: - return copy(q,sizeofW(StgHeader)+1,stp); + return copy_tag(q,sizeofW(StgHeader)+1,stp,tag); case THUNK_1_0: case THUNK_0_1: @@ -462,27 +494,27 @@ loop: case FUN_1_1: case FUN_2_0: + case FUN_0_2: case CONSTR_1_1: case CONSTR_2_0: - case FUN_0_2: - return copy(q,sizeofW(StgHeader)+2,stp); + return copy_tag(q,sizeofW(StgHeader)+2,stp,tag); case CONSTR_0_2: - return copy_noscav(q,sizeofW(StgHeader)+2,stp); + return copy_noscav_tag(q,sizeofW(StgHeader)+2,stp,tag); case THUNK: return copy(q,thunk_sizeW_fromITBL(info),stp); case FUN: - case CONSTR: case IND_PERM: case IND_OLDGEN_PERM: case WEAK: case STABLE_NAME: - return copy(q,sizeW_fromITBL(info),stp); + case CONSTR: + return copy_tag(q,sizeW_fromITBL(info),stp,tag); case BCO: - return copy(q,bco_sizeW((StgBCO *)q),stp); + return copy(q,bco_sizeW((StgBCO *)q),stp); case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -739,7 +771,9 @@ eval_thunk_selector( nat field, StgSelector * p ) const StgInfoTable *info_ptr; StgClosure *selectee; - selectee = p->selectee; + // The selectee might be a constructor closure, + // so we untag the pointer. + selectee = UNTAG_CLOSURE(p->selectee); // Save the real info pointer (NOTE: not the same as get_itbl()). info_ptr = p->header.info; @@ -814,7 +848,7 @@ selector_loop: { StgClosure *q; q = selectee->payload[field]; - if (is_to_space(q)) { + if (is_to_space(UNTAG_CLOSURE(q))) { goto bale_out; } else { return q; @@ -826,7 +860,8 @@ selector_loop: case IND_OLDGEN: case IND_OLDGEN_PERM: case IND_STATIC: - selectee = ((StgInd *)selectee)->indirectee; + // Again, we might need to untag a constructor. + selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); goto selector_loop; case EVACUATED: @@ -880,7 +915,8 @@ selector_loop: // indirection. LDV_RECORD_CREATE(selectee); - selectee = val; + // Of course this pointer might be tagged + selectee = UNTAG_CLOSURE(val); goto selector_loop; } } diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 1fee394..216d3cb 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1031,6 +1031,7 @@ GarbageCollect ( rtsBool force_major_gc ) closure if it is alive, or NULL otherwise. NOTE: Use it before compaction only! + It untags and (if needed) retags pointers to closures. -------------------------------------------------------------------------- */ @@ -1039,8 +1040,12 @@ isAlive(StgClosure *p) { const StgInfoTable *info; bdescr *bd; + StgWord tag; while (1) { + /* The tag and the pointer are split, to be merged later when needed. */ + tag = GET_CLOSURE_TAG(p); + p = UNTAG_CLOSURE(p); ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); @@ -1052,18 +1057,18 @@ isAlive(StgClosure *p) // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. // if (!HEAP_ALLOCED(p)) { - return p; + return TAG_CLOSURE(tag,p); } // ignore closures in generations that we're not collecting. bd = Bdescr((P_)p); if (bd->gen_no > N) { - return p; + return TAG_CLOSURE(tag,p); } // if it's a pointer into to-space, then we're done if (bd->flags & BF_EVACUATED) { - return p; + return TAG_CLOSURE(tag,p); } // large objects use the evacuated flag @@ -1073,7 +1078,7 @@ isAlive(StgClosure *p) // check the mark bit for compacted steps if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) { - return p; + return TAG_CLOSURE(tag,p); } switch (info->type) { diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index 0de029e..f211401 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -200,7 +200,7 @@ scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size) StgWord bitmap; StgFunInfoTable *fun_info; - fun_info = get_fun_itbl(fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(fun)); ASSERT(fun_info->i.type != PAP); p = (StgPtr)payload; @@ -1720,7 +1720,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end) StgFunInfoTable *fun_info; ret_fun->fun = evacuate(ret_fun->fun); - fun_info = get_fun_itbl(ret_fun->fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); p = scavenge_arg_block(fun_info, ret_fun->payload); goto follow_srt; } diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index b7cc6dd..c42ccb1 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -1,10 +1,12 @@ -{-# OPTIONS -cpp #-} +{-# OPTIONS -cpp -fglasgow-exts #-} module Main(main) where #include "../../includes/ghcconfig.h" #include "../../includes/MachRegs.h" #include "../../includes/Constants.h" +-- Needed for TAG_BITS +#include "../../includes/MachDeps.h" import Text.PrettyPrint import Data.Word @@ -165,10 +167,16 @@ mkApplyFastName args mkApplyInfoName args = mkApplyName args <> text "_info" +mb_tag_node arity | Just tag <- tagForArity arity = mkTagStmt tag <> semi + | otherwise = empty + +mkTagStmt tag = text ("R1 = R1 + "++ show tag) + genMkPAP regstatus macro jump ticker disamb no_load_regs -- don't load argumnet regs before jumping args_in_regs -- arguments are already in regs is_pap args all_args_size fun_info_label + is_fun_case = smaller_arity_cases $$ exact_arity_case $$ larger_arity_case @@ -214,7 +222,8 @@ genMkPAP regstatus macro jump ticker disamb if is_pap then text "R2 = " <> mkApplyInfoName this_call_args <> semi - else empty, + else empty, + if is_fun_case then mb_tag_node arity else empty, text "jump " <> text jump <> semi ]) $$ text "}" @@ -294,9 +303,10 @@ genMkPAP regstatus macro jump ticker disamb -- text "TICK_SLOW_CALL_" <> text ticker <> text "_CORRECT();", reg_doc, text "Sp_adj(" <> int sp' <> text ");", - if is_pap - then text "R2 = " <> fun_info_label <> semi - else empty, + if is_pap + then text "R2 = " <> fun_info_label <> semi + else empty, + if is_fun_case then mb_tag_node n_args else empty, text "jump " <> text jump <> semi ]) @@ -319,6 +329,15 @@ genMkPAP regstatus macro jump ticker disamb nest 4 (vcat [ -- text "TICK_SLOW_CALL_" <> text ticker <> text "_TOO_FEW();", save_regs, + -- Before building the PAP, tag the function closure pointer + if is_fun_case then + vcat [ + text "if (arity < " <> int tAG_BITS_MAX <> text ") {", + text " R1 = R1 + arity" <> semi, + text "}" + ] + else empty + , text macro <> char '(' <> int n_args <> comma <> int all_args_size <> text "," <> fun_info_label <> @@ -332,6 +351,66 @@ genMkPAP regstatus macro jump ticker disamb = assignRegs regstatus stk_args_slow_offset args -- BUILD_PAP assumes args start at offset 1 +-- -------------------------------------- +-- Examine tag bits of function pointer and enter it +-- directly if needed. +-- TODO: remove the redundant case in the original code. +enterFastPath regstatus no_load_regs args_in_regs args + | Just tag <- tagForArity (length args) + = enterFastPathHelper tag regstatus no_load_regs args_in_regs args +enterFastPath _ _ _ _ = empty + +-- Copied from Constants.lhs & CgUtils.hs, i'd rather have this imported: +-- (arity,tag) +tAG_BITS = (TAG_BITS :: Int) +tAG_BITS_MAX = ((1 `shiftL` tAG_BITS) :: Int) + +tagForArity :: Int -> Maybe Int +tagForArity i | i < tAG_BITS_MAX = Just i + | otherwise = Nothing + +enterFastPathHelper tag regstatus no_load_regs args_in_regs args = + vcat [text "if (GETTAG(R1)==" <> int tag <> text ") {", + reg_doc, + text " Sp_adj(" <> int sp' <> text ");", + -- enter, but adjust offset with tag + text " jump " <> text "%GET_ENTRY(R1-" <> int tag <> text ");", + text "}" + ] + -- I don't totally understand this code, I copied it from + -- exact_arity_case + -- TODO: refactor + where + -- offset of arguments on the stack at slow apply calls. + stk_args_slow_offset = 1 + + stk_args_offset + | args_in_regs = 0 + | otherwise = stk_args_slow_offset + + (reg_doc, sp') + | no_load_regs || args_in_regs = (empty, stk_args_offset) + | otherwise = loadRegArgs regstatus stk_args_offset args + +tickForArity arity + | True + = empty + | Just tag <- tagForArity arity + = vcat [ + text "W_[TOTAL_CALLS] = W_[TOTAL_CALLS] + 1;", + text "W_[SLOW_CALLS_" <> int arity <> text "] = W_[SLOW_CALLS_" <> int arity <> text "] + 1;", + text "if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == " <> int arity <> text " ) {", + text " W_[RIGHT_ARITY_" <> int arity <> text "] = W_[RIGHT_ARITY_" <> int arity <> text "] + 1;", + text " if (GETTAG(R1)==" <> int tag <> text ") {", + text " W_[TAGGED_PTR_" <> int arity <> text "] = W_[TAGGED_PTR_" <> int arity <> text "] + 1;", + text " } else {", + -- force a halt when not tagged! +-- text " W_[0]=0;", + text " }", + text "}" + ] +tickForArity _ = text "W_[TOTAL_CALLS] = W_[TOTAL_CALLS] + 1;" + -- ----------------------------------------------------------------------------- -- generate an apply function @@ -388,6 +467,7 @@ genApply regstatus args = -- print " [IND_OLDGEN_PERM] &&ind_lbl" -- print " };" + tickForArity (length args), text "", text "IF_DEBUG(apply,foreign \"C\" debugBelch(\"" <> fun_ret_label <> text "... \"); foreign \"C\" printClosure(R1 \"ptr\"));", @@ -411,6 +491,12 @@ genApply regstatus args = vcat (do_assert args 1), text "again:", + + -- if pointer is tagged enter it fast! + enterFastPath regstatus False False args, + + -- Functions can be tagged, so we untag them! + text "R1 = UNTAG(R1);", text "info = %INFO_PTR(R1);", -- if fast == 1: @@ -428,7 +514,7 @@ genApply regstatus args = text "ASSERT(arity > 0);", genMkPAP regstatus "BUILD_PAP" "ENTRY_LBL(stg_BCO)" "FUN" "BCO" True{-stack apply-} False{-args on stack-} False{-not a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}False ]), text "}", @@ -445,9 +531,9 @@ genApply regstatus args = nest 4 (vcat [ text "arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));", text "ASSERT(arity > 0);", - genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(R1)" "FUN" "FUN" + genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(UNTAG(R1))" "FUN" "FUN" False{-reg apply-} False{-args on stack-} False{-not a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}True ]), text "}", @@ -461,7 +547,7 @@ genApply regstatus args = text "ASSERT(arity > 0);", genMkPAP regstatus "NEW_PAP" "stg_PAP_apply" "PAP" "PAP" True{-stack apply-} False{-args on stack-} True{-is a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}False ]), text "}", @@ -506,6 +592,7 @@ genApply regstatus args = text " IND_OLDGEN_PERM: {", nest 4 (vcat [ text "R1 = StgInd_indirectee(R1);", + -- An indirection node might contain a tagged pointer text "goto again;" ]), text "}", @@ -541,6 +628,14 @@ genApplyFast regstatus args = nest 4 (vcat [ text "W_ info;", text "W_ arity;", + + tickForArity (length args), + + -- if pointer is tagged enter it fast! + enterFastPath regstatus False True args, + + -- Functions can be tagged, so we untag them! + text "R1 = UNTAG(R1);", text "info = %GET_STD_INFO(R1);", text "switch [INVALID_OBJECT .. N_CLOSURE_TYPES] (TO_W_(%INFO_TYPE(info))) {", nest 4 (vcat [ @@ -554,9 +649,9 @@ genApplyFast regstatus args = nest 4 (vcat [ text "arity = TO_W_(StgFunInfoExtra_arity(%GET_FUN_INFO(R1)));", text "ASSERT(arity > 0);", - genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(R1)" "FUN" "FUN" + genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(UNTAG(R1))" "FUN" "FUN" False{-reg apply-} True{-args in regs-} False{-not a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}True ]), char '}', @@ -607,7 +702,7 @@ genStackApply regstatus args = (assign_regs, sp') = loadRegArgs regstatus 0 args body = vcat [assign_regs, text "Sp_adj" <> parens (int sp') <> semi, - text "jump %GET_ENTRY(R1);" + text "jump %GET_ENTRY(UNTAG(R1));" ] -- ----------------------------------------------------------------------------- -- 1.7.10.4