From 7f1bc015a4094a8282ad4090768d780fd4d6122d Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Mon, 22 Jan 2007 11:40:16 +0000 Subject: [PATCH] Semi-tagging optimisation In the generated code for case-of-variable, test the tag of the scrutinee closure and only enter if it is unevaluated. Also turn *off* vectored returns. --- compiler/codeGen/CgCallConv.hs | 7 +-- compiler/codeGen/CgCase.lhs | 99 ++++++++++++++++++++++++++++++++++----- compiler/codeGen/CgInfoTbls.hs | 2 +- compiler/codeGen/CgTailCall.lhs | 2 +- compiler/codeGen/CgUtils.hs | 3 ++ 5 files changed, 96 insertions(+), 17 deletions(-) diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index b48b7d5..895552b 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -267,9 +267,10 @@ ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention ctrlReturnConvAlg tycon = case (tyConFamilySize tycon) of size -> -- we're supposed to know... - if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then - VectoredReturn size - else + -- Disable vectored returns +-- if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then +-- VectoredReturn size +-- else UnvectoredReturn size -- NB: unvectored returns Include size 0 (no constructors), so that -- the following perverse code compiles (it crashed GHC in 5.02) diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index 23310dd..7f440c1 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -9,6 +9,7 @@ module CgCase ( cgCase, saveVolatileVarsAndRegs, ) where #include "HsVersions.h" +#include "../includes/ClosureTypes.h" import {-# SOURCE #-} CgExpr ( cgExpr ) @@ -41,6 +42,7 @@ import PrimOp import TyCon import Util import Outputable +import Constants \end{code} \begin{code} @@ -171,9 +173,54 @@ cgCase (StgOpApp op@(StgFCallOp fcall _) args _) _other -> False \end{code} -Special case: scrutinising a non-primitive variable. -This can be done a little better than the general case, because -we can reuse/trim the stack slot holding the variable (if it is in one). +Special case: scrutinising a non-primitive variable. This is where we +want to do semi-tagging. The code generated will be something like this: + + save volatile vars + R1 = fun + jump c99_ret + + +c99_ret: + infoptr = R1[0] + type = infoptr[-4] // or something + if (type > 8) goto no_cons + tag = infoptr[-6] + if (tag == 1) ... etc. +no_cons + jump infoptr + +\begin{code} +cgCase (StgApp fun []) + live_in_whole_case live_in_alts bndr srt (AlgAlt tycon) alts + = do { fun_info <- getCgIdInfo fun + ; fun_amode <- idInfoToAmode fun_info + + ; nukeDeadBindings live_in_alts + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + ; scrut_eob_info + <- forkEval alts_eob_info + (allocStackTop retAddrSizeW >> nopC) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAltsSemiTag maybe_cc_slot bndr srt + tycon alts }) + + -- jump to the continuation immediately + ; case scrut_eob_info of + EndOfBlockInfo sp (CaseAlts lbl _ _ _) -> do + let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + emitSimultaneously (node_asst `plusStmts` save_assts) + let jmp = stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + setEndOfBlockInfo scrut_eob_info $ + doFinalJump sp False jmp + } +\end{code} + +Special case: scrutinising a non-primitive application. This can be +done a little better than the general case, because we can reuse/trim +the stack slot holding the variables involved in the application. \begin{code} cgCase (StgApp fun args) @@ -410,15 +457,6 @@ cgEvalAlts cc_slot bndr srt alt_type alts do { -- Bind the default binder bindNewToReg bndr nodeReg (mkLFArgument bndr) - -- Generate sequel info for use downstream - -- At the moment, we only do it if the type is vector-returnable. - -- Reason: if not, then it costs extra to label the - -- alternatives, because we'd get return code like: - -- - -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc } - -- - -- which is worse than having the alt code in the switch statement - ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts ; (lbl, branches) <- emitAlgReturnTarget (idName bndr) @@ -429,6 +467,43 @@ cgEvalAlts cc_slot bndr srt alt_type alts ret_conv = case alt_type of AlgAlt tc -> ctrlReturnConvAlg tc PolyAlt -> UnvectoredReturn 0 + + +-- Alternatives for a semi-tagging case expression +cgEvalAltsSemiTag cc_slot bndr srt tycon alts + = do -- Bind the default binder + bindNewToReg bndr nodeReg (mkLFArgument bndr) + + blks <- getCgStmts $ cgEvalAltsSemiTag' cc_slot tycon alts + lbl <- emitDirectReturnTarget (idName bndr) blks srt + return (CaseAlts lbl Nothing bndr False) + +cgEvalAltsSemiTag' cc_slot tycon alts + = do + (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot (AlgAlt tycon) alts + + iptr <- newTemp wordRep + stmtC (CmmAssign iptr (closureInfoPtr (CmmReg nodeReg))) + -- share the iptr between ctype and tag, below + + -- we don't have a 1-indexed tag field, we have to use the type + -- field first to find out whether the closure is a constructor + not_constr <- newLabelC + + let highCons = CmmLit (CmmInt CONSTR_NOCAF_STATIC halfWordRep) + stmtC (CmmCondBranch (CmmMachOp (MO_U_Gt halfWordRep) + [infoTableClosureType (infoTable (CmmReg iptr)), + highCons]) + not_constr) + + let tag_expr = CmmMachOp (MO_U_Conv halfWordRep wordRep) + [infoTableConstrTag (infoTable (CmmReg iptr))] + + let family_size = tyConFamilySize tycon + emitSwitch tag_expr alts mb_deflt 0 (family_size - 1) + + labelC not_constr + stmtC (CmmJump (entryCode (CmmReg iptr)) []) \end{code} diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 1c30d06..3751824 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -19,7 +19,7 @@ module CgInfoTbls ( mkFunGenInfoExtraBits, entryCode, closureInfoPtr, getConstrTag, - infoTable, infoTableClosureType, + infoTable, infoTableClosureType, infoTableConstrTag, infoTablePtrs, infoTableNonPtrs, funInfoTable, retVec diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index c65ec1c..94a96f7 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -6,7 +6,7 @@ \begin{code} module CgTailCall ( - cgTailCall, performTailCall, + cgTailCall, performTailCall, doFinalJump, performReturn, performPrimReturn, emitKnownConReturnCode, emitAlgReturnCode, returnUnboxedTuple, ccallReturnUnboxedTuple, diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 804aeab..f2b3c72 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -17,6 +17,8 @@ module CgUtils ( tagToClosure, cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmULtWord, cmmUGtWord, + cmmULeWord, cmmUGeWord, cmmOffsetExprW, cmmOffsetExprB, cmmRegOffW, cmmRegOffB, cmmLabelOffW, cmmLabelOffB, @@ -151,6 +153,7 @@ cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULeWord e1 e2 = CmmMachOp mo_wordULe [e1, e2] cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] -- 1.7.10.4