X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FcodeGen%2FCgTailCall.lhs;h=dd7327b745f1d6ad41fb89414b049da1cb25c301;hb=04db0e9fa47ce4dfbcb73ec1752d94195f3b394e;hp=8dfd5f484a0f29a772334740b9746b46158f8e54;hpb=13386b66f4fcc1fbf2f7df13e8687510e857c848;p=ghc-hetmet.git diff --git a/ghc/compiler/codeGen/CgTailCall.lhs b/ghc/compiler/codeGen/CgTailCall.lhs index 8dfd5f4..dd7327b 100644 --- a/ghc/compiler/codeGen/CgTailCall.lhs +++ b/ghc/compiler/codeGen/CgTailCall.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgTailCall.lhs,v 1.31 2001/10/25 05:07:32 sof Exp $ +% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $ % %******************************************************** %* * @@ -11,15 +11,12 @@ \begin{code} module CgTailCall ( - cgTailCall, + cgTailCall, performTailCall, performReturn, performPrimReturn, - mkStaticAlgReturnCode, mkDynamicAlgReturnCode, - mkUnboxedTupleReturnCode, returnUnboxedTuple, - mkPrimReturnCode, - - tailCallFun, + emitKnownConReturnCode, emitAlgReturnCode, + returnUnboxedTuple, ccallReturnUnboxedTuple, + pushUnboxedTuple, tailCallPrimOp, - doTailCall, pushReturnAddress ) where @@ -27,593 +24,432 @@ module CgTailCall ( #include "HsVersions.h" import CgMonad -import AbsCSyn -import PprAbsC ( pprAmode ) - -import AbsCUtils ( mkAbstractCs, getAmodeRep ) -import CgBindery ( getArgAmodes, getCAddrMode, getCAddrModeAndInfo ) -import CgRetConv ( dataReturnConvPrim, - ctrlReturnConvAlg, CtrlReturnConvention(..), - assignAllRegs, assignRegs - ) -import CgStackery ( mkTaggedStkAmodes, adjustStackHW ) -import CgUsages ( getSpRelOffset, adjustSpAndHp ) -import CgUpdate ( pushSeqFrame ) -import CLabel ( mkUpdInfoLabel, mkRtsPrimOpLabel ) -import ClosureInfo ( nodeMustPointToIt, - getEntryConvention, EntryConvention(..), LambdaFormInfo - ) -import CmdLineOpts ( opt_DoSemiTagging ) -import Id ( Id, idType, idName ) -import DataCon ( DataCon, dataConTyCon, dataConTag, fIRST_TAG ) -import Maybes ( maybeToBool ) -import PrimRep ( PrimRep(..) ) -import StgSyn ( StgArg ) +import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape, + idInfoToAmode, cgIdInfoId, cgIdInfoLF, + cgIdInfoArgRep ) +import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ, + emitVectoredReturnInstr, closureInfoPtr ) +import CgCallConv +import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW, + getSpRelOffset ) +import CgHeapery ( setRealHp, getHpRelOffset ) +import CgUtils ( emitSimultaneously ) +import CgTicky +import ClosureInfo +import SMRep ( CgRep, isVoidArg, separateByPtrFollowness ) +import Cmm +import CmmUtils +import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel ) import Type ( isUnLiftedType ) +import Id ( Id, idName, idUnique, idType ) +import DataCon ( DataCon, dataConTyCon ) +import StgSyn ( StgArg ) import TyCon ( TyCon ) import PrimOp ( PrimOp ) -import Util ( zipWithEqual, splitAtList ) -import ListSetOps ( assocMaybe ) import Outputable -import Panic ( panic, assertPanic ) -\end{code} -%************************************************************************ -%* * -\subsection[tailcall-doc]{Documentation} -%* * -%************************************************************************ - -\begin{code} -cgTailCall :: Id -> [StgArg] -> Code -\end{code} - -Here's the code we generate for a tail call. (NB there may be no -arguments, in which case this boils down to just entering a variable.) - -\begin{itemize} -\item Adjust the stack ptr to \tr{tailSp + #args}. -\item Put args in the top locations of the resulting stack. -\item Make Node point to the function closure. -\item Enter the function closure. -\end{itemize} - -Things to be careful about: -\begin{itemize} -\item Don't overwrite stack locations before you have finished with - them (remember you need the function and the as-yet-unmoved - arguments). -\item Preferably, generate no code to replace x by x on the stack (a - common situation in tail-recursion). -\item Adjust the stack high water mark appropriately. -\end{itemize} - -Treat unboxed locals exactly like literals (above) except use the addr -mode for the local instead of (CLit lit) in the assignment. - -Case for unboxed @Ids@ first: -\begin{code} -cgTailCall fun [] - | isUnLiftedType (idType fun) - = getCAddrMode fun `thenFC` \ amode -> - performPrimReturn (ppr fun) amode -\end{code} - -The general case (@fun@ is boxed): -\begin{code} -cgTailCall fun args = performTailCall fun args -\end{code} - -%************************************************************************ -%* * -\subsection[return-and-tail-call]{Return and tail call} -%* * -%************************************************************************ +import Monad ( when ) -\begin{code} -performPrimReturn :: SDoc -- Just for debugging (sigh) - -> CAddrMode -- The thing to return - -> Code +----------------------------------------------------------------------------- +-- Tail Calls -performPrimReturn doc amode - = let - kind = getAmodeRep amode - ret_reg = WARN( case kind of { PtrRep -> True; other -> False }, text "primRet" <+> doc <+> pprAmode amode ) - dataReturnConvPrim kind - - assign_possibly = case kind of - VoidRep -> AbsCNop - kind -> (CAssign (CReg ret_reg) amode) - in - performReturn assign_possibly (mkPrimReturnCode doc) - -mkPrimReturnCode :: SDoc -- Debugging only - -> Sequel - -> Code -mkPrimReturnCode doc UpdateCode = pprPanic "mkPrimReturnCode: Upd" doc -mkPrimReturnCode doc sequel = sequelToAmode sequel `thenFC` \ dest_amode -> - absC (CReturn dest_amode DirectReturn) - -- Direct, no vectoring +cgTailCall :: Id -> [StgArg] -> Code --- Constructor is built on the heap; Node is set. --- All that remains is --- (a) to set TagReg, if necessary --- (c) to do the right sort of jump. - -mkStaticAlgReturnCode :: DataCon -- The constructor - -> Sequel -- where to return to - -> Code - -mkStaticAlgReturnCode con sequel - = -- Generate profiling code if necessary - (case return_convention of - VectoredReturn sz -> profCtrC SLIT("TICK_VEC_RETURN") [mkIntCLit sz] - other -> nopC - ) `thenC` - - -- Set tag if necessary - -- This is done by a macro, because if we are short of registers - -- we don't set TagReg; instead the continuation gets the tag - -- by indexing off the info ptr - (case return_convention of - - UnvectoredReturn no_of_constrs - | no_of_constrs > 1 - -> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag]) - - other -> nopC - ) `thenC` - - -- Generate the right jump or return - (case sequel of - UpdateCode -> -- Ha! We can go direct to the update code, - -- (making sure to jump to the *correct* update - -- code.) - absC (CReturn (CLbl mkUpdInfoLabel CodePtrRep) - return_info) - - CaseAlts _ (Just (alts, _)) -> -- Ho! We know the constructor so - -- we can go right to the alternative - - case assocMaybe alts tag of - Just (alt_absC, join_lbl) -> - absC (CJump (CLbl join_lbl CodePtrRep)) - Nothing -> panic "mkStaticAlgReturnCode: default" - -- The Nothing case should never happen; - -- it's the subject of a wad of special-case - -- code in cgReturnCon - - -- can't be a SeqFrame, because we're returning a constructor - - other -> -- OnStack, or (CaseAlts ret_amode Nothing) - sequelToAmode sequel `thenFC` \ ret_amode -> - absC (CReturn ret_amode return_info) - ) +-- Here's the code we generate for a tail call. (NB there may be no +-- arguments, in which case this boils down to just entering a variable.) +-- +-- * Put args in the top locations of the stack. +-- * Adjust the stack ptr +-- * Make R1 point to the function closure if necessary. +-- * Perform the call. +-- +-- Things to be careful about: +-- +-- * Don't overwrite stack locations before you have finished with +-- them (remember you need the function and the as-yet-unmoved +-- arguments). +-- * Preferably, generate no code to replace x by x on the stack (a +-- common situation in tail-recursion). +-- * Adjust the stack high water mark appropriately. +-- +-- Treat unboxed locals exactly like literals (above) except use the addr +-- mode for the local instead of (CLit lit) in the assignment. + +cgTailCall fun args + = do { fun_info <- getCgIdInfo fun + + ; if isUnLiftedType (idType fun) + then -- Primitive return + ASSERT( null args ) + do { fun_amode <- idInfoToAmode fun_info + ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + + else -- Normal case, fun is boxed + do { arg_amodes <- getArgAmodes args + ; performTailCall fun_info arg_amodes noStmts } + } + + +-- ----------------------------------------------------------------------------- +-- The guts of a tail-call + +performTailCall + :: CgIdInfo -- The function + -> [(CgRep,CmmExpr)] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. + -> Code +performTailCall fun_info arg_amodes pending_assts + | Just join_sp <- maybeLetNoEscape fun_info + = -- A let-no-escape is slightly different, because we + -- arrange the stack arguments into pointers and non-pointers + -- to make the heap check easier. The tail-call sequence + -- is very similar to returning an unboxed tuple, so we + -- share some code. + do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes + ; emitSimultaneously (pending_assts `plusStmts` arg_assts) + ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) + ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) } + + | otherwise + = do { fun_amode <- idInfoToAmode fun_info + ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + opt_node_asst | nodeMustPointToIt lf_info = node_asst + | otherwise = noStmts + ; EndOfBlockInfo sp _ <- getEndOfBlockInfo + ; hmods <- getHomeModules + + ; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of + + -- Node must always point to things we enter + EnterIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + ; doFinalJump sp False (stmtC (CmmJump target [])) } + + -- A function, but we have zero arguments. It is already in WHNF, + -- so we can just return it. + -- As with any return, Node must point to it. + ReturnIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False emitDirectReturnInstr } + + -- A real constructor. Don't bother entering it, + -- just do the right sort of return instead. + -- As with any return, Node must point to it. + ReturnCon con -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (emitKnownConReturnCode con) } + + JumpToIt lbl -> do + { emitSimultaneously (opt_node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (jumpToLbl lbl) } + + -- A slow function call via the RTS apply routines + -- Node must definitely point to the thing + SlowCall -> do + { when (not (null arg_amodes)) $ do + { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map fst arg_amodes) + } + + ; let (apply_lbl, args, extra_args) + = constructSlowCall arg_amodes + + ; directCall sp apply_lbl args extra_args + (node_asst `plusStmts` pending_assts) + } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { if arity == length arg_amodes + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (map fst (drop arity arg_amodes)) + + ; let + -- The args beyond the arity go straight on the stack + (arity_args, extra_args) = splitAt arity arg_amodes + + ; directCall sp lbl arity_args extra_args + (opt_node_asst `plusStmts` pending_assts) + } + } where - tag = dataConTag con - tycon = dataConTyCon con - return_convention = ctrlReturnConvAlg tycon - zero_indexed_tag = tag - fIRST_TAG -- Adjust tag to be zero-indexed - -- cf AbsCUtils.mkAlgAltsCSwitch - - return_info = - case return_convention of - UnvectoredReturn _ -> DirectReturn - VectoredReturn _ -> StaticVectoredReturn zero_indexed_tag + fun_name = idName (cgIdInfoId fun_info) + lf_info = cgIdInfoLF fun_info -mkUnboxedTupleReturnCode :: Sequel -> Code -mkUnboxedTupleReturnCode sequel - = case sequel of - -- can't update with an unboxed tuple! - UpdateCode -> panic "mkUnboxedTupleReturnCode" - CaseAlts _ (Just ([(_,(alt_absC,join_lbl))], _)) -> - absC (CJump (CLbl join_lbl CodePtrRep)) - -- can't be a SeqFrame +directCall sp lbl args extra_args assts = do + let + -- First chunk of args go in registers + (reg_arg_amodes, stk_args) = assignCallRegs args + + -- Any "extra" arguments are placed in frames on the + -- stack after the other arguments. + slow_stk_args = slowArgs extra_args - other -> -- OnStack, or (CaseAlts ret_amode something) - sequelToAmode sequel `thenFC` \ ret_amode -> - absC (CReturn ret_amode DirectReturn) + reg_assts = assignToRegs reg_arg_amodes + -- + (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args) --- This function is used by PrimOps that return enumerated types (i.e. --- all the comparison operators). + emitSimultaneously (reg_assts `plusStmts` + stk_assts `plusStmts` + assts) -mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code + doFinalJump final_sp False (jumpToLbl lbl) -mkDynamicAlgReturnCode tycon dyn_tag sequel - = case ctrlReturnConvAlg tycon of - VectoredReturn sz -> +-- ----------------------------------------------------------------------------- +-- The final clean-up before we do a jump at the end of a basic block. +-- This code is shared by tail-calls and returns. - profCtrC SLIT("TICK_VEC_RETURN") [mkIntCLit sz] `thenC` - sequelToAmode sequel `thenFC` \ ret_addr -> - absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag)) +doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code +doFinalJump final_sp is_let_no_escape jump_code + = do { -- Adjust the high-water mark if necessary + adjustStackHW final_sp - UnvectoredReturn no_of_constrs -> + -- Push a return address if necessary (after the assignments + -- above, in case we clobber a live stack location) + -- + -- DONT push the return address when we're about to jump to a + -- let-no-escape: the final tail call in the let-no-escape + -- will do this. + ; eob <- getEndOfBlockInfo + ; whenC (not is_let_no_escape) (pushReturnAddress eob) - -- Set tag if necessary - -- This is done by a macro, because if we are short of registers - -- we don't set TagReg; instead the continuation gets the tag - -- by indexing off the info ptr - (if no_of_constrs > 1 then - absC (CMacroStmt SET_TAG [dyn_tag]) - else - nopC - ) `thenC` + -- Final adjustment of Sp/Hp + ; adjustSpAndHp final_sp + -- and do the jump + ; jump_code } - sequelToAmode sequel `thenFC` \ ret_addr -> - -- Generate the right jump or return - absC (CReturn ret_addr DirectReturn) -\end{code} +-- ----------------------------------------------------------------------------- +-- A general return (just a special case of doFinalJump, above) -\begin{code} -performReturn :: AbstractC -- Simultaneous assignments to perform - -> (Sequel -> Code) -- The code to execute to actually do - -- the return, given an addressing mode - -- for the return address +performReturn :: Code -- The code to execute to actually do the return -> Code --- this is just a special case of doTailCall, later. -performReturn sim_assts finish_code - = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - - -- Do the simultaneous assignments, - doSimAssts sim_assts `thenC` - - -- push a return address if necessary - -- (after the assignments above, in case we clobber a live - -- stack location) - pushReturnAddress eob `thenC` - - -- Adjust Sp/Hp - adjustSpAndHp args_sp `thenC` - - -- Do the return - finish_code sequel -- "sequel" is `robust' in that it doesn't - -- depend on stk-ptr values -\end{code} - -Returning unboxed tuples. This is mainly to support _ccall_GC_, where -we want to do things in a slightly different order to normal: - - - push return address - - adjust stack pointer - - r = call(args...) - - assign regs for unboxed tuple (usually just R1 = r) - - return to continuation - -The return address (i.e. stack frame) must be on the stack before -doing the call in case the call ends up in the garbage collector. - -Sadly, the information about the continuation is lost after we push it -(in order to avoid pushing it again), so we end up doing a needless -indirect jump (ToDo). +performReturn finish_code + = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} finish_code } -\begin{code} -returnUnboxedTuple :: [CAddrMode] -> Code -> Code -returnUnboxedTuple amodes before_jump - = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - - -- push a return address if necessary - pushReturnAddress eob `thenC` - setEndOfBlockInfo (EndOfBlockInfo args_sp (OnStack args_sp)) ( - - -- Adjust Sp/Hp - adjustSpAndHp args_sp `thenC` - - before_jump `thenC` - - let (ret_regs, leftovers) = assignRegs [] (map getAmodeRep amodes) - in - - profCtrC SLIT("TICK_RET_UNBOXED_TUP") [mkIntCLit (length amodes)] `thenC` +-- ----------------------------------------------------------------------------- +-- Primitive Returns +-- Just load the return value into the right register, and return. - doTailCall amodes ret_regs - mkUnboxedTupleReturnCode - (length leftovers) {- fast args arity -} - AbsCNop {-no pending assigments-} - Nothing {-not a let-no-escape-} - False {-node doesn't point-} - ) -\end{code} +performPrimReturn :: CgRep -> CmmExpr -- The thing to return + -> Code +performPrimReturn rep amode + = do { whenC (not (isVoidArg rep)) + (stmtC (CmmAssign ret_reg amode)) + ; performReturn emitDirectReturnInstr } + where + ret_reg = dataReturnConvPrim rep -\begin{code} -performTailCall :: Id -> [StgArg] -> Code -performTailCall fun args - = getCAddrModeAndInfo fun `thenFC` \ (fun', fun_amode, lf_info) -> - getArgAmodes args `thenFC` \ arg_amodes -> - tailCallFun fun' fun_amode lf_info arg_amodes AbsCNop{- No pending assignments -} -\end{code} +-- ----------------------------------------------------------------------------- +-- Algebraic constructor returns -Generating code for a tail call to a function (or closure) +-- Constructor is built on the heap; Node is set. +-- All that remains is to do the right sort of jump. -\begin{code} -tailCallFun - :: Id -- Function - -> CAddrMode - -> LambdaFormInfo - -> [CAddrMode] -- Arguments - -> AbstractC -- Pending simultaneous assignments - -- *** GUARANTEED to contain only stack - -- assignments. - -- In ptic, we don't need to look in - -- here to discover all live regs - -> Code - -tailCallFun fun fun_amode lf_info arg_amodes pending_assts - = nodeMustPointToIt lf_info `thenFC` \ node_points -> - -- we use the name of fun', the Id from the environment, rather than - -- fun from the STG tree, in case it is a top-level name that we globalised - -- (see cgTopRhsClosure). - getEntryConvention (idName fun) lf_info - (map getAmodeRep arg_amodes) `thenFC` \ entry_conv -> - let - node_asst - = if node_points then - CAssign (CReg node) fun_amode - else - AbsCNop - - (arg_regs, finish_code, arity) - = case entry_conv of - ViaNode -> - ([], - profCtrC SLIT("TICK_ENT_VIA_NODE") [] `thenC` - absC (CJump (CMacroExpr CodePtrRep ENTRY_CODE - [CVal (nodeRel 0) DataPtrRep])) - , 0) - StdEntry lbl -> ([], absC (CJump (CLbl lbl CodePtrRep)), 0) - DirectEntry lbl arity regs -> - (regs, absC (CJump (CLbl lbl CodePtrRep)), - arity - length regs) - - -- set up for a let-no-escape if necessary - join_sp = case fun_amode of - CJoinPoint sp -> Just sp - other -> Nothing - in - doTailCall arg_amodes arg_regs (const finish_code) arity - (mkAbstractCs [node_asst,pending_assts]) join_sp node_points - - --- this generic tail call code is used for both function calls and returns. - -doTailCall - :: [CAddrMode] -- args to pass to function - -> [MagicId] -- registers to use - -> (Sequel->Code) -- code to perform jump - -> Int -- number of "fast" stack arguments - -> AbstractC -- pending assignments - -> Maybe VirtualSpOffset -- sp offset to trim stack to: - -- USED iff destination is a let-no-escape - -> Bool -- node points to the closure to enter - -> Code +emitKnownConReturnCode :: DataCon -> Code +emitKnownConReturnCode con + = emitAlgReturnCode (dataConTyCon con) + (CmmLit (mkIntCLit (dataConTagZ con))) + -- emitAlgReturnCode requires zero-indexed tag -doTailCall arg_amodes arg_regs finish_code arity pending_assts - maybe_join_sp node_points - = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) -> - - let - (reg_arg_amodes, stk_arg_amodes) = splitAtList arg_regs arg_amodes - -- We get some stk_arg_amodes if (a) no regs, or - -- (b) args beyond arity - - reg_arg_assts - = mkAbstractCs (zipWithEqual "assign_to_reg2" - assign_to_reg arg_regs reg_arg_amodes) - - assign_to_reg reg_id amode = CAssign (CReg reg_id) amode - - join_sp = case maybe_join_sp of - Just sp -> ASSERT(not (args_sp > sp)) sp - -- If ASSERTion fails: Oops: the join point has *lower* - -- stack ptrs than the continuation Note that we take - -- the Sp point without the return address here. The - -- return address is put on by the let-no-escapey thing - -- when it finishes. - Nothing -> args_sp - - (fast_stk_amodes, tagged_stk_amodes) = - splitAt arity stk_arg_amodes - - -- eager blackholing, at the end of the basic block. - (r1_tmp_asst, bh_asst) - = case sequel of -#if 0 - -- no: UpdateCode doesn't tell us that we're in a thunk's entry code. - -- we might be in a case continuation later down the line. Also, - -- we might have pushed a return address on the stack, if we're in - -- a case scrut, and still be in the thunk's entry code. - UpdateCode -> - (CAssign node_save nodeReg, - CAssign (CVal (CIndex node_save (mkIntCLit 0) PtrRep) - PtrRep) - (CLbl mkBlackHoleInfoTableLabel DataPtrRep)) - where - node_save = CTemp (mkPseudoUnique1 2) DataPtrRep -#endif - _ -> (AbsCNop, AbsCNop) - in - -- We can omit tags on the arguments passed to the fast entry point, - -- but we have to be careful to fill in the tags on any *extra* - -- arguments we're about to push on the stack. - - mkTaggedStkAmodes join_sp tagged_stk_amodes `thenFC` - \ (fast_sp, tagged_arg_assts, tag_assts) -> - - mkTaggedStkAmodes fast_sp fast_stk_amodes `thenFC` - \ (final_sp, fast_arg_assts, _) -> - - -- adjust the high-water mark if necessary - adjustStackHW final_sp `thenC` - - -- The stack space for the pushed return addess, - -- with any args pushed on top, is recorded in final_sp. - - -- Do the simultaneous assignments, - doSimAssts (mkAbstractCs [r1_tmp_asst, - pending_assts, - reg_arg_assts, - fast_arg_assts, - tagged_arg_assts, - tag_assts]) `thenC` - absC bh_asst `thenC` - - -- push a return address if necessary - -- (after the assignments above, in case we clobber a live - -- stack location) - - -- DONT push the return address when we're about - -- to jump to a let-no-escape: the final tail call - -- in the let-no-escape will do this. - (if (maybeToBool maybe_join_sp) - then nopC - else pushReturnAddress eob) `thenC` - - -- Final adjustment of Sp/Hp - adjustSpAndHp final_sp `thenC` +emitAlgReturnCode :: TyCon -> CmmExpr -> Code +-- emitAlgReturnCode is used both by emitKnownConReturnCode, +-- and by by PrimOps that return enumerated types (i.e. +-- all the comparison operators). +emitAlgReturnCode tycon tag + = do { case ctrlReturnConvAlg tycon of + VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz + ; emitVectoredReturnInstr tag } + UnvectoredReturn _ -> emitDirectReturnInstr + } + + +-- --------------------------------------------------------------------------- +-- Unboxed tuple returns + +-- These are a bit like a normal tail call, except that: +-- +-- - The tail-call target is an info table on the stack +-- +-- - We separate stack arguments into pointers and non-pointers, +-- to make it easier to leave things in a sane state for a heap check. +-- This is OK because we can never partially-apply an unboxed tuple, +-- unlike a function. The same technique is used when calling +-- let-no-escape functions, because they also can't be partially +-- applied. + +returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code +returnUnboxedTuple amodes + = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo + ; tickyUnboxedTupleReturn (length amodes) + ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes + ; emitSimultaneously assts + ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr } + +pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing + -> [(CgRep, CmmExpr)] -- amodes of the components + -> FCode (VirtualSpOffset, -- final Sp + CmmStmts) -- assignments (regs+stack) + +pushUnboxedTuple sp [] + = return (sp, noStmts) +pushUnboxedTuple sp amodes + = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes - -- Now decide about semi-tagging - let - semi_tagging_on = opt_DoSemiTagging - in - case (semi_tagging_on, arg_amodes, node_points, sequel) of + -- separate the rest of the args into pointers and non-pointers + (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes + reg_arg_assts = assignToRegs reg_arg_amodes + + -- push ptrs, then nonptrs, on the stack + ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args + ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args + + ; returnFC (final_sp, + reg_arg_assts `plusStmts` + ptr_assts `plusStmts` nptr_assts) } + + +-- ----------------------------------------------------------------------------- +-- Returning unboxed tuples. This is mainly to support _ccall_GC_, where +-- we want to do things in a slightly different order to normal: +-- +-- - push return address +-- - adjust stack pointer +-- - r = call(args...) +-- - assign regs for unboxed tuple (usually just R1 = r) +-- - return to continuation +-- +-- The return address (i.e. stack frame) must be on the stack before +-- doing the call in case the call ends up in the garbage collector. +-- +-- Sadly, the information about the continuation is lost after we push it +-- (in order to avoid pushing it again), so we end up doing a needless +-- indirect jump (ToDo). + +ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code +ccallReturnUnboxedTuple amodes before_jump + = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo + + -- Push a return address if necessary + ; pushReturnAddress eob + ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack) + (do { adjustSpAndHp args_sp + ; before_jump + ; returnUnboxedTuple amodes }) + } + +-- ----------------------------------------------------------------------------- +-- Calling an out-of-line primop - -- - -- *************** The semi-tagging case *************** - -- - {- XXX leave this out for now. - ( True, [], True, CaseAlts _ (Just (st_alts, maybe_deflt_join_details))) -> - - -- Whoppee! Semi-tagging rules OK! - -- (a) semi-tagging is switched on - -- (b) there are no arguments, - -- (c) Node points to the closure - -- (d) we have a case-alternative sequel with - -- some visible alternatives - - -- Why is test (c) necessary? - -- Usually Node will point to it at this point, because we're - -- scrutinsing something which is either a thunk or a - -- constructor. - -- But not always! The example I came across is when we have - -- a top-level Double: - -- lit.3 = D# 3.000 - -- ... (case lit.3 of ...) ... - -- Here, lit.3 is built as a re-entrant thing, which you must enter. - -- (OK, the simplifier should have eliminated this, but it's - -- easy to deal with the case anyway.) - let - join_details_to_code (load_regs_and_profiling_code, join_lbl) - = load_regs_and_profiling_code `mkAbsCStmts` - CJump (CLbl join_lbl CodePtrRep) - - semi_tagged_alts = [ (mkMachInt (fromInt (tag - fIRST_TAG)), - join_details_to_code join_details) - | (tag, join_details) <- st_alts - ] - - enter_jump - -- Enter Node (we know infoptr will have the info ptr in it)! - = mkAbstractCs [ - CCallProfCtrMacro SLIT("RET_SEMI_FAILED") - [CMacroExpr IntRep INFO_TAG [CReg infoptr]], - CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr]) ] - in - -- Final switch - absC (mkAbstractCs [ - CAssign (CReg infoptr) - (CVal (NodeRel zeroOff) DataPtrRep), - - case maybe_deflt_join_details of - Nothing -> - CSwitch (CMacroExpr IntRep INFO_TAG [CReg infoptr]) - (semi_tagged_alts) - (enter_jump) - Just (_, details) -> - CSwitch (CMacroExpr IntRep EVAL_TAG [CReg infoptr]) - [(mkMachInt 0, enter_jump)] - (CSwitch - (CMacroExpr IntRep INFO_TAG [CReg infoptr]) - (semi_tagged_alts) - (join_details_to_code details)) - ]) - -} +tailCallPrimOp :: PrimOp -> [StgArg] -> Code +tailCallPrimOp op args + = do { -- We're going to perform a normal-looking tail call, + -- except that *all* the arguments will be in registers. + -- Hence the ASSERT( null leftovers ) + arg_amodes <- getArgAmodes args + ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes + jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op) + + ; ASSERT(null leftovers) -- no stack-resident args + emitSimultaneously (assignToRegs arg_regs) + + ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} jump_to_primop } + +-- ----------------------------------------------------------------------------- +-- Return Addresses + +-- We always push the return address just before performing a tail call +-- or return. The reason we leave it until then is because the stack +-- slot that the return address is to go into might contain something +-- useful. +-- +-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a +-- case expression and the return address is still to be pushed. +-- +-- There are cases where it doesn't look necessary to push the return +-- address: for example, just before doing a return to a known +-- continuation. However, the continuation will expect to find the +-- return address on the stack in case it needs to do a heap check. - -- - -- *************** The non-semi-tagging case *************** - -- - other -> finish_code sequel -\end{code} +pushReturnAddress :: EndOfBlockInfo -> Code -%************************************************************************ -%* * -\subsection[tailCallPrimOp]{@tailCallPrimOp@} -%* * -%************************************************************************ +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False)) + = do { sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) } -\begin{code} -tailCallPrimOp :: PrimOp -> [StgArg] -> Code -tailCallPrimOp op args = - -- we're going to perform a normal-looking tail call, - -- except that *all* the arguments will be in registers. - getArgAmodes args `thenFC` \ arg_amodes -> - let (arg_regs, leftovers) = assignAllRegs [] (map getAmodeRep arg_amodes) - in - ASSERT(null leftovers) -- no stack-resident args - doTailCall arg_amodes arg_regs - (const (absC (CJump (CLbl (mkRtsPrimOpLabel op) CodePtrRep)))) - 0 {- arity shouldn't matter, all args in regs -} - AbsCNop {- no pending assignments -} - Nothing {- not a let-no-escape -} - False {- node doesn't point -} -\end{code} +-- For a polymorphic case, we have two return addresses to push: the case +-- return, and stg_seq_frame_info which turns a possible vectored return +-- into a direct one. +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True)) + = do { sp_rel <- getSpRelOffset (args_sp-1) + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) + ; sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) } -%************************************************************************ -%* * -\subsection[doSimAssts]{@doSimAssts@} -%* * -%************************************************************************ +pushReturnAddress _ = nopC -@doSimAssts@ happens at the end of every block of code. -They are separate because we sometimes do some jiggery-pokery in between. +-- ----------------------------------------------------------------------------- +-- Misc. -\begin{code} -doSimAssts :: AbstractC -> Code +jumpToLbl :: CLabel -> Code +-- Passes no argument to the destination procedure +jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}]) -doSimAssts sim_assts - = absC (CSimultaneous sim_assts) +assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts +assignToRegs reg_args + = mkStmts [ CmmAssign (CmmGlobal reg_id) expr + | (expr, reg_id) <- reg_args ] \end{code} + %************************************************************************ %* * -\subsection[retAddr]{@Return Addresses@} +\subsection[CgStackery-adjust]{Adjusting the stack pointers} %* * %************************************************************************ -We always push the return address just before performing a tail call -or return. The reason we leave it until then is because the stack -slot that the return address is to go into might contain something -useful. - -If the end of block info is CaseAlts, then we're in the scrutinee of a -case expression and the return address is still to be pushed. +This function adjusts the stack and heap pointers just before a tail +call or return. The stack pointer is adjusted to its final position +(i.e. to point to the last argument for a tail call, or the activation +record for a return). The heap pointer may be moved backwards, in +cases where we overallocated at the beginning of the basic block (see +CgCase.lhs for discussion). -There are cases where it doesn't look necessary to push the return -address: for example, just before doing a return to a known -continuation. However, the continuation will expect to find the -return address on the stack in case it needs to do a heap check. +These functions {\em do not} deal with high-water-mark adjustment. +That's done by functions which allocate stack space. \begin{code} -pushReturnAddress :: EndOfBlockInfo -> Code -pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _)) = - getSpRelOffset args_sp `thenFC` \ sp_rel -> - absC (CAssign (CVal sp_rel RetRep) amode) -pushReturnAddress (EndOfBlockInfo args_sp sequel@(SeqFrame amode _)) = - pushSeqFrame args_sp `thenFC` \ ret_sp -> - getSpRelOffset ret_sp `thenFC` \ sp_rel -> - absC (CAssign (CVal sp_rel RetRep) amode) -pushReturnAddress _ = nopC +adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr + -> Code +adjustSpAndHp newRealSp + = do { -- Adjust stack, if necessary. + -- NB: the conditional on the monad-carried realSp + -- is out of line (via codeOnly), to avoid a black hole + ; new_sp <- getSpRelOffset newRealSp + ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case + ; setRealSp newRealSp -- where realSp==newRealSp + + -- Adjust heap. The virtual heap pointer may be less than the real Hp + -- because the latter was advanced to deal with the worst-case branch + -- of the code, and we may be in a better-case branch. In that case, + -- move the real Hp *back* and retract some ticky allocation count. + ; hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + ; new_hp <- getHpRelOffset vHp + ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp + ; tickyAllocHeap (vHp - rHp) -- ...ditto + ; setRealHp vHp + } \end{code}