2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 % $Id: CgTailCall.lhs,v 1.41 2004/11/26 16:20:12 simonmar Exp $
6 %********************************************************
8 \section[CgTailCall]{Tail calls: converting @StgApps@}
10 %********************************************************
14 cgTailCall, performTailCall,
15 performReturn, performPrimReturn,
16 emitKnownConReturnCode, emitAlgReturnCode,
17 returnUnboxedTuple, ccallReturnUnboxedTuple,
24 #include "HsVersions.h"
27 import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
28 idInfoToAmode, cgIdInfoId, cgIdInfoLF,
30 import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ,
31 emitVectoredReturnInstr, closureInfoPtr )
33 import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW,
35 import CgHeapery ( setRealHp, getHpRelOffset )
36 import CgUtils ( emitSimultaneously )
39 import SMRep ( CgRep, isVoidArg, separateByPtrFollowness )
42 import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
43 import Type ( isUnLiftedType )
44 import Id ( Id, idName, idUnique, idType )
45 import DataCon ( DataCon, dataConTyCon )
46 import StgSyn ( StgArg )
47 import TyCon ( TyCon )
48 import PrimOp ( PrimOp )
53 -----------------------------------------------------------------------------
56 cgTailCall :: Id -> [StgArg] -> Code
58 -- Here's the code we generate for a tail call. (NB there may be no
59 -- arguments, in which case this boils down to just entering a variable.)
61 -- * Put args in the top locations of the stack.
62 -- * Adjust the stack ptr
63 -- * Make R1 point to the function closure if necessary.
64 -- * Perform the call.
66 -- Things to be careful about:
68 -- * Don't overwrite stack locations before you have finished with
69 -- them (remember you need the function and the as-yet-unmoved
71 -- * Preferably, generate no code to replace x by x on the stack (a
72 -- common situation in tail-recursion).
73 -- * Adjust the stack high water mark appropriately.
75 -- Treat unboxed locals exactly like literals (above) except use the addr
76 -- mode for the local instead of (CLit lit) in the assignment.
-- NOTE(review): the equation head (presumably `cgTailCall fun args`) is not
-- visible in this chunk; the stray numeric prefixes look like extraction
-- noise fused from original file line numbers — verify against VCS.
-- If the function value is unlifted, no tail call happens: the value is
-- returned directly via the primitive-return convention.  Otherwise the
-- argument amodes are computed and a proper tail call is performed.
79 = do { fun_info <- getCgIdInfo fun
81 ; if isUnLiftedType (idType fun)
82 then -- Primitive return
84 do { fun_amode <- idInfoToAmode fun_info
85 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
87 else -- Normal case, fun is boxed
-- No pending assignments at this point, hence noStmts.
88 do { arg_amodes <- getArgAmodes args
89 ; performTailCall fun_info arg_amodes noStmts }
93 -- -----------------------------------------------------------------------------
94 -- The guts of a tail-call
-- NOTE(review): several lines of this definition are missing from this
-- chunk (the name line of the type signature, an `| otherwise` guard,
-- the case-alternative headers, and the `where` keyword).  Inline notes
-- below flag each apparent gap; verify against version control.
97 :: CgIdInfo -- The function
98 -> [(CgRep,CmmExpr)] -- Args
99 -> CmmStmts -- Pending simultaneous assignments
100 -- *** GUARANTEED to contain only stack assignments.
103 performTailCall fun_info arg_amodes pending_assts
104 | Just join_sp <- maybeLetNoEscape fun_info
105 = -- A let-no-escape is slightly different, because we
106 -- arrange the stack arguments into pointers and non-pointers
107 -- to make the heap check easier. The tail-call sequence
108 -- is very similar to returning an unboxed tuple, so we
110 do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
111 ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
112 ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
113 ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
-- NOTE(review): the `| otherwise` guard line appears to be missing here.
116 = do { fun_amode <- idInfoToAmode fun_info
117 ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
118 opt_node_asst | nodeMustPointToIt lf_info = node_asst
119 | otherwise = noStmts
120 ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
121 ; dflags <- getDynFlags
-- Dispatch on the calling convention chosen by getCallMethod.
123 ; case (getCallMethod dflags fun_name lf_info (length arg_amodes)) of
-- NOTE(review): case-alternative header (presumably `EnterIt -> do`) missing.
125 -- Node must always point to things we enter
127 { emitSimultaneously (node_asst `plusStmts` pending_assts)
128 ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
129 ; doFinalJump sp False (stmtC (CmmJump target [])) }
-- NOTE(review): case-alternative header (presumably `ReturnIt -> do`) missing.
131 -- A function, but we have zero arguments. It is already in WHNF,
132 -- so we can just return it.
133 -- As with any return, Node must point to it.
135 { emitSimultaneously (node_asst `plusStmts` pending_assts)
136 ; doFinalJump sp False emitDirectReturnInstr }
-- NOTE(review): case-alternative header (presumably `ReturnCon con -> do`) missing.
138 -- A real constructor. Don't bother entering it,
139 -- just do the right sort of return instead.
140 -- As with any return, Node must point to it.
142 { emitSimultaneously (node_asst `plusStmts` pending_assts)
143 ; doFinalJump sp False (emitKnownConReturnCode con) }
-- NOTE(review): case-alternative header (presumably `JumpToIt lbl -> do`) missing.
146 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
147 ; doFinalJump sp False (jumpToLbl lbl) }
-- NOTE(review): case-alternative header (presumably `SlowCall -> do`) missing.
149 -- A slow function call via the RTS apply routines
150 -- Node must definitely point to the thing
152 { let (apply_lbl, new_amodes) = constructSlowCall arg_amodes
154 -- Fill in all the arguments on the stack
155 ; (final_sp,stk_assts) <- mkStkAmodes sp new_amodes
157 ; emitSimultaneously (node_asst `plusStmts` stk_assts
158 `plusStmts` pending_assts)
-- Ticky accounting: a slow call with arguments is either a known
-- function applied to too few args, or a call to an unknown function.
160 ; when (not (null arg_amodes)) $ do
161 { if (isKnownFun lf_info)
162 then tickyKnownCallTooFewArgs
163 else tickyUnknownCall
164 ; tickySlowCallPat (map fst arg_amodes)
-- NOTE(review): closing brace(s) of the `when` block appear to be missing here.
167 ; doFinalJump (final_sp + 1)
168 -- Add one, because the stg_ap functions
169 -- expect there to be a free slot on the stk
170 False (jumpToLbl apply_lbl)
-- NOTE(review): closing of this alternative appears to be missing here.
173 -- A direct function call (possibly with some left-over arguments)
174 DirectEntry lbl arity -> do
-- NOTE(review): the `{ let` opening this binding group appears to be missing.
176 -- The args beyond the arity go straight on the stack
177 (arity_args, extra_stk_args) = splitAt arity arg_amodes
179 -- First chunk of args go in registers
180 (reg_arg_amodes, stk_args) = assignCallRegs arity_args
182 -- Any "extra" arguments are placed in frames on the
183 -- stack after the other arguments.
184 slow_stk_args = slowArgs extra_stk_args
186 reg_assts = assignToRegs reg_arg_amodes
188 ; if null slow_stk_args
189 then tickyKnownCallExact
190 else do tickyKnownCallExtraArgs
191 tickySlowCallPat (map fst extra_stk_args)
193 ; (final_sp, stk_assts) <- mkStkAmodes sp
194 (stk_args ++ slow_stk_args)
196 ; emitSimultaneously (opt_node_asst `plusStmts`
197 reg_assts `plusStmts`
198 stk_assts `plusStmts`
-- NOTE(review): the final operand (presumably `pending_assts)`) is missing.
201 ; doFinalJump final_sp False (jumpToLbl lbl) }
-- NOTE(review): the `where` keyword introducing these bindings is missing.
204 fun_name = idName (cgIdInfoId fun_info)
205 lf_info = cgIdInfoLF fun_info
209 -- -----------------------------------------------------------------------------
210 -- The final clean-up before we do a jump at the end of a basic block.
211 -- This code is shared by tail-calls and returns.
-- Shared tail of every tail-call/return: fix up the stack high-water
-- mark, push a return address when needed, adjust Sp/Hp, then jump.
-- NOTE(review): the trailing statement running `jump_code` and the
-- closing brace are not visible in this chunk — verify against VCS.
213 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
214 doFinalJump final_sp is_let_no_escape jump_code
215 = do { -- Adjust the high-water mark if necessary
216 adjustStackHW final_sp
218 -- Push a return address if necessary (after the assignments
219 -- above, in case we clobber a live stack location)
221 -- DONT push the return address when we're about to jump to a
222 -- let-no-escape: the final tail call in the let-no-escape
224 ; eob <- getEndOfBlockInfo
225 ; whenC (not is_let_no_escape) (pushReturnAddress eob)
227 -- Final adjustment of Sp/Hp
228 ; adjustSpAndHp final_sp
233 -- -----------------------------------------------------------------------------
234 -- A general return (just a special case of doFinalJump, above)
-- A general return: just doFinalJump at the current end-of-block Sp.
-- NOTE(review): the signature's result type line (presumably `-> Code`)
-- is missing from this chunk.  `sequel` is bound but unused here.
236 performReturn :: Code -- The code to execute to actually do the return
239 performReturn finish_code
240 = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
241 ; doFinalJump args_sp False{-not a LNE-} finish_code }
243 -- -----------------------------------------------------------------------------
245 -- Just load the return value into the right register, and return.
-- Load the returned value into its convention register (nothing to load
-- for a void rep), then do a direct return.
-- NOTE(review): the `where` keyword before `ret_reg`, and the rest of the
-- type signature, are not visible in this chunk — verify against VCS.
247 performPrimReturn :: CgRep -> CmmExpr -- The thing to return
249 performPrimReturn rep amode
250 = do { whenC (not (isVoidArg rep))
251 (stmtC (CmmAssign ret_reg amode))
252 ; performReturn emitDirectReturnInstr }
254 ret_reg = dataReturnConvPrim rep
256 -- -----------------------------------------------------------------------------
257 -- Algebraic constructor returns
259 -- Constructor is built on the heap; Node is set.
260 -- All that remains is to do the right sort of jump.
-- | Return a statically-known constructor: delegate to 'emitAlgReturnCode'
-- for the constructor's type constructor, passing its tag as a literal.
emitKnownConReturnCode :: DataCon -> Code
emitKnownConReturnCode con = emitAlgReturnCode tycon tag_amode
  where
    tycon     = dataConTyCon con
    -- emitAlgReturnCode requires a zero-indexed tag
    tag_amode = CmmLit (mkIntCLit (dataConTagZ con))
-- Return a value of an algebraic type, choosing a vectored or direct
-- return according to the type constructor's control return convention.
-- NOTE(review): the closing brace of this `do` block is not visible in
-- this chunk — verify against VCS.
268 emitAlgReturnCode :: TyCon -> CmmExpr -> Code
269 -- emitAlgReturnCode is used both by emitKnownConReturnCode,
270 -- and by PrimOps that return enumerated types (i.e.
271 -- all the comparison operators).
272 emitAlgReturnCode tycon tag
273 = do { case ctrlReturnConvAlg tycon of
274 VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
275 ; emitVectoredReturnInstr tag }
276 UnvectoredReturn _ -> emitDirectReturnInstr
280 -- ---------------------------------------------------------------------------
281 -- Unboxed tuple returns
283 -- These are a bit like a normal tail call, except that:
285 -- - The tail-call target is an info table on the stack
287 -- - We separate stack arguments into pointers and non-pointers,
288 -- to make it easier to leave things in a sane state for a heap check.
289 -- This is OK because we can never partially-apply an unboxed tuple,
290 -- unlike a function. The same technique is used when calling
291 -- let-no-escape functions, because they also can't be partially
-- | Return an unboxed tuple: put the components in return registers and
-- on the stack (via 'pushUnboxedTuple'), then perform a direct return.
returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
returnUnboxedTuple amodes = do
    EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
    tickyUnboxedTupleReturn (length amodes)
    (sp_final, assignments) <- pushUnboxedTuple args_sp amodes
    emitSimultaneously assignments
    -- Not a let-no-escape, hence the False
    doFinalJump sp_final False emitDirectReturnInstr
-- | Assign the components of an unboxed tuple to return registers and,
-- for those that do not fit, to stack slots starting at the given Sp.
-- Stack-bound components are segregated into pointers (pushed first)
-- and non-pointers, which simplifies a subsequent heap check.
pushUnboxedTuple :: VirtualSpOffset         -- ^ Sp at which to start pushing
                 -> [(CgRep, CmmExpr)]      -- ^ amodes of the components
                 -> FCode (VirtualSpOffset, -- final Sp
                           CmmStmts)        -- assignments (regs+stack)
-- Nothing to push: Sp is unchanged and no statements are produced.
pushUnboxedTuple sp [] = return (sp, noStmts)
pushUnboxedTuple sp amodes = do
    let -- First chunk of the components travels in registers
        (in_regs, on_stack) = assignReturnRegs amodes
        -- Split the stack-bound rest into pointers and non-pointers
        (ptrs, non_ptrs)    = separateByPtrFollowness on_stack
        reg_assignments     = assignToRegs in_regs
    -- Pointers go on the stack first, non-pointers immediately after
    (sp_after_ptrs, ptr_stmts) <- mkStkAmodes sp ptrs
    (sp_final, non_ptr_stmts)  <- mkStkAmodes sp_after_ptrs non_ptrs
    returnFC (sp_final,
              reg_assignments `plusStmts` ptr_stmts `plusStmts` non_ptr_stmts)
325 -- -----------------------------------------------------------------------------
326 -- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
327 -- we want to do things in a slightly different order to normal:
329 -- - push return address
330 -- - adjust stack pointer
331 -- - r = call(args...)
332 -- - assign regs for unboxed tuple (usually just R1 = r)
333 -- - return to continuation
335 -- The return address (i.e. stack frame) must be on the stack before
336 -- doing the call in case the call ends up in the garbage collector.
338 -- Sadly, the information about the continuation is lost after we push it
339 -- (in order to avoid pushing it again), so we end up doing a needless
340 -- indirect jump (ToDo).
-- Return an unboxed tuple from a _ccall_GC_: the return address is pushed
-- *before* running `before_jump` (the call itself), so the stack frame is
-- in place if the call triggers a garbage collection.
-- NOTE(review): the statement running `before_jump` (between adjustSpAndHp
-- and returnUnboxedTuple) and the closing brace are not visible in this
-- chunk — verify against VCS.
342 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
343 ccallReturnUnboxedTuple amodes before_jump
344 = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
346 -- Push a return address if necessary
347 ; pushReturnAddress eob
-- The continuation is now on the stack, so record the sequel as OnStack
-- (avoids pushing it a second time in returnUnboxedTuple).
348 ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
349 (do { adjustSpAndHp args_sp
351 ; returnUnboxedTuple amodes })
354 -- -----------------------------------------------------------------------------
355 -- Calling an out-of-line primop
-- | Tail-call an out-of-line primop.  Every argument is assigned to a
-- register; the ASSERT checks that register assignment left nothing for
-- the stack.  (Explicit brace/semicolon layout is kept so the CPP ASSERT
-- macro expands safely whether or not assertions are compiled in.)
tailCallPrimOp :: PrimOp -> [StgArg] -> Code
tailCallPrimOp op args
  = do  { amodes <- getArgAmodes args
        ; let (in_regs, spill) = assignPrimOpCallRegs amodes
              target          = jumpToLbl (mkRtsPrimOpLabel op)
        ; ASSERT(null spill) -- primop calls take no stack-resident args
          emitSimultaneously (assignToRegs in_regs)
        ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} target }
372 -- -----------------------------------------------------------------------------
375 -- | We always push the return address just before performing a tail call
376 -- or return. The reason we leave it until then is because the stack
377 -- slot that the return address is to go into might contain something
380 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
381 -- case expression and the return address is still to be pushed.
383 -- There are cases where it doesn't look necessary to push the return
384 -- address: for example, just before doing a return to a known
385 -- continuation. However, the continuation will expect to find the
386 -- return address on the stack in case it needs to do a heap check.
pushReturnAddress :: EndOfBlockInfo -> Code

-- Ordinary case alternatives: store the single return address at args_sp.
pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _ False)) = do
    slot <- getSpRelOffset args_sp
    stmtC (CmmStore slot (mkLblExpr lbl))

-- Polymorphic case: two return addresses go on the stack — the case
-- return itself (at args_sp - 1) underneath stg_seq_frame_info (at
-- args_sp), which turns a possible vectored return into a direct one.
pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _ True)) = do
    ret_slot <- getSpRelOffset (args_sp - 1)
    stmtC (CmmStore ret_slot (mkLblExpr lbl))
    seq_slot <- getSpRelOffset args_sp
    stmtC (CmmStore seq_slot (CmmLit (CmmLabel mkSeqInfoLabel)))

-- Any other sequel: nothing to push.
pushReturnAddress _ = nopC
405 -- -----------------------------------------------------------------------------
-- | Emit a jump to the given code label, passing no arguments to the
-- destination procedure.
jumpToLbl :: CLabel -> Code
jumpToLbl lbl = stmtC (CmmJump target [])
  where
    target = CmmLit (CmmLabel lbl)
-- | Turn (expression, register) pairs into the simultaneous assignments
-- that move each expression into its designated global register.
assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
assignToRegs = mkStmts . map assign
  where
    assign (expr, reg_id) = CmmAssign (CmmGlobal reg_id) expr
419 %************************************************************************
421 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
423 %************************************************************************
425 This function adjusts the stack and heap pointers just before a tail
426 call or return. The stack pointer is adjusted to its final position
427 (i.e. to point to the last argument for a tail call, or the activation
428 record for a return). The heap pointer may be moved backwards, in
429 cases where we overallocated at the beginning of the basic block (see
430 CgCase.lhs for discussion).
432 These functions {\em do not} deal with high-water-mark adjustment.
433 That's done by functions which allocate stack space.
-- Set Sp to its final position and retract Hp if the block overallocated.
-- NOTE(review): this definition is truncated in this chunk: the rest of
-- the type signature, the binding for `vHp` (presumably
-- `vHp = virtHp hp_usg`), and the tail of the do block (likely a
-- `setRealHp vHp` and closing brace) are not visible — verify against VCS.
436 adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr
438 adjustSpAndHp newRealSp
439 = do { -- Adjust stack, if necessary.
440 -- NB: the conditional on the monad-carried realSp
441 -- is out of line (via codeOnly), to avoid a black hole
442 ; new_sp <- getSpRelOffset newRealSp
443 ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case
444 ; setRealSp newRealSp -- where realSp==newRealSp
446 -- Adjust heap. The virtual heap pointer may be less than the real Hp
447 -- because the latter was advanced to deal with the worst-case branch
448 -- of the code, and we may be in a better-case branch. In that case,
449 -- move the real Hp *back* and retract some ticky allocation count.
450 ; hp_usg <- getHpUsage
451 ; let rHp = realHp hp_usg
453 ; new_hp <- getHpRelOffset vHp
454 ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp
455 ; tickyAllocHeap (vHp - rHp) -- ...ditto