2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 % Code generation for tail calls.
9 cgTailCall, performTailCall,
10 performReturn, performPrimReturn,
11 returnUnboxedTuple, ccallReturnUnboxedTuple,
18 #include "HsVersions.h"
41 -----------------------------------------------------------------------------
44 cgTailCall :: Id -> [StgArg] -> Code
46 -- Here's the code we generate for a tail call. (NB there may be no
47 -- arguments, in which case this boils down to just entering a variable.)
49 -- * Put args in the top locations of the stack.
50 -- * Adjust the stack ptr
51 -- * Make R1 point to the function closure if necessary.
52 -- * Perform the call.
54 -- Things to be careful about:
56 -- * Don't overwrite stack locations before you have finished with
57 -- them (remember you need the function and the as-yet-unmoved
59 -- * Preferably, generate no code to replace x by x on the stack (a
60 -- common situation in tail-recursion).
61 -- * Adjust the stack high water mark appropriately.
63 -- Treat unboxed locals exactly like literals (above) except use the addr
64 -- mode for the local instead of (CLit lit) in the assignment.
67 = do { fun_info <- getCgIdInfo fun
69 ; if isUnLiftedType (idType fun)
70 then -- Primitive return
72 do { fun_amode <- idInfoToAmode fun_info
73 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
75 else -- Normal case, fun is boxed
76 do { arg_amodes <- getArgAmodes args
77 ; performTailCall fun_info arg_amodes noStmts }
81 -- -----------------------------------------------------------------------------
82 -- The guts of a tail-call
85 :: CgIdInfo -- The function
86 -> [(CgRep,CmmExpr)] -- Args
87 -> CmmStmts -- Pending simultaneous assignments
88 -- *** GUARANTEED to contain only stack assignments.
91 performTailCall fun_info arg_amodes pending_assts
92 | Just join_sp <- maybeLetNoEscape fun_info
93 = -- A let-no-escape is slightly different, because we
94 -- arrange the stack arguments into pointers and non-pointers
95 -- to make the heap check easier. The tail-call sequence
96 -- is very similar to returning an unboxed tuple, so we
98 do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
99 ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
100 ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
101 ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
104 = do { fun_amode <- idInfoToAmode fun_info
105 ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
106 opt_node_asst | nodeMustPointToIt lf_info = node_asst
107 | otherwise = noStmts
108 ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
109 ; this_pkg <- getThisPackage
111 ; case (getCallMethod this_pkg fun_name lf_info (length arg_amodes)) of
113 -- Node must always point to things we enter
115 { emitSimultaneously (node_asst `plusStmts` pending_assts)
116 ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
117 ; doFinalJump sp False (stmtC (CmmJump target [])) }
119 -- A function, but we have zero arguments. It is already in WHNF,
120 -- so we can just return it.
121 -- As with any return, Node must point to it.
123 { emitSimultaneously (node_asst `plusStmts` pending_assts)
124 ; doFinalJump sp False emitReturnInstr }
126 -- A real constructor. Don't bother entering it,
127 -- just do the right sort of return instead.
128 -- As with any return, Node must point to it.
130 { emitSimultaneously (node_asst `plusStmts` pending_assts)
131 ; doFinalJump sp False emitReturnInstr }
134 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
135 ; doFinalJump sp False (jumpToLbl lbl) }
137 -- A slow function call via the RTS apply routines
138 -- Node must definitely point to the thing
140 { when (not (null arg_amodes)) $ do
141 { if (isKnownFun lf_info)
142 then tickyKnownCallTooFewArgs
143 else tickyUnknownCall
144 ; tickySlowCallPat (map fst arg_amodes)
147 ; let (apply_lbl, args, extra_args)
148 = constructSlowCall arg_amodes
150 ; directCall sp apply_lbl args extra_args
151 (node_asst `plusStmts` pending_assts)
154 -- A direct function call (possibly with some left-over arguments)
155 DirectEntry lbl arity -> do
156 { if arity == length arg_amodes
157 then tickyKnownCallExact
158 else do tickyKnownCallExtraArgs
159 tickySlowCallPat (map fst (drop arity arg_amodes))
162 -- The args beyond the arity go straight on the stack
163 (arity_args, extra_args) = splitAt arity arg_amodes
165 ; directCall sp lbl arity_args extra_args
166 (opt_node_asst `plusStmts` pending_assts)
170 fun_name = idName (cgIdInfoId fun_info)
171 lf_info = cgIdInfoLF fun_info
175 directCall sp lbl args extra_args assts = do
177 -- First chunk of args go in registers
178 (reg_arg_amodes, stk_args) = assignCallRegs args
180 -- Any "extra" arguments are placed in frames on the
181 -- stack after the other arguments.
182 slow_stk_args = slowArgs extra_args
184 reg_assts = assignToRegs reg_arg_amodes
186 (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
188 emitSimultaneously (reg_assts `plusStmts`
189 stk_assts `plusStmts`
192 doFinalJump final_sp False (jumpToLbl lbl)
194 -- -----------------------------------------------------------------------------
195 -- The final clean-up before we do a jump at the end of a basic block.
196 -- This code is shared by tail-calls and returns.
198 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
199 doFinalJump final_sp is_let_no_escape jump_code
200 = do { -- Adjust the high-water mark if necessary
201 adjustStackHW final_sp
203 -- Push a return address if necessary (after the assignments
204 -- above, in case we clobber a live stack location)
206 -- DONT push the return address when we're about to jump to a
207 -- let-no-escape: the final tail call in the let-no-escape
209 ; eob <- getEndOfBlockInfo
210 ; whenC (not is_let_no_escape) (pushReturnAddress eob)
212 -- Final adjustment of Sp/Hp
213 ; adjustSpAndHp final_sp
218 -- ----------------------------------------------------------------------------
219 -- A general return (just a special case of doFinalJump, above)
221 performReturn :: Code -- The code to execute to actually do the return
224 performReturn finish_code
225 = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
226 ; doFinalJump args_sp False{-not a LNE-} finish_code }
228 -- ----------------------------------------------------------------------------
230 -- Just load the return value into the right register, and return.
232 performPrimReturn :: CgRep -> CmmExpr -- The thing to return
234 performPrimReturn rep amode
235 = do { whenC (not (isVoidArg rep))
236 (stmtC (CmmAssign ret_reg amode))
237 ; performReturn emitReturnInstr }
239 ret_reg = dataReturnConvPrim rep
241 -- ---------------------------------------------------------------------------
242 -- Unboxed tuple returns
244 -- These are a bit like a normal tail call, except that:
246 -- - The tail-call target is an info table on the stack
248 -- - We separate stack arguments into pointers and non-pointers,
249 -- to make it easier to leave things in a sane state for a heap check.
250 -- This is OK because we can never partially-apply an unboxed tuple,
251 -- unlike a function. The same technique is used when calling
252 -- let-no-escape functions, because they also can't be partially
255 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
256 returnUnboxedTuple amodes
257 = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
258 ; tickyUnboxedTupleReturn (length amodes)
259 ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
260 ; emitSimultaneously assts
261 ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
263 pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing
264 -> [(CgRep, CmmExpr)] -- amodes of the components
265 -> FCode (VirtualSpOffset, -- final Sp
266 CmmStmts) -- assignments (regs+stack)
268 pushUnboxedTuple sp []
269 = return (sp, noStmts)
270 pushUnboxedTuple sp amodes
271 = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
273 -- separate the rest of the args into pointers and non-pointers
274 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
275 reg_arg_assts = assignToRegs reg_arg_amodes
277 -- push ptrs, then nonptrs, on the stack
278 ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args
279 ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
281 ; returnFC (final_sp,
282 reg_arg_assts `plusStmts`
283 ptr_assts `plusStmts` nptr_assts) }
286 -- -----------------------------------------------------------------------------
287 -- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
288 -- we want to do things in a slightly different order to normal:
290 -- - push return address
291 -- - adjust stack pointer
292 -- - r = call(args...)
293 -- - assign regs for unboxed tuple (usually just R1 = r)
294 -- - return to continuation
296 -- The return address (i.e. stack frame) must be on the stack before
297 -- doing the call in case the call ends up in the garbage collector.
299 -- Sadly, the information about the continuation is lost after we push it
300 -- (in order to avoid pushing it again), so we end up doing a needless
301 -- indirect jump (ToDo).
303 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
304 ccallReturnUnboxedTuple amodes before_jump
305 = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
307 -- Push a return address if necessary
308 ; pushReturnAddress eob
309 ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
310 (do { adjustSpAndHp args_sp
312 ; returnUnboxedTuple amodes })
315 -- -----------------------------------------------------------------------------
316 -- Calling an out-of-line primop
318 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
319 tailCallPrimOp op args
320 = do { -- We're going to perform a normal-looking tail call,
321 -- except that *all* the arguments will be in registers.
322 -- Hence the ASSERT( null leftovers )
323 arg_amodes <- getArgAmodes args
324 ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
325 jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
327 ; ASSERT(null leftovers) -- no stack-resident args
328 emitSimultaneously (assignToRegs arg_regs)
330 ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
331 ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
333 -- -----------------------------------------------------------------------------
336 -- We always push the return address just before performing a tail call
337 -- or return. The reason we leave it until then is because the stack
338 -- slot that the return address is to go into might contain something
341 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
342 -- case expression and the return address is still to be pushed.
344 -- There are cases where it doesn't look necessary to push the return
345 -- address: for example, just before doing a return to a known
346 -- continuation. However, the continuation will expect to find the
347 -- return address on the stack in case it needs to do a heap check.
349 pushReturnAddress :: EndOfBlockInfo -> Code
351 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _))
352 = do { sp_rel <- getSpRelOffset args_sp
353 ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
355 pushReturnAddress _ = nopC
357 -- -----------------------------------------------------------------------------
360 jumpToLbl :: CLabel -> Code
361 -- Passes no argument to the destination procedure
362 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
364 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
365 assignToRegs reg_args
366 = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
367 | (expr, reg_id) <- reg_args ]
371 %************************************************************************
373 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
375 %************************************************************************
377 This function adjusts the stack and heap pointers just before a tail
378 call or return. The stack pointer is adjusted to its final position
379 (i.e. to point to the last argument for a tail call, or the activation
380 record for a return). The heap pointer may be moved backwards, in
381 cases where we overallocated at the beginning of the basic block (see
382 CgCase.lhs for discussion).
384 These functions {\em do not} deal with high-water-mark adjustment.
385 That's done by functions which allocate stack space.
388 adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr
390 adjustSpAndHp newRealSp
391 = do { -- Adjust stack, if necessary.
392 -- NB: the conditional on the monad-carried realSp
393 -- is out of line (via codeOnly), to avoid a black hole
394 ; new_sp <- getSpRelOffset newRealSp
395 ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case
396 ; setRealSp newRealSp -- where realSp==newRealSp
398 -- Adjust heap. The virtual heap pointer may be less than the real Hp
399 -- because the latter was advanced to deal with the worst-case branch
400 -- of the code, and we may be in a better-case branch. In that case,
401 -- move the real Hp *back* and retract some ticky allocation count.
402 ; hp_usg <- getHpUsage
403 ; let rHp = realHp hp_usg
405 ; new_hp <- getHpRelOffset vHp
406 ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp
407 ; tickyAllocHeap (vHp - rHp) -- ...ditto