2 % (c) The University of Glasgow 2006
3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
5 % Code generation for tail calls.
9 cgTailCall, performTailCall,
10 performReturn, performPrimReturn,
11 emitKnownConReturnCode, emitAlgReturnCode,
12 returnUnboxedTuple, ccallReturnUnboxedTuple,
19 #include "HsVersions.h"
44 -----------------------------------------------------------------------------
47 cgTailCall :: Id -> [StgArg] -> Code
49 -- Here's the code we generate for a tail call. (NB there may be no
50 -- arguments, in which case this boils down to just entering a variable.)
52 -- * Put args in the top locations of the stack.
53 -- * Adjust the stack ptr
54 -- * Make R1 point to the function closure if necessary.
55 -- * Perform the call.
57 -- Things to be careful about:
59 -- * Don't overwrite stack locations before you have finished with
60 -- them (remember you need the function and the as-yet-unmoved
62 -- * Preferably, generate no code to replace x by x on the stack (a
63 -- common situation in tail-recursion).
64 -- * Adjust the stack high water mark appropriately.
66 -- Treat unboxed locals exactly like literals (above) except use the addr
67 -- mode for the local instead of (CLit lit) in the assignment.
70 = do { fun_info <- getCgIdInfo fun
72 ; if isUnLiftedType (idType fun)
73 then -- Primitive return
75 do { fun_amode <- idInfoToAmode fun_info
76 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
78 else -- Normal case, fun is boxed
79 do { arg_amodes <- getArgAmodes args
80 ; performTailCall fun_info arg_amodes noStmts }
84 -- -----------------------------------------------------------------------------
85 -- The guts of a tail-call
88 :: CgIdInfo -- The function
89 -> [(CgRep,CmmExpr)] -- Args
90 -> CmmStmts -- Pending simultaneous assignments
91 -- *** GUARANTEED to contain only stack assignments.
94 performTailCall fun_info arg_amodes pending_assts
95 | Just join_sp <- maybeLetNoEscape fun_info
96 = -- A let-no-escape is slightly different, because we
97 -- arrange the stack arguments into pointers and non-pointers
98 -- to make the heap check easier. The tail-call sequence
99 -- is very similar to returning an unboxed tuple, so we
101 do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
102 ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
103 ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
104 ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
107 = do { fun_amode <- idInfoToAmode fun_info
108 ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
109 opt_node_asst | nodeMustPointToIt lf_info = node_asst
110 | otherwise = noStmts
111 ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
112 ; this_pkg <- getThisPackage
114 ; case (getCallMethod this_pkg fun_name lf_info (length arg_amodes)) of
116 -- Node must always point to things we enter
118 { emitSimultaneously (node_asst `plusStmts` pending_assts)
119 ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
120 ; doFinalJump sp False (stmtC (CmmJump target [])) }
122 -- A function, but we have zero arguments. It is already in WHNF,
123 -- so we can just return it.
124 -- As with any return, Node must point to it.
126 { emitSimultaneously (node_asst `plusStmts` pending_assts)
127 ; doFinalJump sp False emitDirectReturnInstr }
129 -- A real constructor. Don't bother entering it,
130 -- just do the right sort of return instead.
131 -- As with any return, Node must point to it.
133 { emitSimultaneously (node_asst `plusStmts` pending_assts)
134 ; doFinalJump sp False (emitKnownConReturnCode con) }
137 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
138 ; doFinalJump sp False (jumpToLbl lbl) }
140 -- A slow function call via the RTS apply routines
141 -- Node must definitely point to the thing
143 { when (not (null arg_amodes)) $ do
144 { if (isKnownFun lf_info)
145 then tickyKnownCallTooFewArgs
146 else tickyUnknownCall
147 ; tickySlowCallPat (map fst arg_amodes)
150 ; let (apply_lbl, args, extra_args)
151 = constructSlowCall arg_amodes
153 ; directCall sp apply_lbl args extra_args
154 (node_asst `plusStmts` pending_assts)
157 -- A direct function call (possibly with some left-over arguments)
158 DirectEntry lbl arity -> do
159 { if arity == length arg_amodes
160 then tickyKnownCallExact
161 else do tickyKnownCallExtraArgs
162 tickySlowCallPat (map fst (drop arity arg_amodes))
165 -- The args beyond the arity go straight on the stack
166 (arity_args, extra_args) = splitAt arity arg_amodes
168 ; directCall sp lbl arity_args extra_args
169 (opt_node_asst `plusStmts` pending_assts)
173 fun_name = idName (cgIdInfoId fun_info)
174 lf_info = cgIdInfoLF fun_info
178 directCall sp lbl args extra_args assts = do
180 -- First chunk of args go in registers
181 (reg_arg_amodes, stk_args) = assignCallRegs args
183 -- Any "extra" arguments are placed in frames on the
184 -- stack after the other arguments.
185 slow_stk_args = slowArgs extra_args
187 reg_assts = assignToRegs reg_arg_amodes
189 (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
191 emitSimultaneously (reg_assts `plusStmts`
192 stk_assts `plusStmts`
195 doFinalJump final_sp False (jumpToLbl lbl)
197 -- -----------------------------------------------------------------------------
198 -- The final clean-up before we do a jump at the end of a basic block.
199 -- This code is shared by tail-calls and returns.
201 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
202 doFinalJump final_sp is_let_no_escape jump_code
203 = do { -- Adjust the high-water mark if necessary
204 adjustStackHW final_sp
206 -- Push a return address if necessary (after the assignments
207 -- above, in case we clobber a live stack location)
209 -- DONT push the return address when we're about to jump to a
210 -- let-no-escape: the final tail call in the let-no-escape
212 ; eob <- getEndOfBlockInfo
213 ; whenC (not is_let_no_escape) (pushReturnAddress eob)
215 -- Final adjustment of Sp/Hp
216 ; adjustSpAndHp final_sp
221 -- -----------------------------------------------------------------------------
222 -- A general return (just a special case of doFinalJump, above)
224 performReturn :: Code -- The code to execute to actually do the return
227 performReturn finish_code
228 = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
229 ; doFinalJump args_sp False{-not a LNE-} finish_code }
231 -- -----------------------------------------------------------------------------
233 -- Just load the return value into the right register, and return.
235 performPrimReturn :: CgRep -> CmmExpr -- The thing to return
237 performPrimReturn rep amode
238 = do { whenC (not (isVoidArg rep))
239 (stmtC (CmmAssign ret_reg amode))
240 ; performReturn emitDirectReturnInstr }
242 ret_reg = dataReturnConvPrim rep
244 -- -----------------------------------------------------------------------------
245 -- Algebraic constructor returns
247 -- Constructor is built on the heap; Node is set.
248 -- All that remains is to do the right sort of jump.
250 emitKnownConReturnCode :: DataCon -> Code
251 emitKnownConReturnCode con
252 = emitAlgReturnCode (dataConTyCon con)
253 (CmmLit (mkIntCLit (dataConTagZ con)))
254 -- emitAlgReturnCode requires zero-indexed tag
256 emitAlgReturnCode :: TyCon -> CmmExpr -> Code
257 -- emitAlgReturnCode is used both by emitKnownConReturnCode,
258 -- and by by PrimOps that return enumerated types (i.e.
259 -- all the comparison operators).
260 emitAlgReturnCode tycon tag
261 = do { case ctrlReturnConvAlg tycon of
262 VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
263 ; emitVectoredReturnInstr tag }
264 UnvectoredReturn _ -> emitDirectReturnInstr
268 -- ---------------------------------------------------------------------------
269 -- Unboxed tuple returns
271 -- These are a bit like a normal tail call, except that:
273 -- - The tail-call target is an info table on the stack
275 -- - We separate stack arguments into pointers and non-pointers,
276 -- to make it easier to leave things in a sane state for a heap check.
277 -- This is OK because we can never partially-apply an unboxed tuple,
278 -- unlike a function. The same technique is used when calling
279 -- let-no-escape functions, because they also can't be partially
282 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
283 returnUnboxedTuple amodes
284 = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
285 ; tickyUnboxedTupleReturn (length amodes)
286 ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
287 ; emitSimultaneously assts
288 ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
290 pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing
291 -> [(CgRep, CmmExpr)] -- amodes of the components
292 -> FCode (VirtualSpOffset, -- final Sp
293 CmmStmts) -- assignments (regs+stack)
295 pushUnboxedTuple sp []
296 = return (sp, noStmts)
297 pushUnboxedTuple sp amodes
298 = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
300 -- separate the rest of the args into pointers and non-pointers
301 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
302 reg_arg_assts = assignToRegs reg_arg_amodes
304 -- push ptrs, then nonptrs, on the stack
305 ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args
306 ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
308 ; returnFC (final_sp,
309 reg_arg_assts `plusStmts`
310 ptr_assts `plusStmts` nptr_assts) }
313 -- -----------------------------------------------------------------------------
314 -- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
315 -- we want to do things in a slightly different order to normal:
317 -- - push return address
318 -- - adjust stack pointer
319 -- - r = call(args...)
320 -- - assign regs for unboxed tuple (usually just R1 = r)
321 -- - return to continuation
323 -- The return address (i.e. stack frame) must be on the stack before
324 -- doing the call in case the call ends up in the garbage collector.
326 -- Sadly, the information about the continuation is lost after we push it
327 -- (in order to avoid pushing it again), so we end up doing a needless
328 -- indirect jump (ToDo).
330 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
331 ccallReturnUnboxedTuple amodes before_jump
332 = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
334 -- Push a return address if necessary
335 ; pushReturnAddress eob
336 ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
337 (do { adjustSpAndHp args_sp
339 ; returnUnboxedTuple amodes })
342 -- -----------------------------------------------------------------------------
343 -- Calling an out-of-line primop
345 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
346 tailCallPrimOp op args
347 = do { -- We're going to perform a normal-looking tail call,
348 -- except that *all* the arguments will be in registers.
349 -- Hence the ASSERT( null leftovers )
350 arg_amodes <- getArgAmodes args
351 ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
352 jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
354 ; ASSERT(null leftovers) -- no stack-resident args
355 emitSimultaneously (assignToRegs arg_regs)
357 ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
358 ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
360 -- -----------------------------------------------------------------------------
363 -- We always push the return address just before performing a tail call
364 -- or return. The reason we leave it until then is because the stack
365 -- slot that the return address is to go into might contain something
368 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
369 -- case expression and the return address is still to be pushed.
371 -- There are cases where it doesn't look necessary to push the return
372 -- address: for example, just before doing a return to a known
373 -- continuation. However, the continuation will expect to find the
374 -- return address on the stack in case it needs to do a heap check.
376 pushReturnAddress :: EndOfBlockInfo -> Code
378 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
379 = do { sp_rel <- getSpRelOffset args_sp
380 ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
382 -- For a polymorphic case, we have two return addresses to push: the case
383 -- return, and stg_seq_frame_info which turns a possible vectored return
384 -- into a direct one.
385 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
386 = do { sp_rel <- getSpRelOffset (args_sp-1)
387 ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
388 ; sp_rel <- getSpRelOffset args_sp
389 ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
391 pushReturnAddress _ = nopC
393 -- -----------------------------------------------------------------------------
396 jumpToLbl :: CLabel -> Code
397 -- Passes no argument to the destination procedure
398 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
400 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
401 assignToRegs reg_args
402 = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
403 | (expr, reg_id) <- reg_args ]
407 %************************************************************************
409 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
411 %************************************************************************
413 This function adjusts the stack and heap pointers just before a tail
414 call or return. The stack pointer is adjusted to its final position
415 (i.e. to point to the last argument for a tail call, or the activation
416 record for a return). The heap pointer may be moved backwards, in
417 cases where we overallocated at the beginning of the basic block (see
418 CgCase.lhs for discussion).
420 These functions {\em do not} deal with high-water-mark adjustment.
421 That's done by functions which allocate stack space.
424 adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr
426 adjustSpAndHp newRealSp
427 = do { -- Adjust stack, if necessary.
428 -- NB: the conditional on the monad-carried realSp
429 -- is out of line (via codeOnly), to avoid a black hole
430 ; new_sp <- getSpRelOffset newRealSp
431 ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case
432 ; setRealSp newRealSp -- where realSp==newRealSp
434 -- Adjust heap. The virtual heap pointer may be less than the real Hp
435 -- because the latter was advanced to deal with the worst-case branch
436 -- of the code, and we may be in a better-case branch. In that case,
437 -- move the real Hp *back* and retract some ticky allocation count.
438 ; hp_usg <- getHpUsage
439 ; let rHp = realHp hp_usg
441 ; new_hp <- getHpRelOffset vHp
442 ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp
443 ; tickyAllocHeap (vHp - rHp) -- ...ditto