%
% (c) The University of Glasgow 2006
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
% Code generation for tail calls.

\begin{code}
module CgTailCall (
        cgTailCall, performTailCall,
        performReturn, performPrimReturn,
        returnUnboxedTuple, ccallReturnUnboxedTuple,
        pushUnboxedTuple,
        tailCallPrimOp, tailCallPrimCall,

        pushReturnAddress
    ) where

#include "HsVersions.h"
-----------------------------------------------------------------------------
-- Tail Calls

cgTailCall :: Id -> [StgArg] -> Code
-- Here's the code we generate for a tail call.  (NB there may be no
-- arguments, in which case this boils down to just entering a variable.)
--
--    * Put args in the top locations of the stack.
--    * Adjust the stack ptr.
--    * Make R1 point to the function closure if necessary.
--    * Perform the call.
--
-- Things to be careful about:
--
--    * Don't overwrite stack locations before you have finished with
--      them (remember you need the function and the as-yet-unmoved
--      arguments); see the sketch below.
--    * Preferably, generate no code to replace x by x on the stack (a
--      common situation in tail-recursion).
--    * Adjust the stack high water mark appropriately.
--
-- Treat unboxed locals exactly like literals (above) except use the addr
-- mode for the local instead of (CLit lit) in the assignment.
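-- The "careful" points above are what emitSimultaneously enforces.  Below is
-- a minimal, self-contained sketch of that discipline (hypothetical Slot and
-- Move types, not the real CmmStmts machinery): identity moves are dropped,
-- a move runs only once nothing else still reads its destination, and cycles
-- are broken by routing one value through a temporary slot.
{-
import Data.List (partition)

type Slot = Int
data Move = Move { dst :: Slot, src :: Slot } deriving Show

-- Order the moves of a shuffle so no slot is overwritten before it is read.
-- Assumes each slot is written at most once.
schedule :: Slot -> [Move] -> [Move]
schedule temp moves
  | null nontrivial  = []
  | not (null ready) = ready ++ schedule temp blocked
  | otherwise        = -- every pending move reads someone else's destination:
                       -- spill one source to the temporary and retry
                       Move temp s : schedule temp (Move d temp : rest)
  where
    nontrivial       = [ m | m <- moves, dst m /= src m ]   -- drop x := x
    (ready, blocked) = partition safe nontrivial
    safe m           = dst m `notElem` map src nontrivial
    Move d s : rest  = blocked

-- e.g. schedule 99 [Move 1 2, Move 2 1]
--        == [Move 99 2, Move 2 1, Move 1 99]
-}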
cgTailCall fun args
  = do  { fun_info <- getCgIdInfo fun

        ; if isUnLiftedType (idType fun)
          then  -- Primitive return
                ASSERT( null args )
            do  { fun_amode <- idInfoToAmode fun_info
                ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }

          else -- Normal case, fun is boxed
            do  { arg_amodes <- getArgAmodes args
                ; performTailCall fun_info arg_amodes noStmts }
        }
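-- For example (illustrative only): a tail call to an unlifted variable such
-- as (x :: Int#) takes the performPrimReturn branch and simply loads x into
-- its return register, while a call (f a b) to a boxed function takes the
-- performTailCall branch below.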
-- -----------------------------------------------------------------------------
-- The guts of a tail-call

performTailCall
        :: CgIdInfo             -- The function
        -> [(CgRep,CmmExpr)]    -- Args
        -> CmmStmts             -- Pending simultaneous assignments
                                --  *** GUARANTEED to contain only stack assignments.
        -> Code
performTailCall fun_info arg_amodes pending_assts
  | Just join_sp <- maybeLetNoEscape fun_info
  =     -- A let-no-escape is slightly different, because we
        -- arrange the stack arguments into pointers and non-pointers
        -- to make the heap check easier.  The tail-call sequence
        -- is very similar to returning an unboxed tuple, so we
        -- share some code.
     do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
        ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
        ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
        ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
  | otherwise
  = do  { fun_amode <- idInfoToAmode fun_info
        ; let assignSt  = CmmAssign nodeReg fun_amode
              node_asst = oneStmt assignSt
              opt_node_asst | nodeMustPointToIt lf_info = node_asst
                            | otherwise                 = noStmts
        ; EndOfBlockInfo sp _ <- getEndOfBlockInfo

        ; dflags <- getDynFlags
        ; case getCallMethod dflags fun_name fun_has_cafs lf_info (length arg_amodes) of
            -- Node must always point to things we enter
            EnterIt -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; let target       = entryCode (closureInfoPtr (CmmReg nodeReg))
                      enterClosure = stmtC (CmmJump target [])
                      -- If this is a scrutinee,
                      -- let's check if the closure is a constructor
                      -- so we can directly jump to the alternatives switch
                      -- statement.
                      jumpInstr = getEndOfBlockInfo >>=
                                  maybeSwitchOnCons enterClosure
                ; doFinalJump sp False jumpInstr }
            -- A function, but we have zero arguments.  It is already in WHNF,
            -- so we can just return it.
            -- As with any return, Node must point to it.
            ReturnIt -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False emitReturnInstr }
            -- A real constructor.  Don't bother entering it,
            -- just do the right sort of return instead.
            -- As with any return, Node must point to it.
            ReturnCon _ -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False emitReturnInstr }
            JumpToIt lbl -> do
                { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False (jumpToLbl lbl) }
            -- A slow function call via the RTS apply routines
            -- Node must definitely point to the thing
            SlowCall -> do
                { when (not (null arg_amodes)) $ do
                  { if isKnownFun lf_info
                        then tickyKnownCallTooFewArgs
                        else tickyUnknownCall
                  ; tickySlowCallPat (map fst arg_amodes)
                  }

                ; let (apply_lbl, args, extra_args)
                        = constructSlowCall arg_amodes

                ; directCall sp apply_lbl args extra_args
                        (node_asst `plusStmts` pending_assts)
                }
            -- A direct function call (possibly with some left-over arguments)
            DirectEntry lbl arity -> do
                { if arity == length arg_amodes
                        then tickyKnownCallExact
                        else do tickyKnownCallExtraArgs
                                tickySlowCallPat (map fst (drop arity arg_amodes))

                ; let
                     -- The args beyond the arity go straight on the stack;
                     -- see the worked example after this function.
                     (arity_args, extra_args) = splitAt arity arg_amodes

                ; directCall sp lbl arity_args extra_args
                        (opt_node_asst `plusStmts` pending_assts)
                }
        }
  where
    fun_id       = cgIdInfoId fun_info
    fun_name     = idName fun_id
    lf_info      = cgIdInfoLF fun_info
    fun_has_cafs = idCafInfo fun_id
    untag_node   = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))

    -- Test if closure is a constructor
    maybeSwitchOnCons enterClosure eob
      | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob,
        not opt_SccProfilingOn
        -- we can't shortcut when profiling is on, because we have
        -- to enter a closure to mark it as "used" for LDV profiling
      = do { is_constr <- newLabelC
             -- Is the pointer tagged?
             -- Yes, jump to the switch statement
           ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
                        is_constr)
             -- No, enter the closure.
           ; enterClosure
           ; labelC is_constr
           ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
           }
      {- An older, disabled variant: instead of testing the pointer tag, it
         fetched the closure type from the info table (cf. the where-clause
         bindings below).

      -- This is a scrutinee for a case expression
      -- so let's see if we can directly inspect the closure
      | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
      = do { no_cons <- newLabelC
             -- Both the NCG and gcc optimize away the temp
           ; z <- newTemp wordRep
           ; stmtC (CmmAssign z tag_expr)
           ; let tag = CmmReg z
             -- Is the closure a cons?
           ; stmtC (CmmCondBranch (cond1 tag) no_cons)
           ; stmtC (CmmCondBranch (cond2 tag) no_cons)
             -- Yes, jump to the switch statement
           ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
           ; labelC no_cons
             -- No, enter the closure.
           ; enterClosure
           }
      -}
      -- No case expression involved, enter the closure.
      | otherwise
      = do { stmtC untag_node
           ; enterClosure
           }
        where
          -- cond1 could also be written (cmmULtWord tag lowCons), but since
          -- lowCons is 1, (tag < 1) holds exactly when (tag == 0), and the
          -- equality test is cheaper.
          tag_expr  = cmmGetClosureType (CmmReg nodeReg)
          cond1 tag = cmmEqWord tag (CmmLit (mkIntCLit 0))
          cond2 tag = cmmUGtWord tag highCons
          lowCons   = CmmLit (mkIntCLit 1)  -- CONSTR              (from ClosureTypes.h)
          highCons  = CmmLit (mkIntCLit 8)  -- CONSTR_NOCAF_STATIC (from ClosureTypes.h)
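-- A worked example of the DirectEntry split above (argument names invented
-- for illustration): for a known function of arity 2 applied to three
-- argument amodes [a1,a2,a3],
--
--      splitAt 2 [a1,a2,a3]  ==  ([a1,a2], [a3])
--
-- so [a1,a2] travel by the fast calling convention of the entry point, and
-- the leftover [a3] is pushed on the stack for the RTS apply machinery to
-- consume once the call yields a function value.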
directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)]
           -> [(CgRep, CmmExpr)] -> CmmStmts
           -> Code

directCall sp lbl args extra_args assts = do
  let
        -- First chunk of args go in registers
        (reg_arg_amodes, stk_args) = assignCallRegs args

        -- Any "extra" arguments are placed in frames on the
        -- stack after the other arguments.
        slow_stk_args = slowArgs extra_args

        reg_assts = assignToRegs reg_arg_amodes

  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)

  emitSimultaneously (reg_assts `plusStmts`
                      stk_assts `plusStmts`
                      assts)

  doFinalJump final_sp False (jumpToLbl lbl)
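-- A standalone sketch (invented names, not the real assignCallRegs, which
-- also distinguishes register pools by CgRep) of the register-first
-- convention directCall relies on: arguments are matched against a finite
-- pool of argument registers, and whatever is left over becomes
-- stack-resident.
{-
assignCallRegsSketch :: [String] -> [a] -> ([(a, String)], [a])
assignCallRegsSketch regs args
  = (zip args regs, drop (length regs) args)

-- e.g. assignCallRegsSketch ["R2","R3","R4"] [a1,a2,a3,a4,a5]
--        == ([(a1,"R2"), (a2,"R3"), (a3,"R4")], [a4,a5])
-}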
-- -----------------------------------------------------------------------------
-- The final clean-up before we do a jump at the end of a basic block.
-- This code is shared by tail-calls and returns.

doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
doFinalJump final_sp is_let_no_escape jump_code
  = do  { -- Adjust the high-water mark if necessary
          adjustStackHW final_sp

          -- Push a return address if necessary (after the assignments
          -- above, in case we clobber a live stack location)
          --
          -- DON'T push the return address when we're about to jump to a
          -- let-no-escape: the final tail call in the let-no-escape
          -- will do this.
        ; eob <- getEndOfBlockInfo
        ; whenC (not is_let_no_escape) (pushReturnAddress eob)

          -- Final adjustment of Sp/Hp
        ; adjustSpAndHp final_sp

          -- and do the jump
        ; jump_code }
-- ----------------------------------------------------------------------------
-- A general return (just a special case of doFinalJump, above)

performReturn :: Code   -- The code to execute to actually do the return
              -> Code

performReturn finish_code
  = do  { EndOfBlockInfo args_sp _sequel <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} finish_code }
-- ----------------------------------------------------------------------------
-- Primitive Returns
-- Just load the return value into the right register, and return.

performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
                  -> Code
performPrimReturn rep amode
  = do  { whenC (not (isVoidArg rep))
                (stmtC (CmmAssign ret_reg amode))
        ; performReturn emitReturnInstr }
  where
    ret_reg = dataReturnConvPrim rep
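-- A sketch (invented Rep type and register names) of the idea behind
-- dataReturnConvPrim: each primitive representation has a single dedicated
-- return register, so a primitive return is one assignment followed by the
-- return instruction.
{-
data Rep = PtrRep | IntRep | FloatRep | DoubleRep

retRegOf :: Rep -> String
retRegOf PtrRep    = "R1"
retRegOf IntRep    = "R1"
retRegOf FloatRep  = "F1"
retRegOf DoubleRep = "D1"
-}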
-- ---------------------------------------------------------------------------
-- Unboxed tuple returns

-- These are a bit like a normal tail call, except that:
--
--   - The tail-call target is an info table on the stack
--
--   - We separate stack arguments into pointers and non-pointers,
--     to make it easier to leave things in a sane state for a heap check.
--     This is OK because we can never partially apply an unboxed tuple,
--     unlike a function.  The same technique is used when calling
--     let-no-escape functions, because they also can't be partially
--     applied.
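-- A self-contained sketch of the layout this produces (hypothetical Rep
-- type, and an assumed pool of two return registers): leading components go
-- to registers, and the stack-resident remainder is pushed pointers-first so
-- the pointer/non-pointer boundary is known to the heap-check machinery.
{-
import Data.List (partition)

data Rep = Ptr | NonPtr deriving (Eq, Show)

layoutTuple :: Int -> [(Rep, a)] -> ([(Rep, a)], [(Int, (Rep, a))])
layoutTuple sp amodes = (regs, zip [sp ..] (ptrs ++ nptrs))
  where
    (regs, stk)   = splitAt 2 amodes            -- pretend: 2 return registers
    (ptrs, nptrs) = partition ((== Ptr) . fst) stk
-}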
returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
returnUnboxedTuple amodes
  = do  { (EndOfBlockInfo args_sp _sequel) <- getEndOfBlockInfo
        ; tickyUnboxedTupleReturn (length amodes)
        ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
        ; emitSimultaneously assts
        ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
                 -> [(CgRep, CmmExpr)]          -- amodes of the components
                 -> FCode (VirtualSpOffset,     -- final Sp
                           CmmStmts)            -- assignments (regs+stack)

pushUnboxedTuple sp []
  = return (sp, noStmts)
pushUnboxedTuple sp amodes
  = do  { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes

              -- separate the rest of the args into pointers and non-pointers
              (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
              reg_arg_assts = assignToRegs reg_arg_amodes

          -- push ptrs, then nonptrs, on the stack
        ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
        ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args

        ; returnFC (final_sp,
                    reg_arg_assts `plusStmts`
                    ptr_assts `plusStmts` nptr_assts) }
-- -----------------------------------------------------------------------------
-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
-- we want to do things in a slightly different order from normal:
--
--      - push return address
--      - adjust stack pointer
--      - r = call(args...)
--      - assign regs for unboxed tuple (usually just R1 = r)
--      - return to continuation
--
-- The return address (i.e. stack frame) must be on the stack before
-- doing the call in case the call ends up in the garbage collector.
--
-- Sadly, the information about the continuation is lost after we push it
-- (in order to avoid pushing it again), so we end up doing a needless
-- indirect jump (ToDo).

ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
ccallReturnUnboxedTuple amodes before_jump
  = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo

        -- Push a return address if necessary
        ; pushReturnAddress eob
        ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
            (do { adjustSpAndHp args_sp
                ; before_jump
                ; returnUnboxedTuple amodes })
    }
-- -----------------------------------------------------------------------------
-- Calling an out-of-line primop

tailCallPrimOp :: PrimOp -> [StgArg] -> Code
tailCallPrimOp op
 = tailCallPrim (mkRtsPrimOpLabel op)

tailCallPrimCall :: PrimCall -> [StgArg] -> Code
tailCallPrimCall primcall
 = tailCallPrim (mkPrimCallLabel primcall)
tailCallPrim :: CLabel -> [StgArg] -> Code
tailCallPrim lbl args
 = do   { -- We're going to perform a normal-looking tail call,
          -- except that *all* the arguments will be in registers.
          -- Hence the ASSERT( null leftovers )
          arg_amodes <- getArgAmodes args
        ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
              jump_to_primop = jumpToLbl lbl

        ; ASSERT(null leftovers) -- no stack-resident args
          emitSimultaneously (assignToRegs arg_regs)

        ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
-- -----------------------------------------------------------------------------
-- Return Addresses

-- We always push the return address just before performing a tail call
-- or return.  We leave it until then because the stack slot that the
-- return address is to go into might contain something useful.
--
-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
-- case expression and the return address is still to be pushed.
--
-- There are cases where it doesn't look necessary to push the return
-- address: for example, just before doing a return to a known
-- continuation.  However, the continuation will expect to find the
-- return address on the stack in case it needs to do a heap check.
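-- As a concrete illustration (offsets invented): if the sequel is
-- 'CaseAlts alts_lbl ...' and args_sp corresponds to Sp + 16, the push below
-- amounts to a single store of the alternatives' label,
--
--      W_[Sp + 16] = alts_lbl;
--
-- which leaves a well-formed frame on top of the stack if, say, a heap
-- check triggers a garbage collection before the scrutinee finishes.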
pushReturnAddress :: EndOfBlockInfo -> Code

pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _))
  = do  { sp_rel <- getSpRelOffset args_sp
        ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
pushReturnAddress _ = nopC
-- -----------------------------------------------------------------------------
-- Misc.

jumpToLbl :: CLabel -> Code
-- Passes no argument to the destination procedure
jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])

assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
assignToRegs reg_args
  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
            | (expr, reg_id) <- reg_args ]
\end{code}
%************************************************************************
%*                                                                      *
\subsection[CgStackery-adjust]{Adjusting the stack pointers}
%*                                                                      *
%************************************************************************
This function adjusts the stack and heap pointers just before a tail
call or return.  The stack pointer is adjusted to its final position
(i.e. to point to the last argument for a tail call, or the activation
record for a return).  The heap pointer may be moved backwards, in
cases where we overallocated at the beginning of the basic block (see
CgCase.lhs for discussion).

These functions {\em do not} deal with high-water-mark adjustment.
That's done by functions which allocate stack space.
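As a worked example of the heap adjustment: if the block conservatively
advanced the real Hp by five words for its worst-case branch, but the
branch actually taken allocated only three, then the virtual Hp sits two
words below the real Hp; adjustSpAndHp moves Hp back those two words and
retracts the same two words from the ticky allocation count.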
\begin{code}
adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
              -> Code
adjustSpAndHp newRealSp
  = do  { -- Adjust stack, if necessary.
          -- NB: the conditional on the monad-carried realSp
          --     is out of line (via codeOnly), to avoid a black hole
        ; new_sp <- getSpRelOffset newRealSp
        ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
        ; setRealSp newRealSp                   -- where realSp==newRealSp

          -- Adjust heap.  The virtual heap pointer may be less than the real Hp
          -- because the latter was advanced to deal with the worst-case branch
          -- of the code, and we may be in a better-case branch.  In that case,
          -- move the real Hp *back* and retract some ticky allocation count.
        ; hp_usg <- getHpUsage
        ; let rHp = realHp hp_usg
              vHp = virtHp hp_usg
        ; new_hp <- getHpRelOffset vHp
        ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
        ; tickyAllocHeap (vHp - rHp)            -- ...ditto

        ; setRealHp vHp
        }
\end{code}