2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 % $Id: CgTailCall.lhs,v 1.36 2002/12/11 15:36:27 simonmar Exp $
6 %********************************************************
8 \section[CgTailCall]{Tail calls: converting @StgApps@}
10 %********************************************************
14 cgTailCall, performTailCall,
15 performReturn, performPrimReturn,
16 mkStaticAlgReturnCode, mkDynamicAlgReturnCode,
17 returnUnboxedTuple, ccallReturnUnboxedTuple,
24 #include "HsVersions.h"
27 import CgBindery ( getArgAmodes, getCAddrMode, getCAddrModeAndInfo )
30 import CgUsages ( getSpRelOffset, adjustSpAndHp )
33 import AbsCUtils ( mkAbstractCs, getAmodeRep )
35 import CLabel ( mkUpdInfoLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
37 import Id ( Id, idType, idName )
38 import DataCon ( DataCon, dataConTyCon, dataConTag, fIRST_TAG )
39 import PrimRep ( PrimRep(..) )
40 import StgSyn ( StgArg )
41 import Type ( isUnLiftedType )
43 import TyCon ( TyCon )
44 import PrimOp ( PrimOp )
45 import Util ( zipWithEqual, splitAtList )
46 import ListSetOps ( assocMaybe )
47 import PrimRep ( isFollowableRep )
49 import Panic ( panic, assertPanic )
51 import List ( partition )
53 -----------------------------------------------------------------------------
56 cgTailCall :: Id -> [StgArg] -> Code
58 -- Here's the code we generate for a tail call. (NB there may be no
59 -- arguments, in which case this boils down to just entering a variable.)
61 -- * Put args in the top locations of the stack.
62 -- * Adjust the stack ptr
63 -- * Make R1 point to the function closure if necessary.
64 -- * Perform the call.
66 -- Things to be careful about:
68 -- * Don't overwrite stack locations before you have finished with
69 -- them (remember you need the function and the as-yet-unmoved
71 -- * Preferably, generate no code to replace x by x on the stack (a
72 -- common situation in tail-recursion).
73 -- * Adjust the stack high water mark appropriately.
75 -- Treat unboxed locals exactly like literals (above) except use the addr
76 -- mode for the local instead of (CLit lit) in the assignment.
78 -- Case for unboxed returns first:
80 | isUnLiftedType (idType fun)
81 = getCAddrMode fun `thenFC` \ amode ->
82 performPrimReturn (ppr fun) amode
84 -- The general case (@fun@ is boxed):
86 = getCAddrModeAndInfo fun `thenFC` \ (fun', fun_amode, lf_info) ->
87 getArgAmodes args `thenFC` \ arg_amodes ->
88 performTailCall fun' fun_amode lf_info arg_amodes AbsCNop
91 -- -----------------------------------------------------------------------------
92 -- The guts of a tail-call
96 -> CAddrMode -- function amode
99 -> AbstractC -- Pending simultaneous assignments
100 -- *** GUARANTEED to contain only stack assignments.
103 performTailCall fun fun_amode lf_info arg_amodes pending_assts =
104 nodeMustPointToIt lf_info `thenFC` \ node_points ->
106 -- assign to node if necessary
108 | node_points = CAssign (CReg node) fun_amode
109 | otherwise = AbsCNop
112 getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
115 -- set up for a let-no-escape if necessary
116 join_sp = case fun_amode of
121 -- decide how to code the tail-call: which registers assignments to make,
122 -- what args to push on the stack, and how to make the jump
123 constructTailCall (idName fun) lf_info arg_amodes join_sp
124 node_points fun_amode sequel
125 `thenFC` \ (final_sp, arg_assts, jump_code) ->
127 let sim_assts = mkAbstractCs [node_asst,
131 is_lne = case fun_amode of { CJoinPoint _ -> True; _ -> False }
134 doFinalJump final_sp sim_assts is_lne (const jump_code)
137 -- Figure out how to do a particular tail-call.
143 -> VirtualSpOffset -- Sp at which to make the call
144 -> Bool -- node points to the fun closure?
145 -> CAddrMode -- addressing mode of the function
146 -> Sequel -- the sequel, in case we need it
148 VirtualSpOffset, -- Sp after pushing the args
149 AbstractC, -- assignments
150 Code -- code to do the jump
153 constructTailCall name lf_info arg_amodes sp node_points fun_amode sequel =
155 getEntryConvention name lf_info (map getAmodeRep arg_amodes)
156 `thenFC` \ entry_conv ->
159 EnterIt -> returnFC (sp, AbsCNop, code)
160 where code = profCtrC FSLIT("TICK_ENT_VIA_NODE") [] `thenC`
161 absC (CJump (CMacroExpr CodePtrRep ENTRY_CODE
162 [CVal (nodeRel 0) DataPtrRep]))
164 -- A function, but we have zero arguments. It is already in WHNF,
165 -- so we can just return it.
166 ReturnIt -> returnFC (sp, asst, code)
167 where -- if node doesn't already point to the closure, we have to
169 asst | node_points = AbsCNop
170 | otherwise = CAssign (CReg node) fun_amode
172 code = sequelToAmode sequel `thenFC` \ dest_amode ->
173 absC (CReturn dest_amode DirectReturn)
175 JumpToIt lbl -> returnFC (sp, AbsCNop, code)
176 where code = absC (CJump (CLbl lbl CodePtrRep))
178 -- a slow function call via the RTS apply routines
180 let (apply_fn, new_amodes) = constructSlowCall arg_amodes
182 -- if node doesn't already point to the closure,
183 -- we have to load it up.
184 node_asst | node_points = AbsCNop
185 | otherwise = CAssign (CReg node) fun_amode
188 -- Fill in all the arguments on the stack
189 mkStkAmodes sp new_amodes `thenFC`
190 \ (final_sp, stk_assts) ->
193 (final_sp + 1, -- add one, because the stg_ap functions
194 -- expect there to be a free slot on the stk
195 mkAbstractCs [node_asst, stk_assts],
196 absC (CJump apply_fn)
199 -- A direct function call (possibly with some left-over arguments)
200 DirectEntry lbl arity regs
202 -- A let-no-escape is slightly different, because we
203 -- arrange the stack arguments into pointers and non-pointers
204 -- to make the heap check easier. The tail-call sequence
205 -- is very similar to returning an unboxed tuple, so we
207 | is_let_no_escape ->
208 pushUnboxedTuple sp arg_amodes `thenFC` \ (final_sp, assts) ->
209 returnFC (final_sp, assts, absC (CJump (CLbl lbl CodePtrRep)))
212 -- A normal fast call
215 -- first chunk of args go in registers
216 (reg_arg_amodes, stk_arg_amodes) =
217 splitAtList regs arg_amodes
219 -- the rest of this function's args go straight on the stack
220 (stk_args, extra_stk_args) =
221 splitAt (arity - length regs) stk_arg_amodes
223 -- any "extra" arguments are placed in frames on the
224 -- stack after the other arguments.
225 slow_stk_args = slowArgs extra_stk_args
228 = mkAbstractCs (zipWithEqual "assign_to_reg2"
229 assign_to_reg regs reg_arg_amodes)
232 mkStkAmodes sp (stk_args ++ slow_stk_args) `thenFC`
233 \ (final_sp, stk_assts) ->
237 mkAbstractCs [reg_assts, stk_assts],
238 absC (CJump (CLbl lbl CodePtrRep))
241 where is_let_no_escape = case fun_amode of
245 -- -----------------------------------------------------------------------------
246 -- The final clean-up before we do a jump at the end of a basic block.
247 -- This code is shared by tail-calls and returns.
249 doFinalJump :: VirtualSpOffset -> AbstractC -> Bool -> (Sequel -> Code) -> Code
250 doFinalJump final_sp sim_assts is_let_no_escape jump_code =
252 -- adjust the high-water mark if necessary
253 adjustStackHW final_sp `thenC`
255 -- Do the simultaneous assignments,
256 absC (CSimultaneous sim_assts) `thenC`
258 -- push a return address if necessary (after the assignments
259 -- above, in case we clobber a live stack location)
261 -- DONT push the return address when we're about to jump to a
262 -- let-no-escape: the final tail call in the let-no-escape
264 getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
265 (if is_let_no_escape then nopC
266 else pushReturnAddress eob) `thenC`
268 -- Final adjustment of Sp/Hp
269 adjustSpAndHp final_sp `thenC`
274 -- -----------------------------------------------------------------------------
275 -- A general return (just a special case of doFinalJump, above)
277 performReturn :: AbstractC -- Simultaneous assignments to perform
278 -> (Sequel -> Code) -- The code to execute to actually do
279 -- the return, given an addressing mode
280 -- for the return address
283 performReturn sim_assts finish_code
284 = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
285 doFinalJump args_sp sim_assts False{-not a LNE-} finish_code
287 -- -----------------------------------------------------------------------------
290 -- Just load the return value into the right register, and return.
292 performPrimReturn :: SDoc -- Just for debugging (sigh)
293 -> CAddrMode -- The thing to return
296 performPrimReturn doc amode
298 kind = getAmodeRep amode
299 ret_reg = dataReturnConvPrim kind
301 assign_possibly = case kind of
303 kind -> (CAssign (CReg ret_reg) amode)
305 performReturn assign_possibly (mkPrimReturnCode doc)
307 mkPrimReturnCode :: SDoc -- Debugging only
310 mkPrimReturnCode doc UpdateCode = pprPanic "mkPrimReturnCode: Upd" doc
311 mkPrimReturnCode doc sequel = sequelToAmode sequel `thenFC` \ dest_amode ->
312 absC (CReturn dest_amode DirectReturn)
313 -- Direct, no vectoring
315 -- -----------------------------------------------------------------------------
316 -- Algebraic constructor returns
318 -- Constructor is built on the heap; Node is set.
319 -- All that remains is
320 -- (a) to set TagReg, if necessary
321 -- (c) to do the right sort of jump.
323 mkStaticAlgReturnCode :: DataCon -- The constructor
324 -> Sequel -- where to return to
327 mkStaticAlgReturnCode con sequel
328 = -- Generate profiling code if necessary
329 (case return_convention of
330 VectoredReturn sz -> profCtrC FSLIT("TICK_VEC_RETURN") [mkIntCLit sz]
334 -- Set tag if necessary
335 -- This is done by a macro, because if we are short of registers
336 -- we don't set TagReg; instead the continuation gets the tag
337 -- by indexing off the info ptr
338 (case return_convention of
340 UnvectoredReturn no_of_constrs
342 -> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag])
347 -- Generate the right jump or return
349 UpdateCode -> -- Ha! We can go direct to the update code,
350 -- (making sure to jump to the *correct* update
352 absC (CReturn (CLbl mkUpdInfoLabel CodePtrRep)
355 CaseAlts _ (Just (alts, _)) False -> -- Ho! We know the constructor so
356 -- we can go right to the alternative
358 case assocMaybe alts tag of
359 Just (alt_absC, join_lbl) ->
360 absC (CJump (CLbl join_lbl CodePtrRep))
361 Nothing -> panic "mkStaticAlgReturnCode: default"
362 -- The Nothing case should never happen;
363 -- it's the subject of a wad of special-case
364 -- code in cgReturnCon
366 other -> -- OnStack, or (CaseAlts ret_amode Nothing)
367 sequelToAmode sequel `thenFC` \ ret_amode ->
368 absC (CReturn ret_amode return_info)
373 tycon = dataConTyCon con
374 return_convention = ctrlReturnConvAlg tycon
375 zero_indexed_tag = tag - fIRST_TAG -- Adjust tag to be zero-indexed
376 -- cf AbsCUtils.mkAlgAltsCSwitch
379 case return_convention of
380 UnvectoredReturn _ -> DirectReturn
381 VectoredReturn _ -> StaticVectoredReturn zero_indexed_tag
384 -- -----------------------------------------------------------------------------
385 -- Returning an enumerated type from a PrimOp
387 -- This function is used by PrimOps that return enumerated types (i.e.
388 -- all the comparison operators).
390 mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code
392 mkDynamicAlgReturnCode tycon dyn_tag sequel
393 = case ctrlReturnConvAlg tycon of
396 profCtrC FSLIT("TICK_VEC_RETURN") [mkIntCLit sz] `thenC`
397 sequelToAmode sequel `thenFC` \ ret_addr ->
398 absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag))
400 UnvectoredReturn no_of_constrs ->
402 -- Set tag if necessary
403 -- This is done by a macro, because if we are short of registers
404 -- we don't set TagReg; instead the continuation gets the tag
405 -- by indexing off the info ptr
406 (if no_of_constrs > 1 then
407 absC (CMacroStmt SET_TAG [dyn_tag])
413 sequelToAmode sequel `thenFC` \ ret_addr ->
414 -- Generate the right jump or return
415 absC (CReturn ret_addr DirectReturn)
418 -- ---------------------------------------------------------------------------
419 -- Unboxed tuple returns
421 -- These are a bit like a normal tail call, except that:
423 -- - The tail-call target is an info table on the stack
425 -- - We separate stack arguments into pointers and non-pointers,
426 -- to make it easier to leave things in a sane state for a heap check.
427 -- This is OK because we can never partially-apply an unboxed tuple,
428 -- unlike a function. The same technique is used when calling
429 -- let-no-escape functions, because they also can't be partially
432 returnUnboxedTuple :: [CAddrMode] -> Code
433 returnUnboxedTuple amodes =
434 getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
436 profCtrC FSLIT("TICK_RET_UNBOXED_TUP") [mkIntCLit (length amodes)] `thenC`
438 pushUnboxedTuple args_sp amodes `thenFC` \ (final_sp, assts) ->
439 doFinalJump final_sp assts False{-not a LNE-} mkUnboxedTupleReturnCode
443 :: VirtualSpOffset -- Sp at which to start pushing
444 -> [CAddrMode] -- amodes of the components
445 -> FCode (VirtualSpOffset, -- final Sp
446 AbstractC) -- assignments (regs+stack)
448 pushUnboxedTuple sp amodes =
450 (arg_regs, _leftovers) = assignRegs [] (map getAmodeRep amodes)
452 (reg_arg_amodes, stk_arg_amodes) = splitAtList arg_regs amodes
454 -- separate the rest of the args into pointers and non-pointers
455 ( ptr_args, nptr_args ) =
456 partition (isFollowableRep . getAmodeRep) stk_arg_amodes
459 = mkAbstractCs (zipWithEqual "assign_to_reg2"
460 assign_to_reg arg_regs reg_arg_amodes)
463 -- push ptrs, then nonptrs, on the stack
464 mkStkAmodes sp ptr_args `thenFC` \ (ptr_sp, ptr_assts) ->
465 mkStkAmodes ptr_sp nptr_args `thenFC` \ (final_sp, nptr_assts) ->
468 mkAbstractCs [reg_arg_assts, ptr_assts, nptr_assts])
472 mkUnboxedTupleReturnCode :: Sequel -> Code
473 mkUnboxedTupleReturnCode sequel
475 -- can't update with an unboxed tuple!
476 UpdateCode -> panic "mkUnboxedTupleReturnCode"
478 CaseAlts _ (Just ([(_,(alt_absC,join_lbl))], _)) False ->
479 absC (CJump (CLbl join_lbl CodePtrRep))
481 other -> -- OnStack, or (CaseAlts ret_amode something)
482 sequelToAmode sequel `thenFC` \ ret_amode ->
483 absC (CReturn ret_amode DirectReturn)
485 -- -----------------------------------------------------------------------------
486 -- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
487 -- we want to do things in a slightly different order to normal:
489 -- - push return address
490 -- - adjust stack pointer
491 -- - r = call(args...)
492 -- - assign regs for unboxed tuple (usually just R1 = r)
493 -- - return to continuation
495 -- The return address (i.e. stack frame) must be on the stack before
496 -- doing the call in case the call ends up in the garbage collector.
498 -- Sadly, the information about the continuation is lost after we push it
499 -- (in order to avoid pushing it again), so we end up doing a needless
500 -- indirect jump (ToDo).
502 ccallReturnUnboxedTuple :: [CAddrMode] -> Code -> Code
503 ccallReturnUnboxedTuple amodes before_jump
504 = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
506 -- push a return address if necessary
507 pushReturnAddress eob `thenC`
508 setEndOfBlockInfo (EndOfBlockInfo args_sp (OnStack args_sp)) (
511 adjustSpAndHp args_sp `thenC`
515 returnUnboxedTuple amodes
518 -- -----------------------------------------------------------------------------
519 -- Calling an out-of-line primop
521 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
522 tailCallPrimOp op args =
523 -- we're going to perform a normal-looking tail call,
524 -- except that *all* the arguments will be in registers.
525 getArgAmodes args `thenFC` \ arg_amodes ->
526 let (arg_regs, leftovers) = assignAllRegs [] (map getAmodeRep arg_amodes)
529 = mkAbstractCs (zipWithEqual "assign_to_reg2"
530 assign_to_reg arg_regs arg_amodes)
533 absC (CJump (CLbl (mkRtsPrimOpLabel op) CodePtrRep))
536 ASSERT(null leftovers) -- no stack-resident args
538 getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
539 doFinalJump args_sp reg_arg_assts False{-not a LNE-} (const jump_to_primop)
541 -- -----------------------------------------------------------------------------
544 -- | We always push the return address just before performing a tail call
545 -- or return. The reason we leave it until then is because the stack
546 -- slot that the return address is to go into might contain something
549 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
550 -- case expression and the return address is still to be pushed.
552 -- There are cases where it doesn't look necessary to push the return
553 -- address: for example, just before doing a return to a known
554 -- continuation. However, the continuation will expect to find the
555 -- return address on the stack in case it needs to do a heap check.
557 pushReturnAddress :: EndOfBlockInfo -> Code
559 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _ False)) =
560 getSpRelOffset args_sp `thenFC` \ sp_rel ->
561 absC (CAssign (CVal sp_rel RetRep) amode)
563 -- For a polymorphic case, we have two return addresses to push: the case
564 -- return, and stg_seq_frame_info which turns a possible vectored return
565 -- into a direct one.
566 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _ True)) =
567 getSpRelOffset (args_sp-1) `thenFC` \ sp_rel ->
568 absC (CAssign (CVal sp_rel RetRep) amode) `thenC`
569 getSpRelOffset args_sp `thenFC` \ sp_rel ->
570 absC (CAssign (CVal sp_rel RetRep) (CLbl mkSeqInfoLabel RetRep))
571 pushReturnAddress _ = nopC
573 -- -----------------------------------------------------------------------------
576 assign_to_reg reg_id amode = CAssign (CReg reg_id) amode