2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4 % $Id: CgTailCall.lhs,v 1.24 2000/03/23 17:45:19 simonpj Exp $
6 %********************************************************
8 \section[CgTailCall]{Tail calls: converting @StgApps@}
10 %********************************************************
15 performReturn, performPrimReturn,
16 mkStaticAlgReturnCode, mkDynamicAlgReturnCode,
17 mkUnboxedTupleReturnCode, returnUnboxedTuple,
27 #include "HsVersions.h"
31 import PprAbsC ( pprAmode )
33 import AbsCUtils ( mkAbstractCs, mkAbsCStmts, getAmodeRep )
34 import CgBindery ( getArgAmodes, getCAddrMode, getCAddrModeAndInfo )
35 import CgRetConv ( dataReturnConvPrim,
36 ctrlReturnConvAlg, CtrlReturnConvention(..),
37 assignAllRegs, assignRegs
39 import CgStackery ( mkTaggedStkAmodes, adjustStackHW )
40 import CgUsages ( getSpRelOffset, adjustSpAndHp )
41 import CgUpdate ( pushSeqFrame )
42 import CLabel ( mkUpdInfoLabel, mkRtsPrimOpLabel,
43 mkBlackHoleInfoTableLabel )
44 import ClosureInfo ( nodeMustPointToIt,
45 getEntryConvention, EntryConvention(..),
48 import CmdLineOpts ( opt_DoSemiTagging )
49 import Id ( Id, idType, idName )
50 import DataCon ( DataCon, dataConTyCon, dataConTag, fIRST_TAG )
51 import Literal ( mkMachInt )
52 import Maybes ( assocMaybe, maybeToBool )
53 import PrimRep ( PrimRep(..) )
54 import StgSyn ( StgArg, GenStgArg(..) )
55 import Type ( isUnLiftedType )
56 import TyCon ( TyCon )
57 import PrimOp ( PrimOp )
58 import Util ( zipWithEqual )
59 import Unique ( mkPseudoUnique1 )
61 import Panic ( panic, assertPanic )
64 %************************************************************************
66 \subsection[tailcall-doc]{Documentation}
68 %************************************************************************
71 cgTailCall :: Id -> [StgArg] -> Code
74 Here's the code we generate for a tail call. (NB there may be no
75 arguments, in which case this boils down to just entering a variable.)
78 \item Adjust the stack ptr to \tr{tailSp + #args}.
79 \item Put args in the top locations of the resulting stack.
80 \item Make Node point to the function closure.
81 \item Enter the function closure.
84 Things to be careful about:
86 \item Don't overwrite stack locations before you have finished with
87 them (remember you need the function and the as-yet-unmoved
89 \item Preferably, generate no code to replace x by x on the stack (a
90 common situation in tail-recursion).
91 \item Adjust the stack high water mark appropriately.
94 Treat unboxed locals exactly like literals (above) except use the addr
95 mode for the local instead of (CLit lit) in the assignment.
97 Case for unboxed @Ids@ first:
100 | isUnLiftedType (idType fun)
101 = getCAddrMode fun `thenFC` \ amode ->
102 performPrimReturn (ppr fun) amode
105 The general case (@fun@ is boxed):
107 cgTailCall fun args = performTailCall fun args
110 %************************************************************************
112 \subsection[return-and-tail-call]{Return and tail call}
114 %************************************************************************
117 performPrimReturn :: SDoc -- Just for debugging (sigh)
118 -> CAddrMode -- The thing to return
121 performPrimReturn doc amode
123 kind = getAmodeRep amode
124 ret_reg = WARN( case kind of { PtrRep -> True; other -> False }, text "primRet" <+> doc <+> pprAmode amode )
125 dataReturnConvPrim kind
127 assign_possibly = case kind of
129 kind -> (CAssign (CReg ret_reg) amode)
131 performReturn assign_possibly (mkPrimReturnCode doc)
133 mkPrimReturnCode :: SDoc -- Debugging only
136 mkPrimReturnCode doc UpdateCode = pprPanic "mkPrimReturnCode: Upd" doc
137 mkPrimReturnCode doc sequel = sequelToAmode sequel `thenFC` \ dest_amode ->
138 absC (CReturn dest_amode DirectReturn)
139 -- Direct, no vectoring
141 -- Constructor is built on the heap; Node is set.
142 -- All that remains is
143 -- (a) to set TagReg, if necessary
144 -- (c) to do the right sort of jump.
146 mkStaticAlgReturnCode :: DataCon -- The constructor
147 -> Sequel -- where to return to
150 mkStaticAlgReturnCode con sequel
151 = -- Generate profiling code if necessary
152 (case return_convention of
153 VectoredReturn sz -> profCtrC SLIT("TICK_VEC_RETURN") [mkIntCLit sz]
157 -- Set tag if necessary
158 -- This is done by a macro, because if we are short of registers
159 -- we don't set TagReg; instead the continuation gets the tag
160 -- by indexing off the info ptr
161 (case return_convention of
163 UnvectoredReturn no_of_constrs
165 -> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag])
170 -- Generate the right jump or return
172 UpdateCode -> -- Ha! We can go direct to the update code,
173 -- (making sure to jump to the *correct* update
175 absC (CReturn (CLbl mkUpdInfoLabel CodePtrRep)
178 CaseAlts _ (Just (alts, _)) -> -- Ho! We know the constructor so
179 -- we can go right to the alternative
181 case assocMaybe alts tag of
182 Just (alt_absC, join_lbl) ->
183 absC (CJump (CLbl join_lbl CodePtrRep))
184 Nothing -> panic "mkStaticAlgReturnCode: default"
185 -- The Nothing case should never happen;
186 -- it's the subject of a wad of special-case
187 -- code in cgReturnCon
189 -- can't be a SeqFrame, because we're returning a constructor
191 other -> -- OnStack, or (CaseAlts ret_amode Nothing)
192 sequelToAmode sequel `thenFC` \ ret_amode ->
193 absC (CReturn ret_amode return_info)
198 tycon = dataConTyCon con
199 return_convention = ctrlReturnConvAlg tycon
200 zero_indexed_tag = tag - fIRST_TAG -- Adjust tag to be zero-indexed
201 -- cf AbsCUtils.mkAlgAltsCSwitch
204 case return_convention of
205 UnvectoredReturn _ -> DirectReturn
206 VectoredReturn _ -> StaticVectoredReturn zero_indexed_tag
208 mkUnboxedTupleReturnCode :: Sequel -> Code
209 mkUnboxedTupleReturnCode sequel
211 -- can't update with an unboxed tuple!
212 UpdateCode -> panic "mkUnboxedTupleReturnCode"
214 CaseAlts _ (Just ([(_,(alt_absC,join_lbl))], _)) ->
215 absC (CJump (CLbl join_lbl CodePtrRep))
217 -- can't be a SeqFrame
219 other -> -- OnStack, or (CaseAlts ret_amode something)
220 sequelToAmode sequel `thenFC` \ ret_amode ->
221 absC (CReturn ret_amode DirectReturn)
223 -- This function is used by PrimOps that return enumerated types (i.e.
224 -- all the comparison operators).
226 mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code
228 mkDynamicAlgReturnCode tycon dyn_tag sequel
229 = case ctrlReturnConvAlg tycon of
232 profCtrC SLIT("TICK_VEC_RETURN") [mkIntCLit sz] `thenC`
233 sequelToAmode sequel `thenFC` \ ret_addr ->
234 absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag))
236 UnvectoredReturn no_of_constrs ->
238 -- Set tag if necessary
239 -- This is done by a macro, because if we are short of registers
240 -- we don't set TagReg; instead the continuation gets the tag
241 -- by indexing off the info ptr
242 (if no_of_constrs > 1 then
243 absC (CMacroStmt SET_TAG [dyn_tag])
249 sequelToAmode sequel `thenFC` \ ret_addr ->
250 -- Generate the right jump or return
251 absC (CReturn ret_addr DirectReturn)
255 performReturn :: AbstractC -- Simultaneous assignments to perform
256 -> (Sequel -> Code) -- The code to execute to actually do
257 -- the return, given an addressing mode
258 -- for the return address
261 -- this is just a special case of doTailCall, later.
262 performReturn sim_assts finish_code
263 = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
265 -- Do the simultaneous assignments,
266 doSimAssts sim_assts `thenC`
268 -- push a return address if necessary
269 -- (after the assignments above, in case we clobber a live
271 pushReturnAddress eob `thenC`
274 adjustSpAndHp args_sp `thenC`
277 finish_code sequel -- "sequel" is `robust' in that it doesn't
278 -- depend on stk-ptr values
281 Returning unboxed tuples. This is mainly to support _ccall_GC_, where
282 we want to do things in a slightly different order to normal:
284 - push return address
285 - adjust stack pointer
287 - assign regs for unboxed tuple (usually just R1 = r)
288 - return to continuation
290 The return address (i.e. stack frame) must be on the stack before
291 doing the call in case the call ends up in the garbage collector.
293 Sadly, the information about the continuation is lost after we push it
294 (in order to avoid pushing it again), so we end up doing a needless
295 indirect jump (ToDo).
298 returnUnboxedTuple :: [CAddrMode] -> Code -> Code
299 returnUnboxedTuple amodes before_jump
300 = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
302 -- push a return address if necessary
303 pushReturnAddress eob `thenC`
304 setEndOfBlockInfo (EndOfBlockInfo args_sp (OnStack args_sp)) (
307 adjustSpAndHp args_sp `thenC`
311 let (ret_regs, leftovers) = assignRegs [] (map getAmodeRep amodes)
314 profCtrC SLIT("TICK_RET_UNBOXED_TUP") [mkIntCLit (length amodes)] `thenC`
316 doTailCall amodes ret_regs
317 mkUnboxedTupleReturnCode
318 (length leftovers) {- fast args arity -}
319 AbsCNop {-no pending assigments-}
320 Nothing {-not a let-no-escape-}
321 False {-node doesn't point-}
326 performTailCall :: Id -- Function
330 performTailCall fun args
331 = -- Get all the info we have about the function and args and go on to
333 getCAddrModeAndInfo fun `thenFC` \ (fun_amode, lf_info) ->
334 getArgAmodes args `thenFC` \ arg_amodes ->
337 fun fun_amode lf_info arg_amodes
338 AbsCNop {- No pending assignments -}
341 -- generating code for a tail call to a function (or closure)
343 tailCallFun :: Id -> CAddrMode -- Function and its amode
344 -> LambdaFormInfo -- Info about the function
345 -> [CAddrMode] -- Arguments
347 -> AbstractC -- Pending simultaneous assignments
348 -- *** GUARANTEED to contain only stack
351 -- In ptic, we don't need to look in
352 -- here to discover all live regs
356 tailCallFun fun fun_amode lf_info arg_amodes pending_assts
357 = nodeMustPointToIt lf_info `thenFC` \ node_points ->
358 getEntryConvention (idName fun) lf_info
359 (map getAmodeRep arg_amodes) `thenFC` \ entry_conv ->
362 = if node_points then
363 CAssign (CReg node) fun_amode
367 (arg_regs, finish_code, arity)
371 profCtrC SLIT("TICK_ENT_VIA_NODE") [] `thenC`
372 absC (CJump (CMacroExpr CodePtrRep ENTRY_CODE
373 [CVal (nodeRel 0) DataPtrRep]))
375 StdEntry lbl -> ([], absC (CJump (CLbl lbl CodePtrRep)), 0)
376 DirectEntry lbl arity regs ->
377 (regs, absC (CJump (CLbl lbl CodePtrRep)),
380 -- set up for a let-no-escape if necessary
381 join_sp = case fun_amode of
382 CJoinPoint sp -> Just sp
385 doTailCall arg_amodes arg_regs (const finish_code) arity
386 (mkAbstractCs [node_asst,pending_assts]) join_sp node_points
389 -- this generic tail call code is used for both function calls and returns.
392 :: [CAddrMode] -- args to pass to function
393 -> [MagicId] -- registers to use
394 -> (Sequel->Code) -- code to perform jump
395 -> Int -- number of "fast" stack arguments
396 -> AbstractC -- pending assignments
397 -> Maybe VirtualSpOffset -- sp offset to trim stack to:
398 -- USED iff destination is a let-no-escape
399 -> Bool -- node points to the closure to enter
402 doTailCall arg_amodes arg_regs finish_code arity pending_assts
403 maybe_join_sp node_points
404 = getEndOfBlockInfo `thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
407 no_of_args = length arg_amodes
409 (reg_arg_amodes, stk_arg_amodes) = splitAt (length arg_regs) arg_amodes
410 -- We get some stk_arg_amodes if (a) no regs, or
411 -- (b) args beyond arity
414 = mkAbstractCs (zipWithEqual "assign_to_reg2"
415 assign_to_reg arg_regs reg_arg_amodes)
417 assign_to_reg reg_id amode = CAssign (CReg reg_id) amode
419 join_sp = case maybe_join_sp of
420 Just sp -> ASSERT(not (args_sp > sp)) sp
421 -- If ASSERTion fails: Oops: the join point has *lower*
422 -- stack ptrs than the continuation Note that we take
423 -- the Sp point without the return address here. The
424 -- return address is put on by the let-no-escapey thing
428 (fast_stk_amodes, tagged_stk_amodes) =
429 splitAt arity stk_arg_amodes
431 -- eager blackholing, at the end of the basic block.
432 node_save = CTemp (mkPseudoUnique1 2) DataPtrRep
433 (r1_tmp_asst, bh_asst)
436 -- no: UpdateCode doesn't tell us that we're in a thunk's entry code.
437 -- we might be in a case continuation later down the line. Also,
438 -- we might have pushed a return address on the stack, if we're in
439 -- a case scrut, and still be in the thunk's entry code.
441 (CAssign node_save nodeReg,
442 CAssign (CVal (CIndex node_save (mkIntCLit 0) PtrRep)
444 (CLbl mkBlackHoleInfoTableLabel DataPtrRep))
446 _ -> (AbsCNop, AbsCNop)
448 -- We can omit tags on the arguments passed to the fast entry point,
449 -- but we have to be careful to fill in the tags on any *extra*
450 -- arguments we're about to push on the stack.
452 mkTaggedStkAmodes join_sp tagged_stk_amodes `thenFC`
453 \ (fast_sp, tagged_arg_assts, tag_assts) ->
455 mkTaggedStkAmodes fast_sp fast_stk_amodes `thenFC`
456 \ (final_sp, fast_arg_assts, _) ->
458 -- adjust the high-water mark if necessary
459 adjustStackHW final_sp `thenC`
461 -- The stack space for the pushed return addess,
462 -- with any args pushed on top, is recorded in final_sp.
464 -- Do the simultaneous assignments,
465 doSimAssts (mkAbstractCs [r1_tmp_asst,
473 -- push a return address if necessary
474 -- (after the assignments above, in case we clobber a live
477 -- DONT push the return address when we're about
478 -- to jump to a let-no-escape: the final tail call
479 -- in the let-no-escape will do this.
480 (if (maybeToBool maybe_join_sp)
482 else pushReturnAddress eob) `thenC`
484 -- Final adjustment of Sp/Hp
485 adjustSpAndHp final_sp `thenC`
487 -- Now decide about semi-tagging
489 semi_tagging_on = opt_DoSemiTagging
491 case (semi_tagging_on, arg_amodes, node_points, sequel) of
494 -- *************** The semi-tagging case ***************
496 {- XXX leave this out for now.
497 ( True, [], True, CaseAlts _ (Just (st_alts, maybe_deflt_join_details))) ->
499 -- Whoppee! Semi-tagging rules OK!
500 -- (a) semi-tagging is switched on
501 -- (b) there are no arguments,
502 -- (c) Node points to the closure
503 -- (d) we have a case-alternative sequel with
504 -- some visible alternatives
506 -- Why is test (c) necessary?
507 -- Usually Node will point to it at this point, because we're
508 -- scrutinsing something which is either a thunk or a
510 -- But not always! The example I came across is when we have
511 -- a top-level Double:
513 -- ... (case lit.3 of ...) ...
514 -- Here, lit.3 is built as a re-entrant thing, which you must enter.
515 -- (OK, the simplifier should have eliminated this, but it's
516 -- easy to deal with the case anyway.)
518 join_details_to_code (load_regs_and_profiling_code, join_lbl)
519 = load_regs_and_profiling_code `mkAbsCStmts`
520 CJump (CLbl join_lbl CodePtrRep)
522 semi_tagged_alts = [ (mkMachInt (fromInt (tag - fIRST_TAG)),
523 join_details_to_code join_details)
524 | (tag, join_details) <- st_alts
528 -- Enter Node (we know infoptr will have the info ptr in it)!
530 CCallProfCtrMacro SLIT("RET_SEMI_FAILED")
531 [CMacroExpr IntRep INFO_TAG [CReg infoptr]],
532 CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr]) ]
536 CAssign (CReg infoptr)
537 (CVal (NodeRel zeroOff) DataPtrRep),
539 case maybe_deflt_join_details of
541 CSwitch (CMacroExpr IntRep INFO_TAG [CReg infoptr])
545 CSwitch (CMacroExpr IntRep EVAL_TAG [CReg infoptr])
546 [(mkMachInt 0, enter_jump)]
548 (CMacroExpr IntRep INFO_TAG [CReg infoptr])
550 (join_details_to_code details))
555 -- *************** The non-semi-tagging case ***************
557 other -> finish_code sequel
560 %************************************************************************
562 \subsection[tailCallPrimOp]{@tailCallPrimOp@}
564 %************************************************************************
567 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
568 tailCallPrimOp op args =
569 -- we're going to perform a normal-looking tail call,
570 -- except that *all* the arguments will be in registers.
571 getArgAmodes args `thenFC` \ arg_amodes ->
572 let (arg_regs, leftovers) = assignAllRegs [] (map getAmodeRep arg_amodes)
574 ASSERT(null leftovers) -- no stack-resident args
575 doTailCall arg_amodes arg_regs
576 (const (absC (CJump (CLbl (mkRtsPrimOpLabel op) CodePtrRep))))
577 0 {- arity shouldn't matter, all args in regs -}
578 AbsCNop {- no pending assignments -}
579 Nothing {- not a let-no-escape -}
580 False {- node doesn't point -}
583 %************************************************************************
585 \subsection[doSimAssts]{@doSimAssts@}
587 %************************************************************************
589 @doSimAssts@ happens at the end of every block of code.
590 They are separate because we sometimes do some jiggery-pokery in between.
593 doSimAssts :: AbstractC -> Code
596 = absC (CSimultaneous sim_assts)
599 %************************************************************************
601 \subsection[retAddr]{@Return Addresses@}
603 %************************************************************************
605 We always push the return address just before performing a tail call
606 or return. The reason we leave it until then is because the stack
607 slot that the return address is to go into might contain something
610 If the end of block info is CaseAlts, then we're in the scrutinee of a
611 case expression and the return address is still to be pushed.
613 There are cases where it doesn't look necessary to push the return
614 address: for example, just before doing a return to a known
615 continuation. However, the continuation will expect to find the
616 return address on the stack in case it needs to do a heap check.
619 pushReturnAddress :: EndOfBlockInfo -> Code
620 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _)) =
621 getSpRelOffset args_sp `thenFC` \ sp_rel ->
622 absC (CAssign (CVal sp_rel RetRep) amode)
623 pushReturnAddress (EndOfBlockInfo args_sp sequel@(SeqFrame amode _)) =
624 pushSeqFrame args_sp `thenFC` \ ret_sp ->
625 getSpRelOffset ret_sp `thenFC` \ sp_rel ->
626 absC (CAssign (CVal sp_rel RetRep) amode)
627 pushReturnAddress _ = nopC