compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 module CgTailCall (
   9         cgTailCall, performTailCall,
  10         performReturn, performPrimReturn,
  11         returnUnboxedTuple, ccallReturnUnboxedTuple,
  12         pushUnboxedTuple,
  13         tailCallPrimOp,
  14
  15         pushReturnAddress
  16     ) where
  17
  18 #include "HsVersions.h"
  19
  20 import CgMonad
  21 import CgBindery
  22 import CgInfoTbls
  23 import CgCallConv
  24 import CgStackery
  25 import CgHeapery
  26 import CgUtils
  27 import CgTicky
  28 import ClosureInfo
  29 import SMRep
  30 import Cmm
  31 import CmmUtils
  32 import CLabel
  33 import Type
  34 import Id
  35 import StgSyn
  36 import PrimOp
  37 import Outputable
  38
  39 import Control.Monad
  40
  41 -----------------------------------------------------------------------------
  42 -- Tail Calls
  43
  44 cgTailCall :: Id -> [StgArg] -> Code
  45
  46 -- Here's the code we generate for a tail call.  (NB there may be no
  47 -- arguments, in which case this boils down to just entering a variable.)
  48 --
  49 --    * Put args in the top locations of the stack.
  50 --    * Adjust the stack ptr
  51 --    * Make R1 point to the function closure if necessary.
  52 --    * Perform the call.
  53 --
  54 -- Things to be careful about:
  55 --
  56 --    * Don't overwrite stack locations before you have finished with
  57 --      them (remember you need the function and the as-yet-unmoved
  58 --      arguments).
  59 --    * Preferably, generate no code to replace x by x on the stack (a
  60 --      common situation in tail-recursion).
  61 --    * Adjust the stack high water mark appropriately.
  62 --
  63 -- Treat unboxed locals exactly like literals (above) except use the addr
  64 -- mode for the local instead of (CLit lit) in the assignment.
  65
  66 cgTailCall fun args
  67   = do  { fun_info <- getCgIdInfo fun
  68
  69         ; if isUnLiftedType (idType fun)
  70           then  -- Primitive return
  71                 ASSERT( null args )
  72             do  { fun_amode <- idInfoToAmode fun_info
  73                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  74
  75           else -- Normal case, fun is boxed
  76             do  { arg_amodes <- getArgAmodes args
  77                 ; performTailCall fun_info arg_amodes noStmts }
  78         }
  79
  80
  81 -- -----------------------------------------------------------------------------
  82 -- The guts of a tail-call
  83
  84 performTailCall
  85         :: CgIdInfo             -- The function
  86         -> [(CgRep,CmmExpr)]    -- Args
  87         -> CmmStmts             -- Pending simultaneous assignments
  88                                 --  *** GUARANTEED to contain only stack assignments.
  89         -> Code
  90
  91 performTailCall fun_info arg_amodes pending_assts
  92   | Just join_sp <- maybeLetNoEscape fun_info
  93   =        -- A let-no-escape is slightly different, because we
  94            -- arrange the stack arguments into pointers and non-pointers
  95            -- to make the heap check easier.  The tail-call sequence
  96            -- is very similar to returning an unboxed tuple, so we
  97            -- share some code.
  98      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
  99         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 100         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 101         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 102
 103   | otherwise
 104   = do  { fun_amode <- idInfoToAmode fun_info
 105         ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
 106               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 107                             | otherwise                 = noStmts
 108         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 109         ; this_pkg <- getThisPackage
 110
 111         ; case (getCallMethod this_pkg fun_name lf_info (length arg_amodes)) of
 112
 113             -- Node must always point to things we enter
 114             EnterIt -> do
 115                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 116                 ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
 117                 ; doFinalJump sp False (stmtC (CmmJump target [])) }
 118
 119             -- A function, but we have zero arguments.  It is already in WHNF,
 120             -- so we can just return it.
 121             -- As with any return, Node must point to it.
 122             ReturnIt -> do
 123                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 124                 ; doFinalJump sp False emitReturnInstr }
 125
 126             -- A real constructor.  Don't bother entering it,
 127             -- just do the right sort of return instead.
 128             -- As with any return, Node must point to it.
 129             ReturnCon con -> do
 130                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 131                 ; doFinalJump sp False emitReturnInstr }
 132
 133             JumpToIt lbl -> do
 134                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 135                 ; doFinalJump sp False (jumpToLbl lbl) }
 136
 137             -- A slow function call via the RTS apply routines
 138             -- Node must definitely point to the thing
 139             SlowCall -> do
 140                 {  when (not (null arg_amodes)) $ do
 141                    { if (isKnownFun lf_info)
 142                         then tickyKnownCallTooFewArgs
 143                         else tickyUnknownCall
 144                    ; tickySlowCallPat (map fst arg_amodes)
 145                    }
 146
 147                 ; let (apply_lbl, args, extra_args)
 148                         = constructSlowCall arg_amodes
 149
 150                 ; directCall sp apply_lbl args extra_args
 151                         (node_asst `plusStmts` pending_assts)
 152                 }
 153
 154             -- A direct function call (possibly with some left-over arguments)
 155             DirectEntry lbl arity -> do
 156                 { if arity == length arg_amodes
 157                         then tickyKnownCallExact
 158                         else do tickyKnownCallExtraArgs
 159                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 160
 161                 ; let
 162                      -- The args beyond the arity go straight on the stack
 163                      (arity_args, extra_args) = splitAt arity arg_amodes
 164
 165                 ; directCall sp lbl arity_args extra_args
 166                         (opt_node_asst `plusStmts` pending_assts)
 167                 }
 168         }
 169   where
 170     fun_name  = idName (cgIdInfoId fun_info)
 171     lf_info   = cgIdInfoLF fun_info
 172
 173
 174
 175 directCall sp lbl args extra_args assts = do
 176   let
 177         -- First chunk of args go in registers
 178         (reg_arg_amodes, stk_args) = assignCallRegs args
 179
 180         -- Any "extra" arguments are placed in frames on the
 181         -- stack after the other arguments.
 182         slow_stk_args = slowArgs extra_args
 183
 184         reg_assts = assignToRegs reg_arg_amodes
 185   --
 186   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 187
 188   emitSimultaneously (reg_assts     `plusStmts`
 189                       stk_assts     `plusStmts`
 190                       assts)
 191
 192   doFinalJump final_sp False (jumpToLbl lbl)
 193
 194 -- -----------------------------------------------------------------------------
 195 -- The final clean-up before we do a jump at the end of a basic block.
 196 -- This code is shared by tail-calls and returns.
 197
 198 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 199 doFinalJump final_sp is_let_no_escape jump_code
 200   = do  { -- Adjust the high-water mark if necessary
 201           adjustStackHW final_sp
 202
 203         -- Push a return address if necessary (after the assignments
 204         -- above, in case we clobber a live stack location)
 205         --
 206         -- DONT push the return address when we're about to jump to a
 207         -- let-no-escape: the final tail call in the let-no-escape
 208         -- will do this.
 209         ; eob <- getEndOfBlockInfo
 210         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 211
 212             -- Final adjustment of Sp/Hp
 213         ; adjustSpAndHp final_sp
 214
 215             -- and do the jump
 216         ; jump_code }
 217
 218 -- ----------------------------------------------------------------------------
 219 -- A general return (just a special case of doFinalJump, above)
 220
 221 performReturn :: Code   -- The code to execute to actually do the return
 222               -> Code
 223
 224 performReturn finish_code
 225   = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
 226         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 227
 228 -- ----------------------------------------------------------------------------
 229 -- Primitive Returns
 230 -- Just load the return value into the right register, and return.
 231
 232 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 233                   -> Code
 234 performPrimReturn rep amode
 235   =  do { whenC (not (isVoidArg rep))
 236                 (stmtC (CmmAssign ret_reg amode))
 237         ; performReturn emitReturnInstr }
 238   where
 239     ret_reg = dataReturnConvPrim rep
 240
 241 -- ---------------------------------------------------------------------------
 242 -- Unboxed tuple returns
 243
 244 -- These are a bit like a normal tail call, except that:
 245 --
 246 --   - The tail-call target is an info table on the stack
 247 --
 248 --   - We separate stack arguments into pointers and non-pointers,
 249 --     to make it easier to leave things in a sane state for a heap check.
 250 --     This is OK because we can never partially-apply an unboxed tuple,
 251 --     unlike a function.  The same technique is used when calling
 252 --     let-no-escape functions, because they also can't be partially
 253 --     applied.
 254
 255 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 256 returnUnboxedTuple amodes
 257   = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
 258         ; tickyUnboxedTupleReturn (length amodes)
 259         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 260         ; emitSimultaneously assts
 261         ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
 262
 263 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 264                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 265                  -> FCode (VirtualSpOffset,     -- final Sp
 266                            CmmStmts)            -- assignments (regs+stack)
 267
 268 pushUnboxedTuple sp []
 269   = return (sp, noStmts)
 270 pushUnboxedTuple sp amodes
 271   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 272
 273                 -- separate the rest of the args into pointers and non-pointers
 274                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 275                 reg_arg_assts = assignToRegs reg_arg_amodes
 276
 277             -- push ptrs, then nonptrs, on the stack
 278         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 279         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 280
 281         ; returnFC (final_sp,
 282                     reg_arg_assts `plusStmts`
 283                     ptr_assts `plusStmts` nptr_assts) }
 284
 285
 286 -- -----------------------------------------------------------------------------
 287 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 288 -- we want to do things in a slightly different order to normal:
 289 --
 290 --              - push return address
 291 --              - adjust stack pointer
 292 --              - r = call(args...)
 293 --              - assign regs for unboxed tuple (usually just R1 = r)
 294 --              - return to continuation
 295 --
 296 -- The return address (i.e. stack frame) must be on the stack before
 297 -- doing the call in case the call ends up in the garbage collector.
 298 --
 299 -- Sadly, the information about the continuation is lost after we push it
 300 -- (in order to avoid pushing it again), so we end up doing a needless
 301 -- indirect jump (ToDo).
 302
 303 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 304 ccallReturnUnboxedTuple amodes before_jump
 305   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 306
 307         -- Push a return address if necessary
 308         ; pushReturnAddress eob
 309         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 310             (do { adjustSpAndHp args_sp
 311                 ; before_jump
 312                 ; returnUnboxedTuple amodes })
 313     }
 314
 315 -- -----------------------------------------------------------------------------
 316 -- Calling an out-of-line primop
 317
 318 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 319 tailCallPrimOp op args
 320  = do   {       -- We're going to perform a normal-looking tail call,
 321                 -- except that *all* the arguments will be in registers.
 322                 -- Hence the ASSERT( null leftovers )
 323           arg_amodes <- getArgAmodes args
 324         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 325               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 326
 327         ; ASSERT(null leftovers) -- no stack-resident args
 328           emitSimultaneously (assignToRegs arg_regs)
 329
 330         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 331         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 332
 333 -- -----------------------------------------------------------------------------
 334 -- Return Addresses
 335
 336 -- We always push the return address just before performing a tail call
 337 -- or return.  The reason we leave it until then is because the stack
 338 -- slot that the return address is to go into might contain something
 339 -- useful.
 340 --
 341 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 342 -- case expression and the return address is still to be pushed.
 343 --
 344 -- There are cases where it doesn't look necessary to push the return
 345 -- address: for example, just before doing a return to a known
 346 -- continuation.  However, the continuation will expect to find the
 347 -- return address on the stack in case it needs to do a heap check.
 348
 349 pushReturnAddress :: EndOfBlockInfo -> Code
 350
 351 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _))
 352   = do  { sp_rel <- getSpRelOffset args_sp
 353         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 354
 355 pushReturnAddress _ = nopC
 356
 357 -- -----------------------------------------------------------------------------
 358 -- Misc.
 359
 360 jumpToLbl :: CLabel -> Code
 361 -- Passes no argument to the destination procedure
 362 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 363
 364 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 365 assignToRegs reg_args
 366   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 367             | (expr, reg_id) <- reg_args ]
 368 \end{code}
 369
 370
 371 %************************************************************************
 372 %*                                                                      *
 373 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 374 %*                                                                      *
 375 %************************************************************************
 376
 377 This function adjusts the stack and heap pointers just before a tail
 378 call or return.  The stack pointer is adjusted to its final position
 379 (i.e. to point to the last argument for a tail call, or the activation
 380 record for a return).  The heap pointer may be moved backwards, in
 381 cases where we overallocated at the beginning of the basic block (see
 382 CgCase.lhs for discussion).
 383
 384 These functions {\em do not} deal with high-water-mark adjustment.
 385 That's done by functions which allocate stack space.
 386
 387 \begin{code}
 388 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 389               -> Code
 390 adjustSpAndHp newRealSp
 391   = do  { -- Adjust stack, if necessary.
 392           -- NB: the conditional on the monad-carried realSp
 393           --     is out of line (via codeOnly), to avoid a black hole
 394         ; new_sp <- getSpRelOffset newRealSp
 395         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 396         ; setRealSp newRealSp                   -- where realSp==newRealSp
 397
 398           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 399           -- because the latter was advanced to deal with the worst-case branch
 400           -- of the code, and we may be in a better-case branch.  In that case,
 401           -- move the real Hp *back* and retract some ticky allocation count.
 402         ; hp_usg <- getHpUsage
 403         ; let rHp = realHp hp_usg
 404               vHp = virtHp hp_usg
 405         ; new_hp <- getHpRelOffset vHp
 406         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 407         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 408         ; setRealHp vHp
 409         }
 410 \end{code}