%
% (c) The University of Glasgow 2006
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
% Code generation for tail calls.

\begin{code}
module CgTailCall (
        cgTailCall, performTailCall,
        performReturn, performPrimReturn,
        returnUnboxedTuple, ccallReturnUnboxedTuple,
        pushUnboxedTuple,
        tailCallPrimOp, tailCallPrimCall,

        pushReturnAddress
    ) where

#include "HsVersions.h"
-----------------------------------------------------------------------------
-- Tail Calls

cgTailCall :: Id -> [StgArg] -> Code
-- Here's the code we generate for a tail call.  (NB there may be no
-- arguments, in which case this boils down to just entering a variable.)
--
--    * Put args in the top locations of the stack.
--    * Adjust the stack ptr.
--    * Make R1 point to the function closure if necessary.
--    * Perform the call.
--
-- Things to be careful about:
--
--    * Don't overwrite stack locations before you have finished with
--      them (remember you need the function and the as-yet-unmoved
--      arguments); see the sketch below.
--    * Preferably, generate no code to replace x by x on the stack (a
--      common situation in tail-recursion).
--    * Adjust the stack high water mark appropriately.
--
-- Treat unboxed locals exactly like literals (above) except use the addr
-- mode for the local instead of (CLit lit) in the assignment.
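-- The "careful" points above are what emitSimultaneously enforces.  Below is
-- a minimal, self-contained sketch of that discipline (hypothetical Slot and
-- Move types, not the real CmmStmts machinery): identity moves are dropped,
-- a move runs only once nothing else still reads its destination, and cycles
-- are broken by routing one value through a temporary slot.
{-
import Data.List (partition)

type Slot = Int
data Move = Move { dst :: Slot, src :: Slot } deriving Show

-- Order the moves of a shuffle so no slot is overwritten before it is read.
-- Assumes each slot is written at most once.
schedule :: Slot -> [Move] -> [Move]
schedule temp moves
  | null nontrivial  = []
  | not (null ready) = ready ++ schedule temp blocked
  | otherwise        = -- every pending move reads someone else's destination:
                       -- spill one source to the temporary and retry
                       Move temp s : schedule temp (Move d temp : rest)
  where
    nontrivial       = [ m | m <- moves, dst m /= src m ]   -- drop x := x
    (ready, blocked) = partition safe nontrivial
    safe m           = dst m `notElem` map src nontrivial
    Move d s : rest  = blocked

-- e.g. schedule 99 [Move 1 2, Move 2 1]
--        == [Move 99 2, Move 2 1, Move 1 99]
-}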
cgTailCall fun args
  = do  { fun_info <- getCgIdInfo fun

        ; if isUnLiftedType (idType fun)
          then  -- Primitive return
                ASSERT( null args )
            do  { fun_amode <- idInfoToAmode fun_info
                ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }

          else -- Normal case, fun is boxed
            do  { arg_amodes <- getArgAmodes args
                ; performTailCall fun_info arg_amodes noStmts }
        }
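-- For example (illustrative only): a tail call to an unlifted variable such
-- as (x :: Int#) takes the performPrimReturn branch and simply loads x into
-- its return register, while a call (f a b) to a boxed function takes the
-- performTailCall branch below.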
-- -----------------------------------------------------------------------------
-- The guts of a tail-call

performTailCall
        :: CgIdInfo             -- The function
        -> [(CgRep,CmmExpr)]    -- Args
        -> CmmStmts             -- Pending simultaneous assignments
                                --  *** GUARANTEED to contain only stack assignments.
        -> Code
performTailCall fun_info arg_amodes pending_assts
  | Just join_sp <- maybeLetNoEscape fun_info
  =     -- A let-no-escape is slightly different, because we
        -- arrange the stack arguments into pointers and non-pointers
        -- to make the heap check easier.  The tail-call sequence
        -- is very similar to returning an unboxed tuple, so we
        -- share some code.
     do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
        ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
        ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
        ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
  | otherwise
  = do  { fun_amode <- idInfoToAmode fun_info
        ; let assignSt  = CmmAssign nodeReg fun_amode
              node_asst = oneStmt assignSt
              opt_node_asst | nodeMustPointToIt lf_info = node_asst
                            | otherwise                 = noStmts
        ; EndOfBlockInfo sp _ <- getEndOfBlockInfo

        ; dflags <- getDynFlags
        ; case getCallMethod dflags fun_name fun_has_cafs lf_info (length arg_amodes) of
            -- Node must always point to things we enter
            EnterIt -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; let target       = entryCode (closureInfoPtr (CmmReg nodeReg))
                      enterClosure = stmtC (CmmJump target [])
                      -- If this is a scrutinee,
                      -- let's check if the closure is a constructor
                      -- so we can directly jump to the alternatives switch
                      -- statement.
                      jumpInstr = getEndOfBlockInfo >>=
                                  maybeSwitchOnCons enterClosure
                ; doFinalJump sp False jumpInstr }
            -- A function, but we have zero arguments.  It is already in WHNF,
            -- so we can just return it.
            -- As with any return, Node must point to it.
            ReturnIt -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False emitReturnInstr }
            -- A real constructor.  Don't bother entering it,
            -- just do the right sort of return instead.
            -- As with any return, Node must point to it.
            ReturnCon _ -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False emitReturnInstr }
            JumpToIt lbl -> do
                { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False (jumpToLbl lbl) }
            -- A slow function call via the RTS apply routines
            -- Node must definitely point to the thing
            SlowCall -> do
                { when (not (null arg_amodes)) $ do
                  { if isKnownFun lf_info
                        then tickyKnownCallTooFewArgs
                        else tickyUnknownCall
                  ; tickySlowCallPat (map fst arg_amodes)
                  }

                ; let (apply_lbl, args, extra_args)
                        = constructSlowCall arg_amodes

                ; directCall sp apply_lbl args extra_args
                        (node_asst `plusStmts` pending_assts)
                }
            -- A direct function call (possibly with some left-over arguments)
            DirectEntry lbl arity -> do
                { if arity == length arg_amodes
                        then tickyKnownCallExact
                        else do tickyKnownCallExtraArgs
                                tickySlowCallPat (map fst (drop arity arg_amodes))

                ; let
                     -- The args beyond the arity go straight on the stack;
                     -- see the worked example after this function.
                     (arity_args, extra_args) = splitAt arity arg_amodes

                ; directCall sp lbl arity_args extra_args
                        (opt_node_asst `plusStmts` pending_assts)
                }
        }
  where
    fun_id       = cgIdInfoId fun_info
    fun_name     = idName fun_id
    lf_info      = cgIdInfoLF fun_info
    fun_has_cafs = idCafInfo fun_id
    untag_node   = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))

    -- Test if closure is a constructor
    maybeSwitchOnCons enterClosure eob
      | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob,
        not opt_SccProfilingOn
        -- we can't shortcut when profiling is on, because we have
        -- to enter a closure to mark it as "used" for LDV profiling
      = do { is_constr <- newLabelC
             -- Is the pointer tagged?
             -- Yes, jump to the switch statement
           ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
                        is_constr)
             -- No, enter the closure.
           ; enterClosure
           ; labelC is_constr
           ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
           }
      {- An older, disabled variant: instead of testing the pointer tag, it
         fetched the closure type from the info table (cf. the where-clause
         bindings below).

      -- This is a scrutinee for a case expression
      -- so let's see if we can directly inspect the closure
      | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
      = do { no_cons <- newLabelC
             -- Both the NCG and gcc optimize away the temp
           ; z <- newTemp wordRep
           ; stmtC (CmmAssign z tag_expr)
           ; let tag = CmmReg z
             -- Is the closure a cons?
           ; stmtC (CmmCondBranch (cond1 tag) no_cons)
           ; stmtC (CmmCondBranch (cond2 tag) no_cons)
             -- Yes, jump to the switch statement
           ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
           ; labelC no_cons
             -- No, enter the closure.
           ; enterClosure
           }
      -}
      -- No case expression involved, enter the closure.
      | otherwise
      = do { stmtC untag_node
           ; enterClosure
           }
        where
          -- cond1 could also be written (cmmULtWord tag lowCons), but since
          -- lowCons is 1, (tag < 1) holds exactly when (tag == 0), and the
          -- equality test is cheaper.
          tag_expr  = cmmGetClosureType (CmmReg nodeReg)
          cond1 tag = cmmEqWord tag (CmmLit (mkIntCLit 0))
          cond2 tag = cmmUGtWord tag highCons
          lowCons   = CmmLit (mkIntCLit 1)  -- CONSTR              (from ClosureTypes.h)
          highCons  = CmmLit (mkIntCLit 8)  -- CONSTR_NOCAF_STATIC (from ClosureTypes.h)
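-- A worked example of the DirectEntry split above (argument names invented
-- for illustration): for a known function of arity 2 applied to three
-- argument amodes [a1,a2,a3],
--
--      splitAt 2 [a1,a2,a3]  ==  ([a1,a2], [a3])
--
-- so [a1,a2] travel by the fast calling convention of the entry point, and
-- the leftover [a3] is pushed on the stack for the RTS apply machinery to
-- consume once the call yields a function value.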
directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)]
           -> [(CgRep, CmmExpr)] -> CmmStmts
           -> Code

directCall sp lbl args extra_args assts = do
  let
        -- First chunk of args go in registers
        (reg_arg_amodes, stk_args) = assignCallRegs args

        -- Any "extra" arguments are placed in frames on the
        -- stack after the other arguments.
        slow_stk_args = slowArgs extra_args

        reg_assts = assignToRegs reg_arg_amodes

  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)

  emitSimultaneously (reg_assts `plusStmts`
                      stk_assts `plusStmts`
                      assts)

  doFinalJump final_sp False (jumpToLbl lbl)
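-- A standalone sketch (invented names, not the real assignCallRegs, which
-- also distinguishes register pools by CgRep) of the register-first
-- convention directCall relies on: arguments are matched against a finite
-- pool of argument registers, and whatever is left over becomes
-- stack-resident.
{-
assignCallRegsSketch :: [String] -> [a] -> ([(a, String)], [a])
assignCallRegsSketch regs args
  = (zip args regs, drop (length regs) args)

-- e.g. assignCallRegsSketch ["R2","R3","R4"] [a1,a2,a3,a4,a5]
--        == ([(a1,"R2"), (a2,"R3"), (a3,"R4")], [a4,a5])
-}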
-- -----------------------------------------------------------------------------
-- The final clean-up before we do a jump at the end of a basic block.
-- This code is shared by tail-calls and returns.

doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
doFinalJump final_sp is_let_no_escape jump_code
  = do  { -- Adjust the high-water mark if necessary
          adjustStackHW final_sp

          -- Push a return address if necessary (after the assignments
          -- above, in case we clobber a live stack location)
          --
          -- DON'T push the return address when we're about to jump to a
          -- let-no-escape: the final tail call in the let-no-escape
          -- will do this.
        ; eob <- getEndOfBlockInfo
        ; whenC (not is_let_no_escape) (pushReturnAddress eob)

          -- Final adjustment of Sp/Hp
        ; adjustSpAndHp final_sp

          -- and do the jump
        ; jump_code }
-- ----------------------------------------------------------------------------
-- A general return (just a special case of doFinalJump, above)

performReturn :: Code   -- The code to execute to actually do the return
              -> Code

performReturn finish_code
  = do  { EndOfBlockInfo args_sp _sequel <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} finish_code }
-- ----------------------------------------------------------------------------
-- Primitive Returns
-- Just load the return value into the right register, and return.

performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
                  -> Code
performPrimReturn rep amode
  = do  { whenC (not (isVoidArg rep))
                (stmtC (CmmAssign ret_reg amode))
        ; performReturn emitReturnInstr }
  where
    ret_reg = dataReturnConvPrim rep
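-- A sketch (invented Rep type and register names) of the idea behind
-- dataReturnConvPrim: each primitive representation has a single dedicated
-- return register, so a primitive return is one assignment followed by the
-- return instruction.
{-
data Rep = PtrRep | IntRep | FloatRep | DoubleRep

retRegOf :: Rep -> String
retRegOf PtrRep    = "R1"
retRegOf IntRep    = "R1"
retRegOf FloatRep  = "F1"
retRegOf DoubleRep = "D1"
-}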
-- ---------------------------------------------------------------------------
-- Unboxed tuple returns

-- These are a bit like a normal tail call, except that:
--
--   - The tail-call target is an info table on the stack
--
--   - We separate stack arguments into pointers and non-pointers,
--     to make it easier to leave things in a sane state for a heap check.
--     This is OK because we can never partially apply an unboxed tuple,
--     unlike a function.  The same technique is used when calling
--     let-no-escape functions, because they also can't be partially
--     applied.
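-- A self-contained sketch of the layout this produces (hypothetical Rep
-- type, and an assumed pool of two return registers): leading components go
-- to registers, and the stack-resident remainder is pushed pointers-first so
-- the pointer/non-pointer boundary is known to the heap-check machinery.
{-
import Data.List (partition)

data Rep = Ptr | NonPtr deriving (Eq, Show)

layoutTuple :: Int -> [(Rep, a)] -> ([(Rep, a)], [(Int, (Rep, a))])
layoutTuple sp amodes = (regs, zip [sp ..] (ptrs ++ nptrs))
  where
    (regs, stk)   = splitAt 2 amodes            -- pretend: 2 return registers
    (ptrs, nptrs) = partition ((== Ptr) . fst) stk
-}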
returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
returnUnboxedTuple amodes
  = do  { (EndOfBlockInfo args_sp _sequel) <- getEndOfBlockInfo
        ; tickyUnboxedTupleReturn (length amodes)
        ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
        ; emitSimultaneously assts
        ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
                 -> [(CgRep, CmmExpr)]          -- amodes of the components
                 -> FCode (VirtualSpOffset,     -- final Sp
                           CmmStmts)            -- assignments (regs+stack)

pushUnboxedTuple sp []
  = return (sp, noStmts)
pushUnboxedTuple sp amodes
  = do  { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes

              -- separate the rest of the args into pointers and non-pointers
              (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
              reg_arg_assts = assignToRegs reg_arg_amodes

          -- push ptrs, then nonptrs, on the stack
        ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
        ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args

        ; returnFC (final_sp,
                    reg_arg_assts `plusStmts`
                    ptr_assts `plusStmts` nptr_assts) }
-- -----------------------------------------------------------------------------
-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
-- we want to do things in a slightly different order from normal:
--
--      - push return address
--      - adjust stack pointer
--      - r = call(args...)
--      - assign regs for unboxed tuple (usually just R1 = r)
--      - return to continuation
--
-- The return address (i.e. stack frame) must be on the stack before
-- doing the call in case the call ends up in the garbage collector.
--
-- Sadly, the information about the continuation is lost after we push it
-- (in order to avoid pushing it again), so we end up doing a needless
-- indirect jump (ToDo).

ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
ccallReturnUnboxedTuple amodes before_jump
  = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo

        -- Push a return address if necessary
        ; pushReturnAddress eob
        ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
            (do { adjustSpAndHp args_sp
                ; before_jump
                ; returnUnboxedTuple amodes })
    }
-- -----------------------------------------------------------------------------
-- Calling an out-of-line primop

tailCallPrimOp :: PrimOp -> [StgArg] -> Code
tailCallPrimOp op
 = tailCallPrim (mkRtsPrimOpLabel op)

tailCallPrimCall :: PrimCall -> [StgArg] -> Code
tailCallPrimCall primcall
 = tailCallPrim (mkPrimCallLabel primcall)
tailCallPrim :: CLabel -> [StgArg] -> Code
tailCallPrim lbl args
 = do   { -- We're going to perform a normal-looking tail call,
          -- except that *all* the arguments will be in registers.
          -- Hence the ASSERT( null leftovers )
          arg_amodes <- getArgAmodes args
        ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
              jump_to_primop = jumpToLbl lbl

        ; ASSERT(null leftovers) -- no stack-resident args
          emitSimultaneously (assignToRegs arg_regs)

        ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
-- -----------------------------------------------------------------------------
-- Return Addresses

-- We always push the return address just before performing a tail call
-- or return.  We leave it until then because the stack slot that the
-- return address is to go into might contain something useful.
--
-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
-- case expression and the return address is still to be pushed.
--
-- There are cases where it doesn't look necessary to push the return
-- address: for example, just before doing a return to a known
-- continuation.  However, the continuation will expect to find the
-- return address on the stack in case it needs to do a heap check.
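-- As a concrete illustration (offsets invented): if the sequel is
-- 'CaseAlts alts_lbl ...' and args_sp corresponds to Sp + 16, the push below
-- amounts to a single store of the alternatives' label,
--
--      W_[Sp + 16] = alts_lbl;
--
-- which leaves a well-formed frame on top of the stack if, say, a heap
-- check triggers a garbage collection before the scrutinee finishes.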
pushReturnAddress :: EndOfBlockInfo -> Code

pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _))
  = do  { sp_rel <- getSpRelOffset args_sp
        ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
pushReturnAddress _ = nopC
-- -----------------------------------------------------------------------------
-- Misc.

jumpToLbl :: CLabel -> Code
-- Passes no argument to the destination procedure
jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])

assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
assignToRegs reg_args
  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
            | (expr, reg_id) <- reg_args ]
\end{code}
%************************************************************************
%*                                                                      *
\subsection[CgStackery-adjust]{Adjusting the stack pointers}
%*                                                                      *
%************************************************************************
This function adjusts the stack and heap pointers just before a tail
call or return.  The stack pointer is adjusted to its final position
(i.e. to point to the last argument for a tail call, or the activation
record for a return).  The heap pointer may be moved backwards, in
cases where we overallocated at the beginning of the basic block (see
CgCase.lhs for discussion).

These functions {\em do not} deal with high-water-mark adjustment.
That's done by functions which allocate stack space.
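As a worked example of the heap adjustment: if the block conservatively
advanced the real Hp by five words for its worst-case branch, but the
branch actually taken allocated only three, then the virtual Hp sits two
words below the real Hp; adjustSpAndHp moves Hp back those two words and
retracts the same two words from the ticky allocation count.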
\begin{code}
adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
              -> Code
adjustSpAndHp newRealSp
  = do  { -- Adjust stack, if necessary.
          -- NB: the conditional on the monad-carried realSp
          --     is out of line (via codeOnly), to avoid a black hole
        ; new_sp <- getSpRelOffset newRealSp
        ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
        ; setRealSp newRealSp                   -- where realSp==newRealSp

          -- Adjust heap.  The virtual heap pointer may be less than the real Hp
          -- because the latter was advanced to deal with the worst-case branch
          -- of the code, and we may be in a better-case branch.  In that case,
          -- move the real Hp *back* and retract some ticky allocation count.
        ; hp_usg <- getHpUsage
        ; let rHp = realHp hp_usg
              vHp = virtHp hp_usg
        ; new_hp <- getHpRelOffset vHp
        ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
        ; tickyAllocHeap (vHp - rHp)            -- ...ditto

        ; setRealHp vHp
        }
\end{code}