compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 module CgTailCall (
   9         cgTailCall, performTailCall,
  10         performReturn, performPrimReturn,
  11         returnUnboxedTuple, ccallReturnUnboxedTuple,
  12         pushUnboxedTuple,
  13         tailCallPrimOp,
  14
  15         pushReturnAddress
  16     ) where
  17
  18 #include "HsVersions.h"
  19
  20 import CgMonad
  21 import CgBindery
  22 import CgInfoTbls
  23 import CgCallConv
  24 import CgStackery
  25 import CgHeapery
  26 import CgUtils
  27 import CgTicky
  28 import ClosureInfo
  29 import SMRep
  30 import Cmm
  31 import CmmUtils
  32 import CLabel
  33 import Type
  34 import Id
  35 import StgSyn
  36 import PrimOp
  37 import Outputable
  38
  39 import Control.Monad
  40
  41 -----------------------------------------------------------------------------
  42 -- Tail Calls
  43
  44 cgTailCall :: Id -> [StgArg] -> Code
  45
  46 -- Here's the code we generate for a tail call.  (NB there may be no
  47 -- arguments, in which case this boils down to just entering a variable.)
  48 --
  49 --    * Put args in the top locations of the stack.
  50 --    * Adjust the stack ptr
  51 --    * Make R1 point to the function closure if necessary.
  52 --    * Perform the call.
  53 --
  54 -- Things to be careful about:
  55 --
  56 --    * Don't overwrite stack locations before you have finished with
  57 --      them (remember you need the function and the as-yet-unmoved
  58 --      arguments).
  59 --    * Preferably, generate no code to replace x by x on the stack (a
  60 --      common situation in tail-recursion).
  61 --    * Adjust the stack high water mark appropriately.
  62 --
  63 -- Treat unboxed locals exactly like literals (above) except use the addr
  64 -- mode for the local instead of (CLit lit) in the assignment.
  65
  66 cgTailCall fun args
  67   = do  { fun_info <- getCgIdInfo fun
  68
  69         ; if isUnLiftedType (idType fun)
  70           then  -- Primitive return
  71                 ASSERT( null args )
  72             do  { fun_amode <- idInfoToAmode fun_info
  73                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  74
  75           else -- Normal case, fun is boxed
  76             do  { arg_amodes <- getArgAmodes args
  77                 ; performTailCall fun_info arg_amodes noStmts }
  78         }
  79
  80
  81 -- -----------------------------------------------------------------------------
  82 -- The guts of a tail-call
  83
  84 performTailCall
  85         :: CgIdInfo             -- The function
  86         -> [(CgRep,CmmExpr)]    -- Args
  87         -> CmmStmts             -- Pending simultaneous assignments
  88                                 --  *** GUARANTEED to contain only stack assignments.
  89         -> Code
  90
  91 performTailCall fun_info arg_amodes pending_assts
  92   | Just join_sp <- maybeLetNoEscape fun_info
  93   =        -- A let-no-escape is slightly different, because we
  94            -- arrange the stack arguments into pointers and non-pointers
  95            -- to make the heap check easier.  The tail-call sequence
  96            -- is very similar to returning an unboxed tuple, so we
  97            -- share some code.
  98      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
  99         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 100         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 101         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 102
 103   | otherwise
 104   = do  { fun_amode <- idInfoToAmode fun_info
 105         ; let assignSt  = CmmAssign nodeReg fun_amode
 106               node_asst = oneStmt assignSt
 107               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 108                             | otherwise                 = noStmts
 109         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 110
 111         ; dflags <- getDynFlags
 112         ; case (getCallMethod dflags fun_name fun_has_cafs lf_info (length arg_amodes)) of
 113
 114             -- Node must always point to things we enter
 115             EnterIt -> do
 116                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 117                 ; let target     = entryCode (closureInfoPtr (CmmReg nodeReg))
 118                       enterClosure = stmtC (CmmJump target [])
 119                       -- If this is a scrutinee
 120                       -- let's check if the closure is a constructor
 121                       -- so we can directly jump to the alternatives switch
 122                       -- statement.
 123                       jumpInstr = getEndOfBlockInfo >>=
 124                                   maybeSwitchOnCons enterClosure
 125                 ; doFinalJump sp False jumpInstr }
 126
 127             -- A function, but we have zero arguments.  It is already in WHNF,
 128             -- so we can just return it.
 129             -- As with any return, Node must point to it.
 130             ReturnIt -> do
 131                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 132                 ; doFinalJump sp False emitReturnInstr }
 133
 134             -- A real constructor.  Don't bother entering it,
 135             -- just do the right sort of return instead.
 136             -- As with any return, Node must point to it.
 137             ReturnCon _ -> do
 138                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 139                 ; doFinalJump sp False emitReturnInstr }
 140
 141             JumpToIt lbl -> do
 142                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 143                 ; doFinalJump sp False (jumpToLbl lbl) }
 144
 145             -- A slow function call via the RTS apply routines
 146             -- Node must definitely point to the thing
 147             SlowCall -> do
 148                 {  when (not (null arg_amodes)) $ do
 149                    { if (isKnownFun lf_info)
 150                         then tickyKnownCallTooFewArgs
 151                         else tickyUnknownCall
 152                    ; tickySlowCallPat (map fst arg_amodes)
 153                    }
 154
 155                 ; let (apply_lbl, args, extra_args)
 156                         = constructSlowCall arg_amodes
 157
 158                 ; directCall sp apply_lbl args extra_args
 159                         (node_asst `plusStmts` pending_assts)
 160
 161                 }
 162
 163             -- A direct function call (possibly with some left-over arguments)
 164             DirectEntry lbl arity -> do
 165                 { if arity == length arg_amodes
 166                         then tickyKnownCallExact
 167                         else do tickyKnownCallExtraArgs
 168                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 169
 170                 ; let
 171                      -- The args beyond the arity go straight on the stack
 172                      (arity_args, extra_args) = splitAt arity arg_amodes
 173
 174                 ; directCall sp lbl arity_args extra_args
 175                         (opt_node_asst `plusStmts` pending_assts)
 176                 }
 177         }
 178   where
 179     fun_id    = cgIdInfoId fun_info
 180     fun_name  = idName fun_id
 181     lf_info   = cgIdInfoLF fun_info
 182     fun_has_cafs = idCafInfo fun_id
 183     untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
 184     -- Test if closure is a constructor
 185     maybeSwitchOnCons enterClosure eob
 186               | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob
 187               = do { is_constr <- newLabelC
 188                    -- Is the pointer tagged?
 189                    -- Yes, jump to switch statement
 190                    ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
 191                                 is_constr)
 192                    -- No, enter the closure.
 193                    ; enterClosure
 194                    ; labelC is_constr
 195                    ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
 196                    }
 197 {-
 198               -- This is a scrutinee for a case expression
 199               -- so let's see if we can directly inspect the closure
 200               | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
 201               = do { no_cons <- newLabelC
 202                    -- Both the NCG and gcc optimize away the temp
 203                    ; z <- newTemp  wordRep
 204                    ; stmtC (CmmAssign z tag_expr)
 205                    ; let tag = CmmReg z
 206                    -- Is the closure a cons?
 207                    ; stmtC (CmmCondBranch (cond1 tag) no_cons)
 208                    ; stmtC (CmmCondBranch (cond2 tag) no_cons)
 209                    -- Yes, jump to switch statement
 210                    ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
 211                    ; labelC no_cons
 212                    -- No, enter the closure.
 213                    ; enterClosure
 214                    }
 215 -}
 216               -- No case expression involved, enter the closure.
 217               | otherwise
 218               = do { stmtC untag_node
 219                    ; enterClosure
 220                    }
 221         where
 222           --cond1 tag  = cmmULtWord tag lowCons
 223           -- More efficient than the above?
 224 {-
 225           tag_expr   = cmmGetClosureType (CmmReg nodeReg)
 226           cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
 227           cond2 tag  = cmmUGtWord tag highCons
 228           lowCons    = CmmLit (mkIntCLit 1)
 229             -- CONSTR
 230           highCons   = CmmLit (mkIntCLit 8)
 231             -- CONSTR_NOCAF_STATIC (from ClosureType.h)
 232 -}
 233
 234 directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)]
 235            -> [(CgRep, CmmExpr)] -> CmmStmts
 236            -> Code
 237 directCall sp lbl args extra_args assts = do
 238   let
 239         -- First chunk of args go in registers
 240         (reg_arg_amodes, stk_args) = assignCallRegs args
 241
 242         -- Any "extra" arguments are placed in frames on the
 243         -- stack after the other arguments.
 244         slow_stk_args = slowArgs extra_args
 245
 246         reg_assts = assignToRegs reg_arg_amodes
 247   --
 248   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 249
 250   emitSimultaneously (reg_assts     `plusStmts`
 251                       stk_assts     `plusStmts`
 252                       assts)
 253
 254   doFinalJump final_sp False (jumpToLbl lbl)
 255
 256 -- -----------------------------------------------------------------------------
 257 -- The final clean-up before we do a jump at the end of a basic block.
 258 -- This code is shared by tail-calls and returns.
 259
 260 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 261 doFinalJump final_sp is_let_no_escape jump_code
 262   = do  { -- Adjust the high-water mark if necessary
 263           adjustStackHW final_sp
 264
 265         -- Push a return address if necessary (after the assignments
 266         -- above, in case we clobber a live stack location)
 267         --
 268         -- DONT push the return address when we're about to jump to a
 269         -- let-no-escape: the final tail call in the let-no-escape
 270         -- will do this.
 271         ; eob <- getEndOfBlockInfo
 272         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 273
 274             -- Final adjustment of Sp/Hp
 275         ; adjustSpAndHp final_sp
 276
 277             -- and do the jump
 278         ; jump_code }
 279
 280 -- ----------------------------------------------------------------------------
 281 -- A general return (just a special case of doFinalJump, above)
 282
 283 performReturn :: Code   -- The code to execute to actually do the return
 284               -> Code
 285
 286 performReturn finish_code
 287   = do  { EndOfBlockInfo args_sp _sequel <- getEndOfBlockInfo
 288         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 289
 290 -- ----------------------------------------------------------------------------
 291 -- Primitive Returns
 292 -- Just load the return value into the right register, and return.
 293
 294 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 295                   -> Code
 296 performPrimReturn rep amode
 297   =  do { whenC (not (isVoidArg rep))
 298                 (stmtC (CmmAssign ret_reg amode))
 299         ; performReturn emitReturnInstr }
 300   where
 301     ret_reg = dataReturnConvPrim rep
 302
 303 -- ---------------------------------------------------------------------------
 304 -- Unboxed tuple returns
 305
 306 -- These are a bit like a normal tail call, except that:
 307 --
 308 --   - The tail-call target is an info table on the stack
 309 --
 310 --   - We separate stack arguments into pointers and non-pointers,
 311 --     to make it easier to leave things in a sane state for a heap check.
 312 --     This is OK because we can never partially-apply an unboxed tuple,
 313 --     unlike a function.  The same technique is used when calling
 314 --     let-no-escape functions, because they also can't be partially
 315 --     applied.
 316
 317 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 318 returnUnboxedTuple amodes
 319   = do  { (EndOfBlockInfo args_sp _sequel) <- getEndOfBlockInfo
 320         ; tickyUnboxedTupleReturn (length amodes)
 321         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 322         ; emitSimultaneously assts
 323         ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
 324
 325 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 326                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 327                  -> FCode (VirtualSpOffset,     -- final Sp
 328                            CmmStmts)            -- assignments (regs+stack)
 329
 330 pushUnboxedTuple sp []
 331   = return (sp, noStmts)
 332 pushUnboxedTuple sp amodes
 333   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 334
 335                 -- separate the rest of the args into pointers and non-pointers
 336                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 337                 reg_arg_assts = assignToRegs reg_arg_amodes
 338
 339             -- push ptrs, then nonptrs, on the stack
 340         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 341         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 342
 343         ; returnFC (final_sp,
 344                     reg_arg_assts `plusStmts`
 345                     ptr_assts `plusStmts` nptr_assts) }
 346
 347
 348 -- -----------------------------------------------------------------------------
 349 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 350 -- we want to do things in a slightly different order to normal:
 351 --
 352 --              - push return address
 353 --              - adjust stack pointer
 354 --              - r = call(args...)
 355 --              - assign regs for unboxed tuple (usually just R1 = r)
 356 --              - return to continuation
 357 --
 358 -- The return address (i.e. stack frame) must be on the stack before
 359 -- doing the call in case the call ends up in the garbage collector.
 360 --
 361 -- Sadly, the information about the continuation is lost after we push it
 362 -- (in order to avoid pushing it again), so we end up doing a needless
 363 -- indirect jump (ToDo).
 364
 365 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 366 ccallReturnUnboxedTuple amodes before_jump
 367   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 368
 369         -- Push a return address if necessary
 370         ; pushReturnAddress eob
 371         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 372             (do { adjustSpAndHp args_sp
 373                 ; before_jump
 374                 ; returnUnboxedTuple amodes })
 375     }
 376
 377 -- -----------------------------------------------------------------------------
 378 -- Calling an out-of-line primop
 379
 380 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 381 tailCallPrimOp op args
 382  = do   {       -- We're going to perform a normal-looking tail call,
 383                 -- except that *all* the arguments will be in registers.
 384                 -- Hence the ASSERT( null leftovers )
 385           arg_amodes <- getArgAmodes args
 386         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 387               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 388
 389         ; ASSERT(null leftovers) -- no stack-resident args
 390           emitSimultaneously (assignToRegs arg_regs)
 391
 392         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 393         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 394
 395 -- -----------------------------------------------------------------------------
 396 -- Return Addresses
 397
 398 -- We always push the return address just before performing a tail call
 399 -- or return.  The reason we leave it until then is because the stack
 400 -- slot that the return address is to go into might contain something
 401 -- useful.
 402 --
 403 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 404 -- case expression and the return address is still to be pushed.
 405 --
 406 -- There are cases where it doesn't look necessary to push the return
 407 -- address: for example, just before doing a return to a known
 408 -- continuation.  However, the continuation will expect to find the
 409 -- return address on the stack in case it needs to do a heap check.
 410
 411 pushReturnAddress :: EndOfBlockInfo -> Code
 412
 413 pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _))
 414   = do  { sp_rel <- getSpRelOffset args_sp
 415         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 416
 417 pushReturnAddress _ = nopC
 418
 419 -- -----------------------------------------------------------------------------
 420 -- Misc.
 421
 422 jumpToLbl :: CLabel -> Code
 423 -- Passes no argument to the destination procedure
 424 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 425
 426 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 427 assignToRegs reg_args
 428   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 429             | (expr, reg_id) <- reg_args ]
 430 \end{code}
 431
 432
 433 %************************************************************************
 434 %*                                                                      *
 435 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 436 %*                                                                      *
 437 %************************************************************************
 438
 439 This function adjusts the stack and heap pointers just before a tail
 440 call or return.  The stack pointer is adjusted to its final position
 441 (i.e. to point to the last argument for a tail call, or the activation
 442 record for a return).  The heap pointer may be moved backwards, in
 443 cases where we overallocated at the beginning of the basic block (see
 444 CgCase.lhs for discussion).
 445
 446 These functions {\em do not} deal with high-water-mark adjustment.
 447 That's done by functions which allocate stack space.
 448
 449 \begin{code}
 450 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 451               -> Code
 452 adjustSpAndHp newRealSp
 453   = do  { -- Adjust stack, if necessary.
 454           -- NB: the conditional on the monad-carried realSp
 455           --     is out of line (via codeOnly), to avoid a black hole
 456         ; new_sp <- getSpRelOffset newRealSp
 457         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 458         ; setRealSp newRealSp                   -- where realSp==newRealSp
 459
 460           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 461           -- because the latter was advanced to deal with the worst-case branch
 462           -- of the code, and we may be in a better-case branch.  In that case,
 463           -- move the real Hp *back* and retract some ticky allocation count.
 464         ; hp_usg <- getHpUsage
 465         ; let rHp = realHp hp_usg
 466               vHp = virtHp hp_usg
 467         ; new_hp <- getHpRelOffset vHp
 468         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 469         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 470         ; setRealHp vHp
 471         }
 472 \end{code}
 473