compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 module CgTailCall (
   9         cgTailCall, performTailCall,
  10         performReturn, performPrimReturn,
  11         returnUnboxedTuple, ccallReturnUnboxedTuple,
  12         pushUnboxedTuple,
  13         tailCallPrimOp,
  14
  15         pushReturnAddress
  16     ) where
  17
  18 #include "HsVersions.h"
  19
  20 import CgMonad
  21 import CgBindery
  22 import CgInfoTbls
  23 import CgCallConv
  24 import CgStackery
  25 import CgHeapery
  26 import CgUtils
  27 import CgTicky
  28 import ClosureInfo
  29 import SMRep
  30 import Cmm
  31 import CmmUtils
  32 import CLabel
  33 import Type
  34 import Id
  35 import StgSyn
  36 import PrimOp
  37 import Outputable
  38 import StaticFlags
  39
  40 import Control.Monad
  41
  42 -----------------------------------------------------------------------------
  43 -- Tail Calls
  44
  45 cgTailCall :: Id -> [StgArg] -> Code
  46
  47 -- Here's the code we generate for a tail call.  (NB there may be no
  48 -- arguments, in which case this boils down to just entering a variable.)
  49 --
  50 --    * Put args in the top locations of the stack.
  51 --    * Adjust the stack ptr
  52 --    * Make R1 point to the function closure if necessary.
  53 --    * Perform the call.
  54 --
  55 -- Things to be careful about:
  56 --
  57 --    * Don't overwrite stack locations before you have finished with
  58 --      them (remember you need the function and the as-yet-unmoved
  59 --      arguments).
  60 --    * Preferably, generate no code to replace x by x on the stack (a
  61 --      common situation in tail-recursion).
  62 --    * Adjust the stack high water mark appropriately.
  63 --
  64 -- Treat unboxed locals exactly like literals (above) except use the addr
  65 -- mode for the local instead of (CLit lit) in the assignment.
  66
  67 cgTailCall fun args
  68   = do  { fun_info <- getCgIdInfo fun
  69
  70         ; if isUnLiftedType (idType fun)
  71           then  -- Primitive return
  72                 ASSERT( null args )
  73             do  { fun_amode <- idInfoToAmode fun_info
  74                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  75
  76           else -- Normal case, fun is boxed
  77             do  { arg_amodes <- getArgAmodes args
  78                 ; performTailCall fun_info arg_amodes noStmts }
  79         }
  80
  81
  82 -- -----------------------------------------------------------------------------
  83 -- The guts of a tail-call
  84
  85 performTailCall
  86         :: CgIdInfo             -- The function
  87         -> [(CgRep,CmmExpr)]    -- Args
  88         -> CmmStmts             -- Pending simultaneous assignments
  89                                 --  *** GUARANTEED to contain only stack assignments.
  90         -> Code
  91
  92 performTailCall fun_info arg_amodes pending_assts
  93   | Just join_sp <- maybeLetNoEscape fun_info
  94   =        -- A let-no-escape is slightly different, because we
  95            -- arrange the stack arguments into pointers and non-pointers
  96            -- to make the heap check easier.  The tail-call sequence
  97            -- is very similar to returning an unboxed tuple, so we
  98            -- share some code.
  99      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
 100         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 101         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 102         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 103
 104   | otherwise
 105   = do  { fun_amode <- idInfoToAmode fun_info
 106         ; let assignSt  = CmmAssign nodeReg fun_amode
 107               node_asst = oneStmt assignSt
 108               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 109                             | otherwise                 = noStmts
 110         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 111
 112         ; dflags <- getDynFlags
 113         ; case (getCallMethod dflags fun_name fun_has_cafs lf_info (length arg_amodes)) of
 114
 115             -- Node must always point to things we enter
 116             EnterIt -> do
 117                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 118                 ; let target     = entryCode (closureInfoPtr (CmmReg nodeReg))
 119                       enterClosure = stmtC (CmmJump target [])
 120                       -- If this is a scrutinee
 121                       -- let's check if the closure is a constructor
 122                       -- so we can directly jump to the alternatives switch
 123                       -- statement.
 124                       jumpInstr = getEndOfBlockInfo >>=
 125                                   maybeSwitchOnCons enterClosure
 126                 ; doFinalJump sp False jumpInstr }
 127
 128             -- A function, but we have zero arguments.  It is already in WHNF,
 129             -- so we can just return it.
 130             -- As with any return, Node must point to it.
 131             ReturnIt -> do
 132                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 133                 ; doFinalJump sp False emitReturnInstr }
 134
 135             -- A real constructor.  Don't bother entering it,
 136             -- just do the right sort of return instead.
 137             -- As with any return, Node must point to it.
 138             ReturnCon _ -> do
 139                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 140                 ; doFinalJump sp False emitReturnInstr }
 141
 142             JumpToIt lbl -> do
 143                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 144                 ; doFinalJump sp False (jumpToLbl lbl) }
 145
 146             -- A slow function call via the RTS apply routines
 147             -- Node must definitely point to the thing
 148             SlowCall -> do
 149                 {  when (not (null arg_amodes)) $ do
 150                    { if (isKnownFun lf_info)
 151                         then tickyKnownCallTooFewArgs
 152                         else tickyUnknownCall
 153                    ; tickySlowCallPat (map fst arg_amodes)
 154                    }
 155
 156                 ; let (apply_lbl, args, extra_args)
 157                         = constructSlowCall arg_amodes
 158
 159                 ; directCall sp apply_lbl args extra_args
 160                         (node_asst `plusStmts` pending_assts)
 161
 162                 }
 163
 164             -- A direct function call (possibly with some left-over arguments)
 165             DirectEntry lbl arity -> do
 166                 { if arity == length arg_amodes
 167                         then tickyKnownCallExact
 168                         else do tickyKnownCallExtraArgs
 169                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 170
 171                 ; let
 172                      -- The args beyond the arity go straight on the stack
 173                      (arity_args, extra_args) = splitAt arity arg_amodes
 174
 175                 ; directCall sp lbl arity_args extra_args
 176                         (opt_node_asst `plusStmts` pending_assts)
 177                 }
 178         }
 179   where
 180     fun_id    = cgIdInfoId fun_info
 181     fun_name  = idName fun_id
 182     lf_info   = cgIdInfoLF fun_info
 183     fun_has_cafs = idCafInfo fun_id
 184     untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
 185     -- Test if closure is a constructor
 186     maybeSwitchOnCons enterClosure eob
 187               | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob,
 188                 not opt_SccProfilingOn
 189                 -- we can't shortcut when profiling is on, because we have
 190                 -- to enter a closure to mark it as "used" for LDV profiling
 191               = do { is_constr <- newLabelC
 192                    -- Is the pointer tagged?
 193                    -- Yes, jump to switch statement
 194                    ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
 195                                 is_constr)
 196                    -- No, enter the closure.
 197                    ; enterClosure
 198                    ; labelC is_constr
 199                    ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
 200                    }
 201 {-
 202               -- This is a scrutinee for a case expression
 203               -- so let's see if we can directly inspect the closure
 204               | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
 205               = do { no_cons <- newLabelC
 206                    -- Both the NCG and gcc optimize away the temp
 207                    ; z <- newTemp  wordRep
 208                    ; stmtC (CmmAssign z tag_expr)
 209                    ; let tag = CmmReg z
 210                    -- Is the closure a cons?
 211                    ; stmtC (CmmCondBranch (cond1 tag) no_cons)
 212                    ; stmtC (CmmCondBranch (cond2 tag) no_cons)
 213                    -- Yes, jump to switch statement
 214                    ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
 215                    ; labelC no_cons
 216                    -- No, enter the closure.
 217                    ; enterClosure
 218                    }
 219 -}
 220               -- No case expression involved, enter the closure.
 221               | otherwise
 222               = do { stmtC untag_node
 223                    ; enterClosure
 224                    }
 225         where
 226           --cond1 tag  = cmmULtWord tag lowCons
 227           -- More efficient than the above?
 228 {-
 229           tag_expr   = cmmGetClosureType (CmmReg nodeReg)
 230           cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
 231           cond2 tag  = cmmUGtWord tag highCons
 232           lowCons    = CmmLit (mkIntCLit 1)
 233             -- CONSTR
 234           highCons   = CmmLit (mkIntCLit 8)
 235             -- CONSTR_NOCAF_STATIC (from ClosureType.h)
 236 -}
 237
 238 directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)]
 239            -> [(CgRep, CmmExpr)] -> CmmStmts
 240            -> Code
 241 directCall sp lbl args extra_args assts = do
 242   let
 243         -- First chunk of args go in registers
 244         (reg_arg_amodes, stk_args) = assignCallRegs args
 245
 246         -- Any "extra" arguments are placed in frames on the
 247         -- stack after the other arguments.
 248         slow_stk_args = slowArgs extra_args
 249
 250         reg_assts = assignToRegs reg_arg_amodes
 251   --
 252   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 253
 254   emitSimultaneously (reg_assts     `plusStmts`
 255                       stk_assts     `plusStmts`
 256                       assts)
 257
 258   doFinalJump final_sp False (jumpToLbl lbl)
 259
 260 -- -----------------------------------------------------------------------------
 261 -- The final clean-up before we do a jump at the end of a basic block.
 262 -- This code is shared by tail-calls and returns.
 263
 264 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 265 doFinalJump final_sp is_let_no_escape jump_code
 266   = do  { -- Adjust the high-water mark if necessary
 267           adjustStackHW final_sp
 268
 269         -- Push a return address if necessary (after the assignments
 270         -- above, in case we clobber a live stack location)
 271         --
 272         -- DONT push the return address when we're about to jump to a
 273         -- let-no-escape: the final tail call in the let-no-escape
 274         -- will do this.
 275         ; eob <- getEndOfBlockInfo
 276         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 277
 278             -- Final adjustment of Sp/Hp
 279         ; adjustSpAndHp final_sp
 280
 281             -- and do the jump
 282         ; jump_code }
 283
 284 -- ----------------------------------------------------------------------------
 285 -- A general return (just a special case of doFinalJump, above)
 286
 287 performReturn :: Code   -- The code to execute to actually do the return
 288               -> Code
 289
 290 performReturn finish_code
 291   = do  { EndOfBlockInfo args_sp _sequel <- getEndOfBlockInfo
 292         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 293
 294 -- ----------------------------------------------------------------------------
 295 -- Primitive Returns
 296 -- Just load the return value into the right register, and return.
 297
 298 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 299                   -> Code
 300 performPrimReturn rep amode
 301   =  do { whenC (not (isVoidArg rep))
 302                 (stmtC (CmmAssign ret_reg amode))
 303         ; performReturn emitReturnInstr }
 304   where
 305     ret_reg = dataReturnConvPrim rep
 306
 307 -- ---------------------------------------------------------------------------
 308 -- Unboxed tuple returns
 309
 310 -- These are a bit like a normal tail call, except that:
 311 --
 312 --   - The tail-call target is an info table on the stack
 313 --
 314 --   - We separate stack arguments into pointers and non-pointers,
 315 --     to make it easier to leave things in a sane state for a heap check.
 316 --     This is OK because we can never partially-apply an unboxed tuple,
 317 --     unlike a function.  The same technique is used when calling
 318 --     let-no-escape functions, because they also can't be partially
 319 --     applied.
 320
 321 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 322 returnUnboxedTuple amodes
 323   = do  { (EndOfBlockInfo args_sp _sequel) <- getEndOfBlockInfo
 324         ; tickyUnboxedTupleReturn (length amodes)
 325         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 326         ; emitSimultaneously assts
 327         ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
 328
 329 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 330                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 331                  -> FCode (VirtualSpOffset,     -- final Sp
 332                            CmmStmts)            -- assignments (regs+stack)
 333
 334 pushUnboxedTuple sp []
 335   = return (sp, noStmts)
 336 pushUnboxedTuple sp amodes
 337   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 338
 339                 -- separate the rest of the args into pointers and non-pointers
 340                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 341                 reg_arg_assts = assignToRegs reg_arg_amodes
 342
 343             -- push ptrs, then nonptrs, on the stack
 344         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 345         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 346
 347         ; returnFC (final_sp,
 348                     reg_arg_assts `plusStmts`
 349                     ptr_assts `plusStmts` nptr_assts) }
 350
 351
 352 -- -----------------------------------------------------------------------------
 353 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 354 -- we want to do things in a slightly different order to normal:
 355 --
 356 --              - push return address
 357 --              - adjust stack pointer
 358 --              - r = call(args...)
 359 --              - assign regs for unboxed tuple (usually just R1 = r)
 360 --              - return to continuation
 361 --
 362 -- The return address (i.e. stack frame) must be on the stack before
 363 -- doing the call in case the call ends up in the garbage collector.
 364 --
 365 -- Sadly, the information about the continuation is lost after we push it
 366 -- (in order to avoid pushing it again), so we end up doing a needless
 367 -- indirect jump (ToDo).
 368
 369 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 370 ccallReturnUnboxedTuple amodes before_jump
 371   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 372
 373         -- Push a return address if necessary
 374         ; pushReturnAddress eob
 375         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 376             (do { adjustSpAndHp args_sp
 377                 ; before_jump
 378                 ; returnUnboxedTuple amodes })
 379     }
 380
 381 -- -----------------------------------------------------------------------------
 382 -- Calling an out-of-line primop
 383
 384 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 385 tailCallPrimOp op args
 386  = do   {       -- We're going to perform a normal-looking tail call,
 387                 -- except that *all* the arguments will be in registers.
 388                 -- Hence the ASSERT( null leftovers )
 389           arg_amodes <- getArgAmodes args
 390         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 391               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 392
 393         ; ASSERT(null leftovers) -- no stack-resident args
 394           emitSimultaneously (assignToRegs arg_regs)
 395
 396         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 397         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 398
 399 -- -----------------------------------------------------------------------------
 400 -- Return Addresses
 401
 402 -- We always push the return address just before performing a tail call
 403 -- or return.  The reason we leave it until then is because the stack
 404 -- slot that the return address is to go into might contain something
 405 -- useful.
 406 --
 407 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 408 -- case expression and the return address is still to be pushed.
 409 --
 410 -- There are cases where it doesn't look necessary to push the return
 411 -- address: for example, just before doing a return to a known
 412 -- continuation.  However, the continuation will expect to find the
 413 -- return address on the stack in case it needs to do a heap check.
 414
 415 pushReturnAddress :: EndOfBlockInfo -> Code
 416
 417 pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _))
 418   = do  { sp_rel <- getSpRelOffset args_sp
 419         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 420
 421 pushReturnAddress _ = nopC
 422
 423 -- -----------------------------------------------------------------------------
 424 -- Misc.
 425
 426 jumpToLbl :: CLabel -> Code
 427 -- Passes no argument to the destination procedure
 428 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 429
 430 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 431 assignToRegs reg_args
 432   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 433             | (expr, reg_id) <- reg_args ]
 434 \end{code}
 435
 436
 437 %************************************************************************
 438 %*                                                                      *
 439 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 440 %*                                                                      *
 441 %************************************************************************
 442
 443 This function adjusts the stack and heap pointers just before a tail
 444 call or return.  The stack pointer is adjusted to its final position
 445 (i.e. to point to the last argument for a tail call, or the activation
 446 record for a return).  The heap pointer may be moved backwards, in
 447 cases where we overallocated at the beginning of the basic block (see
 448 CgCase.lhs for discussion).
 449
 450 These functions {\em do not} deal with high-water-mark adjustment.
 451 That's done by functions which allocate stack space.
 452
 453 \begin{code}
 454 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 455               -> Code
 456 adjustSpAndHp newRealSp
 457   = do  { -- Adjust stack, if necessary.
 458           -- NB: the conditional on the monad-carried realSp
 459           --     is out of line (via codeOnly), to avoid a black hole
 460         ; new_sp <- getSpRelOffset newRealSp
 461         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 462         ; setRealSp newRealSp                   -- where realSp==newRealSp
 463
 464           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 465           -- because the latter was advanced to deal with the worst-case branch
 466           -- of the code, and we may be in a better-case branch.  In that case,
 467           -- move the real Hp *back* and retract some ticky allocation count.
 468         ; hp_usg <- getHpUsage
 469         ; let rHp = realHp hp_usg
 470               vHp = virtHp hp_usg
 471         ; new_hp <- getHpRelOffset vHp
 472         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 473         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 474         ; setRealHp vHp
 475         }
 476 \end{code}
 477