compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 {-# OPTIONS_GHC -w #-}
   9 -- The above warning supression flag is a temporary kludge.
  10 -- While working on this module you are encouraged to remove it and fix
  11 -- any warnings in the module. See
  12 --     http://hackage.haskell.org/trac/ghc/wiki/WorkingConventions#Warnings
  13 -- for details
  14
  15 module CgTailCall (
  16         cgTailCall, performTailCall,
  17         performReturn, performPrimReturn,
  18         returnUnboxedTuple, ccallReturnUnboxedTuple,
  19         pushUnboxedTuple,
  20         tailCallPrimOp,
  21
  22         pushReturnAddress
  23     ) where
  24
  25 #include "HsVersions.h"
  26
  27 import CgMonad
  28 import CgBindery
  29 import CgInfoTbls
  30 import CgCallConv
  31 import CgStackery
  32 import CgHeapery
  33 import CgUtils
  34 import CgTicky
  35 import ClosureInfo
  36 import SMRep
  37 import MachOp
  38 import Cmm
  39 import CmmUtils
  40 import CLabel
  41 import Type
  42 import Id
  43 import StgSyn
  44 import PrimOp
  45 import Outputable
  46
  47 import Control.Monad
  48
  49 -----------------------------------------------------------------------------
  50 -- Tail Calls
  51
  52 cgTailCall :: Id -> [StgArg] -> Code
  53
  54 -- Here's the code we generate for a tail call.  (NB there may be no
  55 -- arguments, in which case this boils down to just entering a variable.)
  56 --
  57 --    * Put args in the top locations of the stack.
  58 --    * Adjust the stack ptr
  59 --    * Make R1 point to the function closure if necessary.
  60 --    * Perform the call.
  61 --
  62 -- Things to be careful about:
  63 --
  64 --    * Don't overwrite stack locations before you have finished with
  65 --      them (remember you need the function and the as-yet-unmoved
  66 --      arguments).
  67 --    * Preferably, generate no code to replace x by x on the stack (a
  68 --      common situation in tail-recursion).
  69 --    * Adjust the stack high water mark appropriately.
  70 --
  71 -- Treat unboxed locals exactly like literals (above) except use the addr
  72 -- mode for the local instead of (CLit lit) in the assignment.
  73
  74 cgTailCall fun args
  75   = do  { fun_info <- getCgIdInfo fun
  76
  77         ; if isUnLiftedType (idType fun)
  78           then  -- Primitive return
  79                 ASSERT( null args )
  80             do  { fun_amode <- idInfoToAmode fun_info
  81                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  82
  83           else -- Normal case, fun is boxed
  84             do  { arg_amodes <- getArgAmodes args
  85                 ; performTailCall fun_info arg_amodes noStmts }
  86         }
  87
  88
  89 -- -----------------------------------------------------------------------------
  90 -- The guts of a tail-call
  91
  92 performTailCall
  93         :: CgIdInfo             -- The function
  94         -> [(CgRep,CmmExpr)]    -- Args
  95         -> CmmStmts             -- Pending simultaneous assignments
  96                                 --  *** GUARANTEED to contain only stack assignments.
  97         -> Code
  98
  99 performTailCall fun_info arg_amodes pending_assts
 100   | Just join_sp <- maybeLetNoEscape fun_info
 101   =        -- A let-no-escape is slightly different, because we
 102            -- arrange the stack arguments into pointers and non-pointers
 103            -- to make the heap check easier.  The tail-call sequence
 104            -- is very similar to returning an unboxed tuple, so we
 105            -- share some code.
 106      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
 107         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 108         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 109         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 110
 111   | otherwise
 112   = do  { fun_amode <- idInfoToAmode fun_info
 113         ; let assignSt  = CmmAssign nodeReg fun_amode
 114               node_asst = oneStmt assignSt
 115               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 116                             | otherwise                 = noStmts
 117         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 118         ; this_pkg <- getThisPackage
 119
 120         ; case (getCallMethod fun_name lf_info (length arg_amodes)) of
 121
 122             -- Node must always point to things we enter
 123             EnterIt -> do
 124                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 125                 ; let target     = entryCode (closureInfoPtr (CmmReg nodeReg))
 126                       enterClosure = stmtC (CmmJump target [])
 127                       -- If this is a scrutinee
 128                       -- let's check if the closure is a constructor
 129                       -- so we can directly jump to the alternatives switch
 130                       -- statement.
 131                       jumpInstr = getEndOfBlockInfo >>=
 132                                   maybeSwitchOnCons enterClosure
 133                 ; doFinalJump sp False jumpInstr }
 134
 135             -- A function, but we have zero arguments.  It is already in WHNF,
 136             -- so we can just return it.
 137             -- As with any return, Node must point to it.
 138             ReturnIt -> do
 139                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 140                 ; doFinalJump sp False emitReturnInstr }
 141
 142             -- A real constructor.  Don't bother entering it,
 143             -- just do the right sort of return instead.
 144             -- As with any return, Node must point to it.
 145             ReturnCon con -> do
 146                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 147                 ; doFinalJump sp False emitReturnInstr }
 148
 149             JumpToIt lbl -> do
 150                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 151                 ; doFinalJump sp False (jumpToLbl lbl) }
 152
 153             -- A slow function call via the RTS apply routines
 154             -- Node must definitely point to the thing
 155             SlowCall -> do
 156                 {  when (not (null arg_amodes)) $ do
 157                    { if (isKnownFun lf_info)
 158                         then tickyKnownCallTooFewArgs
 159                         else tickyUnknownCall
 160                    ; tickySlowCallPat (map fst arg_amodes)
 161                    }
 162
 163                 ; let (apply_lbl, args, extra_args)
 164                         = constructSlowCall arg_amodes
 165
 166                 ; directCall sp apply_lbl args extra_args
 167                         (node_asst `plusStmts` pending_assts)
 168
 169                 }
 170
 171             -- A direct function call (possibly with some left-over arguments)
 172             DirectEntry lbl arity -> do
 173                 { if arity == length arg_amodes
 174                         then tickyKnownCallExact
 175                         else do tickyKnownCallExtraArgs
 176                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 177
 178                 ; let
 179                      -- The args beyond the arity go straight on the stack
 180                      (arity_args, extra_args) = splitAt arity arg_amodes
 181
 182                 ; directCall sp lbl arity_args extra_args
 183                         (opt_node_asst `plusStmts` pending_assts)
 184                 }
 185         }
 186   where
 187     fun_name  = idName (cgIdInfoId fun_info)
 188     lf_info   = cgIdInfoLF fun_info
 189     untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
 190     -- Test if closure is a constructor
 191     maybeSwitchOnCons enterClosure eob
 192               | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob
 193               = do { is_constr <- newLabelC
 194                    -- Is the pointer tagged?
 195                    -- Yes, jump to switch statement
 196                    ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
 197                                 is_constr)
 198                    -- No, enter the closure.
 199                    ; enterClosure
 200                    ; labelC is_constr
 201                    ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
 202                    }
 203 {-
 204               -- This is a scrutinee for a case expression
 205               -- so let's see if we can directly inspect the closure
 206               | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
 207               = do { no_cons <- newLabelC
 208                    -- Both the NCG and gcc optimize away the temp
 209                    ; z <- newTemp  wordRep
 210                    ; stmtC (CmmAssign z tag_expr)
 211                    ; let tag = CmmReg z
 212                    -- Is the closure a cons?
 213                    ; stmtC (CmmCondBranch (cond1 tag) no_cons)
 214                    ; stmtC (CmmCondBranch (cond2 tag) no_cons)
 215                    -- Yes, jump to switch statement
 216                    ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
 217                    ; labelC no_cons
 218                    -- No, enter the closure.
 219                    ; enterClosure
 220                    }
 221 -}
 222               -- No case expression involved, enter the closure.
 223               | otherwise
 224               = do { stmtC untag_node
 225                    ; enterClosure
 226                    }
 227         where
 228           --cond1 tag  = cmmULtWord tag lowCons
 229           -- More efficient than the above?
 230           tag_expr   = cmmGetClosureType (CmmReg nodeReg)
 231           cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
 232           cond2 tag  = cmmUGtWord tag highCons
 233           lowCons    = CmmLit (mkIntCLit 1)
 234             -- CONSTR
 235           highCons   = CmmLit (mkIntCLit 8)
 236             -- CONSTR_NOCAF_STATIC (from ClosureType.h)
 237
 238
 239 untagCmmAssign (CmmAssign r cmmExpr) = CmmAssign r (cmmUntag cmmExpr)
 240 untagCmmAssign stmt                  = stmt
 241
 242 directCall sp lbl args extra_args assts = do
 243   let
 244         -- First chunk of args go in registers
 245         (reg_arg_amodes, stk_args) = assignCallRegs args
 246
 247         -- Any "extra" arguments are placed in frames on the
 248         -- stack after the other arguments.
 249         slow_stk_args = slowArgs extra_args
 250
 251         reg_assts = assignToRegs reg_arg_amodes
 252   --
 253   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 254
 255   emitSimultaneously (reg_assts     `plusStmts`
 256                       stk_assts     `plusStmts`
 257                       assts)
 258
 259   doFinalJump final_sp False (jumpToLbl lbl)
 260
 261 -- -----------------------------------------------------------------------------
 262 -- The final clean-up before we do a jump at the end of a basic block.
 263 -- This code is shared by tail-calls and returns.
 264
 265 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 266 doFinalJump final_sp is_let_no_escape jump_code
 267   = do  { -- Adjust the high-water mark if necessary
 268           adjustStackHW final_sp
 269
 270         -- Push a return address if necessary (after the assignments
 271         -- above, in case we clobber a live stack location)
 272         --
 273         -- DONT push the return address when we're about to jump to a
 274         -- let-no-escape: the final tail call in the let-no-escape
 275         -- will do this.
 276         ; eob <- getEndOfBlockInfo
 277         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 278
 279             -- Final adjustment of Sp/Hp
 280         ; adjustSpAndHp final_sp
 281
 282             -- and do the jump
 283         ; jump_code }
 284
 285 -- ----------------------------------------------------------------------------
 286 -- A general return (just a special case of doFinalJump, above)
 287
 288 performReturn :: Code   -- The code to execute to actually do the return
 289               -> Code
 290
 291 performReturn finish_code
 292   = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
 293         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 294
 295 -- ----------------------------------------------------------------------------
 296 -- Primitive Returns
 297 -- Just load the return value into the right register, and return.
 298
 299 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 300                   -> Code
 301 performPrimReturn rep amode
 302   =  do { whenC (not (isVoidArg rep))
 303                 (stmtC (CmmAssign ret_reg amode))
 304         ; performReturn emitReturnInstr }
 305   where
 306     ret_reg = dataReturnConvPrim rep
 307
 308 -- ---------------------------------------------------------------------------
 309 -- Unboxed tuple returns
 310
 311 -- These are a bit like a normal tail call, except that:
 312 --
 313 --   - The tail-call target is an info table on the stack
 314 --
 315 --   - We separate stack arguments into pointers and non-pointers,
 316 --     to make it easier to leave things in a sane state for a heap check.
 317 --     This is OK because we can never partially-apply an unboxed tuple,
 318 --     unlike a function.  The same technique is used when calling
 319 --     let-no-escape functions, because they also can't be partially
 320 --     applied.
 321
 322 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 323 returnUnboxedTuple amodes
 324   = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
 325         ; tickyUnboxedTupleReturn (length amodes)
 326         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 327         ; emitSimultaneously assts
 328         ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
 329
 330 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 331                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 332                  -> FCode (VirtualSpOffset,     -- final Sp
 333                            CmmStmts)            -- assignments (regs+stack)
 334
 335 pushUnboxedTuple sp []
 336   = return (sp, noStmts)
 337 pushUnboxedTuple sp amodes
 338   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 339
 340                 -- separate the rest of the args into pointers and non-pointers
 341                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 342                 reg_arg_assts = assignToRegs reg_arg_amodes
 343
 344             -- push ptrs, then nonptrs, on the stack
 345         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 346         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 347
 348         ; returnFC (final_sp,
 349                     reg_arg_assts `plusStmts`
 350                     ptr_assts `plusStmts` nptr_assts) }
 351
 352
 353 -- -----------------------------------------------------------------------------
 354 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 355 -- we want to do things in a slightly different order to normal:
 356 --
 357 --              - push return address
 358 --              - adjust stack pointer
 359 --              - r = call(args...)
 360 --              - assign regs for unboxed tuple (usually just R1 = r)
 361 --              - return to continuation
 362 --
 363 -- The return address (i.e. stack frame) must be on the stack before
 364 -- doing the call in case the call ends up in the garbage collector.
 365 --
 366 -- Sadly, the information about the continuation is lost after we push it
 367 -- (in order to avoid pushing it again), so we end up doing a needless
 368 -- indirect jump (ToDo).
 369
 370 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 371 ccallReturnUnboxedTuple amodes before_jump
 372   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 373
 374         -- Push a return address if necessary
 375         ; pushReturnAddress eob
 376         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 377             (do { adjustSpAndHp args_sp
 378                 ; before_jump
 379                 ; returnUnboxedTuple amodes })
 380     }
 381
 382 -- -----------------------------------------------------------------------------
 383 -- Calling an out-of-line primop
 384
 385 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 386 tailCallPrimOp op args
 387  = do   {       -- We're going to perform a normal-looking tail call,
 388                 -- except that *all* the arguments will be in registers.
 389                 -- Hence the ASSERT( null leftovers )
 390           arg_amodes <- getArgAmodes args
 391         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 392               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 393
 394         ; ASSERT(null leftovers) -- no stack-resident args
 395           emitSimultaneously (assignToRegs arg_regs)
 396
 397         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 398         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 399
 400 -- -----------------------------------------------------------------------------
 401 -- Return Addresses
 402
 403 -- We always push the return address just before performing a tail call
 404 -- or return.  The reason we leave it until then is because the stack
 405 -- slot that the return address is to go into might contain something
 406 -- useful.
 407 --
 408 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 409 -- case expression and the return address is still to be pushed.
 410 --
 411 -- There are cases where it doesn't look necessary to push the return
 412 -- address: for example, just before doing a return to a known
 413 -- continuation.  However, the continuation will expect to find the
 414 -- return address on the stack in case it needs to do a heap check.
 415
 416 pushReturnAddress :: EndOfBlockInfo -> Code
 417
 418 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _))
 419   = do  { sp_rel <- getSpRelOffset args_sp
 420         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 421
 422 pushReturnAddress _ = nopC
 423
 424 -- -----------------------------------------------------------------------------
 425 -- Misc.
 426
 427 jumpToLbl :: CLabel -> Code
 428 -- Passes no argument to the destination procedure
 429 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 430
 431 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 432 assignToRegs reg_args
 433   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 434             | (expr, reg_id) <- reg_args ]
 435 \end{code}
 436
 437
 438 %************************************************************************
 439 %*                                                                      *
 440 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 441 %*                                                                      *
 442 %************************************************************************
 443
 444 This function adjusts the stack and heap pointers just before a tail
 445 call or return.  The stack pointer is adjusted to its final position
 446 (i.e. to point to the last argument for a tail call, or the activation
 447 record for a return).  The heap pointer may be moved backwards, in
 448 cases where we overallocated at the beginning of the basic block (see
 449 CgCase.lhs for discussion).
 450
 451 These functions {\em do not} deal with high-water-mark adjustment.
 452 That's done by functions which allocate stack space.
 453
 454 \begin{code}
 455 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 456               -> Code
 457 adjustSpAndHp newRealSp
 458   = do  { -- Adjust stack, if necessary.
 459           -- NB: the conditional on the monad-carried realSp
 460           --     is out of line (via codeOnly), to avoid a black hole
 461         ; new_sp <- getSpRelOffset newRealSp
 462         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 463         ; setRealSp newRealSp                   -- where realSp==newRealSp
 464
 465           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 466           -- because the latter was advanced to deal with the worst-case branch
 467           -- of the code, and we may be in a better-case branch.  In that case,
 468           -- move the real Hp *back* and retract some ticky allocation count.
 469         ; hp_usg <- getHpUsage
 470         ; let rHp = realHp hp_usg
 471               vHp = virtHp hp_usg
 472         ; new_hp <- getHpRelOffset vHp
 473         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 474         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 475         ; setRealHp vHp
 476         }
 477 \end{code}