compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 {-# OPTIONS -w #-}
   9 -- The above warning supression flag is a temporary kludge.
  10 -- While working on this module you are encouraged to remove it and fix
  11 -- any warnings in the module. See
  12 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
  13 -- for details
  14
  15 module CgTailCall (
  16         cgTailCall, performTailCall,
  17         performReturn, performPrimReturn,
  18         returnUnboxedTuple, ccallReturnUnboxedTuple,
  19         pushUnboxedTuple,
  20         tailCallPrimOp,
  21
  22         pushReturnAddress
  23     ) where
  24
  25 #include "HsVersions.h"
  26
  27 import CgMonad
  28 import CgBindery
  29 import CgInfoTbls
  30 import CgCallConv
  31 import CgStackery
  32 import CgHeapery
  33 import CgUtils
  34 import CgTicky
  35 import ClosureInfo
  36 import SMRep
  37 import Cmm
  38 import CmmUtils
  39 import CLabel
  40 import Type
  41 import Id
  42 import StgSyn
  43 import PrimOp
  44 import FastString
  45 import Outputable
  46
  47 import Control.Monad
  48
  49 -----------------------------------------------------------------------------
  50 -- Tail Calls
  51
  52 cgTailCall :: Id -> [StgArg] -> Code
  53
  54 -- Here's the code we generate for a tail call.  (NB there may be no
  55 -- arguments, in which case this boils down to just entering a variable.)
  56 --
  57 --    * Put args in the top locations of the stack.
  58 --    * Adjust the stack ptr
  59 --    * Make R1 point to the function closure if necessary.
  60 --    * Perform the call.
  61 --
  62 -- Things to be careful about:
  63 --
  64 --    * Don't overwrite stack locations before you have finished with
  65 --      them (remember you need the function and the as-yet-unmoved
  66 --      arguments).
  67 --    * Preferably, generate no code to replace x by x on the stack (a
  68 --      common situation in tail-recursion).
  69 --    * Adjust the stack high water mark appropriately.
  70 --
  71 -- Treat unboxed locals exactly like literals (above) except use the addr
  72 -- mode for the local instead of (CLit lit) in the assignment.
  73
  74 cgTailCall fun args
  75   = do  { fun_info <- getCgIdInfo fun
  76
  77         ; if isUnLiftedType (idType fun)
  78           then  -- Primitive return
  79                 ASSERT( null args )
  80             do  { fun_amode <- idInfoToAmode fun_info
  81                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  82
  83           else -- Normal case, fun is boxed
  84             do  { arg_amodes <- getArgAmodes args
  85                 ; performTailCall fun_info arg_amodes noStmts }
  86         }
  87
  88
  89 -- -----------------------------------------------------------------------------
  90 -- The guts of a tail-call
  91
  92 performTailCall
  93         :: CgIdInfo             -- The function
  94         -> [(CgRep,CmmExpr)]    -- Args
  95         -> CmmStmts             -- Pending simultaneous assignments
  96                                 --  *** GUARANTEED to contain only stack assignments.
  97         -> Code
  98
  99 performTailCall fun_info arg_amodes pending_assts
 100   | Just join_sp <- maybeLetNoEscape fun_info
 101   =        -- A let-no-escape is slightly different, because we
 102            -- arrange the stack arguments into pointers and non-pointers
 103            -- to make the heap check easier.  The tail-call sequence
 104            -- is very similar to returning an unboxed tuple, so we
 105            -- share some code.
 106      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
 107         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 108         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 109         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 110
 111   | otherwise
 112   = do  { fun_amode <- idInfoToAmode fun_info
 113         ; let assignSt  = CmmAssign nodeReg fun_amode
 114               node_asst = oneStmt assignSt
 115               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 116                             | otherwise                 = noStmts
 117         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 118         ; this_pkg <- getThisPackage
 119
 120         ; case (getCallMethod fun_name fun_has_cafs lf_info (length arg_amodes)) of
 121
 122             -- Node must always point to things we enter
 123             EnterIt -> do
 124                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 125                 ; let target     = entryCode (closureInfoPtr (CmmReg nodeReg))
 126                       enterClosure = stmtC (CmmJump target [])
 127                       -- If this is a scrutinee
 128                       -- let's check if the closure is a constructor
 129                       -- so we can directly jump to the alternatives switch
 130                       -- statement.
 131                       jumpInstr = getEndOfBlockInfo >>=
 132                                   maybeSwitchOnCons enterClosure
 133                 ; doFinalJump sp False jumpInstr }
 134
 135             -- A function, but we have zero arguments.  It is already in WHNF,
 136             -- so we can just return it.
 137             -- As with any return, Node must point to it.
 138             ReturnIt -> do
 139                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 140                 ; doFinalJump sp False emitReturnInstr }
 141
 142             -- A real constructor.  Don't bother entering it,
 143             -- just do the right sort of return instead.
 144             -- As with any return, Node must point to it.
 145             ReturnCon con -> do
 146                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 147                 ; doFinalJump sp False emitReturnInstr }
 148
 149             JumpToIt lbl -> do
 150                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 151                 ; doFinalJump sp False (jumpToLbl lbl) }
 152
 153             -- A slow function call via the RTS apply routines
 154             -- Node must definitely point to the thing
 155             SlowCall -> do
 156                 {  when (not (null arg_amodes)) $ do
 157                    { if (isKnownFun lf_info)
 158                         then tickyKnownCallTooFewArgs
 159                         else tickyUnknownCall
 160                    ; tickySlowCallPat (map fst arg_amodes)
 161                    }
 162
 163                 ; let (apply_lbl, args, extra_args)
 164                         = constructSlowCall arg_amodes
 165
 166                 ; directCall sp apply_lbl args extra_args
 167                         (node_asst `plusStmts` pending_assts)
 168
 169                 }
 170
 171             -- A direct function call (possibly with some left-over arguments)
 172             DirectEntry lbl arity -> do
 173                 { if arity == length arg_amodes
 174                         then tickyKnownCallExact
 175                         else do tickyKnownCallExtraArgs
 176                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 177
 178                 ; let
 179                      -- The args beyond the arity go straight on the stack
 180                      (arity_args, extra_args) = splitAt arity arg_amodes
 181
 182                 ; directCall sp lbl arity_args extra_args
 183                         (opt_node_asst `plusStmts` pending_assts)
 184                 }
 185         }
 186   where
 187     fun_id    = cgIdInfoId fun_info
 188     fun_name  = idName fun_id
 189     lf_info   = cgIdInfoLF fun_info
 190     fun_has_cafs = idCafInfo fun_id
 191     untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
 192     -- Test if closure is a constructor
 193     maybeSwitchOnCons enterClosure eob
 194               | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob
 195               = do { is_constr <- newLabelC
 196                    -- Is the pointer tagged?
 197                    -- Yes, jump to switch statement
 198                    ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
 199                                 is_constr)
 200                    -- No, enter the closure.
 201                    ; enterClosure
 202                    ; labelC is_constr
 203                    ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
 204                    }
 205 {-
 206               -- This is a scrutinee for a case expression
 207               -- so let's see if we can directly inspect the closure
 208               | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
 209               = do { no_cons <- newLabelC
 210                    -- Both the NCG and gcc optimize away the temp
 211                    ; z <- newTemp  wordRep
 212                    ; stmtC (CmmAssign z tag_expr)
 213                    ; let tag = CmmReg z
 214                    -- Is the closure a cons?
 215                    ; stmtC (CmmCondBranch (cond1 tag) no_cons)
 216                    ; stmtC (CmmCondBranch (cond2 tag) no_cons)
 217                    -- Yes, jump to switch statement
 218                    ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
 219                    ; labelC no_cons
 220                    -- No, enter the closure.
 221                    ; enterClosure
 222                    }
 223 -}
 224               -- No case expression involved, enter the closure.
 225               | otherwise
 226               = do { stmtC untag_node
 227                    ; enterClosure
 228                    }
 229         where
 230           --cond1 tag  = cmmULtWord tag lowCons
 231           -- More efficient than the above?
 232 {-
 233           tag_expr   = cmmGetClosureType (CmmReg nodeReg)
 234           cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
 235           cond2 tag  = cmmUGtWord tag highCons
 236           lowCons    = CmmLit (mkIntCLit 1)
 237             -- CONSTR
 238           highCons   = CmmLit (mkIntCLit 8)
 239             -- CONSTR_NOCAF_STATIC (from ClosureType.h)
 240 -}
 241
 242
 243 directCall sp lbl args extra_args assts = do
 244   let
 245         -- First chunk of args go in registers
 246         (reg_arg_amodes, stk_args) = assignCallRegs args
 247
 248         -- Any "extra" arguments are placed in frames on the
 249         -- stack after the other arguments.
 250         slow_stk_args = slowArgs extra_args
 251
 252         reg_assts = assignToRegs reg_arg_amodes
 253   --
 254   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 255
 256   emitSimultaneously (reg_assts     `plusStmts`
 257                       stk_assts     `plusStmts`
 258                       assts)
 259
 260   doFinalJump final_sp False (jumpToLbl lbl)
 261
 262 -- -----------------------------------------------------------------------------
 263 -- The final clean-up before we do a jump at the end of a basic block.
 264 -- This code is shared by tail-calls and returns.
 265
 266 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 267 doFinalJump final_sp is_let_no_escape jump_code
 268   = do  { -- Adjust the high-water mark if necessary
 269           adjustStackHW final_sp
 270
 271         -- Push a return address if necessary (after the assignments
 272         -- above, in case we clobber a live stack location)
 273         --
 274         -- DONT push the return address when we're about to jump to a
 275         -- let-no-escape: the final tail call in the let-no-escape
 276         -- will do this.
 277         ; eob <- getEndOfBlockInfo
 278         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 279
 280             -- Final adjustment of Sp/Hp
 281         ; adjustSpAndHp final_sp
 282
 283             -- and do the jump
 284         ; jump_code }
 285
 286 -- ----------------------------------------------------------------------------
 287 -- A general return (just a special case of doFinalJump, above)
 288
 289 performReturn :: Code   -- The code to execute to actually do the return
 290               -> Code
 291
 292 performReturn finish_code
 293   = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
 294         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 295
 296 -- ----------------------------------------------------------------------------
 297 -- Primitive Returns
 298 -- Just load the return value into the right register, and return.
 299
 300 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 301                   -> Code
 302 performPrimReturn rep amode
 303   =  do { whenC (not (isVoidArg rep))
 304                 (stmtC (CmmAssign ret_reg amode))
 305         ; performReturn emitReturnInstr }
 306   where
 307     ret_reg = dataReturnConvPrim rep
 308
 309 -- ---------------------------------------------------------------------------
 310 -- Unboxed tuple returns
 311
 312 -- These are a bit like a normal tail call, except that:
 313 --
 314 --   - The tail-call target is an info table on the stack
 315 --
 316 --   - We separate stack arguments into pointers and non-pointers,
 317 --     to make it easier to leave things in a sane state for a heap check.
 318 --     This is OK because we can never partially-apply an unboxed tuple,
 319 --     unlike a function.  The same technique is used when calling
 320 --     let-no-escape functions, because they also can't be partially
 321 --     applied.
 322
 323 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 324 returnUnboxedTuple amodes
 325   = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
 326         ; tickyUnboxedTupleReturn (length amodes)
 327         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 328         ; emitSimultaneously assts
 329         ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
 330
 331 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 332                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 333                  -> FCode (VirtualSpOffset,     -- final Sp
 334                            CmmStmts)            -- assignments (regs+stack)
 335
 336 pushUnboxedTuple sp []
 337   = return (sp, noStmts)
 338 pushUnboxedTuple sp amodes
 339   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 340
 341                 -- separate the rest of the args into pointers and non-pointers
 342                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 343                 reg_arg_assts = assignToRegs reg_arg_amodes
 344
 345             -- push ptrs, then nonptrs, on the stack
 346         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 347         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 348
 349         ; returnFC (final_sp,
 350                     reg_arg_assts `plusStmts`
 351                     ptr_assts `plusStmts` nptr_assts) }
 352
 353
 354 -- -----------------------------------------------------------------------------
 355 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 356 -- we want to do things in a slightly different order to normal:
 357 --
 358 --              - push return address
 359 --              - adjust stack pointer
 360 --              - r = call(args...)
 361 --              - assign regs for unboxed tuple (usually just R1 = r)
 362 --              - return to continuation
 363 --
 364 -- The return address (i.e. stack frame) must be on the stack before
 365 -- doing the call in case the call ends up in the garbage collector.
 366 --
 367 -- Sadly, the information about the continuation is lost after we push it
 368 -- (in order to avoid pushing it again), so we end up doing a needless
 369 -- indirect jump (ToDo).
 370
 371 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 372 ccallReturnUnboxedTuple amodes before_jump
 373   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 374
 375         -- Push a return address if necessary
 376         ; pushReturnAddress eob
 377         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 378             (do { adjustSpAndHp args_sp
 379                 ; before_jump
 380                 ; returnUnboxedTuple amodes })
 381     }
 382
 383 -- -----------------------------------------------------------------------------
 384 -- Calling an out-of-line primop
 385
 386 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 387 tailCallPrimOp op args
 388  = do   {       -- We're going to perform a normal-looking tail call,
 389                 -- except that *all* the arguments will be in registers.
 390                 -- Hence the ASSERT( null leftovers )
 391           arg_amodes <- getArgAmodes args
 392         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 393               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 394
 395         ; ASSERT(null leftovers) -- no stack-resident args
 396           emitSimultaneously (assignToRegs arg_regs)
 397
 398         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 399         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 400
 401 -- -----------------------------------------------------------------------------
 402 -- Return Addresses
 403
 404 -- We always push the return address just before performing a tail call
 405 -- or return.  The reason we leave it until then is because the stack
 406 -- slot that the return address is to go into might contain something
 407 -- useful.
 408 --
 409 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 410 -- case expression and the return address is still to be pushed.
 411 --
 412 -- There are cases where it doesn't look necessary to push the return
 413 -- address: for example, just before doing a return to a known
 414 -- continuation.  However, the continuation will expect to find the
 415 -- return address on the stack in case it needs to do a heap check.
 416
 417 pushReturnAddress :: EndOfBlockInfo -> Code
 418
 419 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _))
 420   = do  { sp_rel <- getSpRelOffset args_sp
 421         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 422
 423 pushReturnAddress _ = nopC
 424
 425 -- -----------------------------------------------------------------------------
 426 -- Misc.
 427
 428 jumpToLbl :: CLabel -> Code
 429 -- Passes no argument to the destination procedure
 430 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 431
 432 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 433 assignToRegs reg_args
 434   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 435             | (expr, reg_id) <- reg_args ]
 436 \end{code}
 437
 438
 439 %************************************************************************
 440 %*                                                                      *
 441 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 442 %*                                                                      *
 443 %************************************************************************
 444
 445 This function adjusts the stack and heap pointers just before a tail
 446 call or return.  The stack pointer is adjusted to its final position
 447 (i.e. to point to the last argument for a tail call, or the activation
 448 record for a return).  The heap pointer may be moved backwards, in
 449 cases where we overallocated at the beginning of the basic block (see
 450 CgCase.lhs for discussion).
 451
 452 These functions {\em do not} deal with high-water-mark adjustment.
 453 That's done by functions which allocate stack space.
 454
 455 \begin{code}
 456 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 457               -> Code
 458 adjustSpAndHp newRealSp
 459   = do  { -- Adjust stack, if necessary.
 460           -- NB: the conditional on the monad-carried realSp
 461           --     is out of line (via codeOnly), to avoid a black hole
 462         ; new_sp <- getSpRelOffset newRealSp
 463         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 464         ; setRealSp newRealSp                   -- where realSp==newRealSp
 465
 466           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 467           -- because the latter was advanced to deal with the worst-case branch
 468           -- of the code, and we may be in a better-case branch.  In that case,
 469           -- move the real Hp *back* and retract some ticky allocation count.
 470         ; hp_usg <- getHpUsage
 471         ; let rHp = realHp hp_usg
 472               vHp = virtHp hp_usg
 473         ; new_hp <- getHpRelOffset vHp
 474         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 475         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 476         ; setRealHp vHp
 477         }
 478 \end{code}
 479