ghc/compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 % $Id: CgTailCall.lhs,v 1.41 2004/11/26 16:20:12 simonmar Exp $
   5 %
   6 %********************************************************
   7 %*                                                      *
   8 \section[CgTailCall]{Tail calls: converting @StgApps@}
   9 %*                                                      *
  10 %********************************************************
  11
  12 \begin{code}
  13 module CgTailCall (
  14         cgTailCall, performTailCall,
  15         performReturn, performPrimReturn,
  16         emitKnownConReturnCode, emitAlgReturnCode,
  17         returnUnboxedTuple, ccallReturnUnboxedTuple,
  18         pushUnboxedTuple,
  19         tailCallPrimOp,
  20
  21         pushReturnAddress
  22     ) where
  23
  24 #include "HsVersions.h"
  25
  26 import CgMonad
  27 import CgBindery        ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
  28                           idInfoToAmode, cgIdInfoId, cgIdInfoLF,
  29                           cgIdInfoArgRep )
  30 import CgInfoTbls       ( entryCode, emitDirectReturnInstr, dataConTagZ,
  31                           emitVectoredReturnInstr, closureInfoPtr )
  32 import CgCallConv
  33 import CgStackery       ( setRealSp, mkStkAmodes, adjustStackHW,
  34                           getSpRelOffset )
  35 import CgHeapery        ( setRealHp, getHpRelOffset )
  36 import CgUtils          ( emitSimultaneously )
  37 import CgTicky
  38 import ClosureInfo
  39 import SMRep            ( CgRep, isVoidArg, separateByPtrFollowness )
  40 import Cmm
  41 import CmmUtils
  42 import CLabel           ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
  43 import Type             ( isUnLiftedType )
  44 import Id               ( Id, idName, idUnique, idType )
  45 import DataCon          ( DataCon, dataConTyCon )
  46 import StgSyn           ( StgArg )
  47 import TyCon            ( TyCon )
  48 import PrimOp           ( PrimOp )
  49 import Outputable
  50
  51 import Monad            ( when )
  52
  53 -----------------------------------------------------------------------------
  54 -- Tail Calls
  55
  56 cgTailCall :: Id -> [StgArg] -> Code
  57
  58 -- Here's the code we generate for a tail call.  (NB there may be no
  59 -- arguments, in which case this boils down to just entering a variable.)
  60 --
  61 --    * Put args in the top locations of the stack.
  62 --    * Adjust the stack ptr
  63 --    * Make R1 point to the function closure if necessary.
  64 --    * Perform the call.
  65 --
  66 -- Things to be careful about:
  67 --
  68 --    * Don't overwrite stack locations before you have finished with
  69 --      them (remember you need the function and the as-yet-unmoved
  70 --      arguments).
  71 --    * Preferably, generate no code to replace x by x on the stack (a
  72 --      common situation in tail-recursion).
  73 --    * Adjust the stack high water mark appropriately.
  74 --
  75 -- Treat unboxed locals exactly like literals (above) except use the addr
  76 -- mode for the local instead of (CLit lit) in the assignment.
  77
  78 cgTailCall fun args
  79   = do  { fun_info <- getCgIdInfo fun
  80
  81         ; if isUnLiftedType (idType fun)
  82           then  -- Primitive return
  83                 ASSERT( null args )
  84             do  { fun_amode <- idInfoToAmode fun_info
  85                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  86
  87           else -- Normal case, fun is boxed
  88             do  { arg_amodes <- getArgAmodes args
  89                 ; performTailCall fun_info arg_amodes noStmts }
  90         }
  91
  92
  93 -- -----------------------------------------------------------------------------
  94 -- The guts of a tail-call
  95
  96 performTailCall
  97         :: CgIdInfo             -- The function
  98         -> [(CgRep,CmmExpr)]    -- Args
  99         -> CmmStmts             -- Pending simultaneous assignments
 100                                 -- *** GUARANTEED to contain only stack assignments.
 101         -> Code
 102
 103 performTailCall fun_info arg_amodes pending_assts
 104   | Just join_sp <- maybeLetNoEscape fun_info
 105   =        -- A let-no-escape is slightly different, because we
 106            -- arrange the stack arguments into pointers and non-pointers
 107            -- to make the heap check easier.  The tail-call sequence
 108            -- is very similar to returning an unboxed tuple, so we
 109            -- share some code.
 110      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
 111         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 112         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 113         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 114
 115   | otherwise
 116   = do  { fun_amode <- idInfoToAmode fun_info
 117         ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
 118               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 119                             | otherwise                 = noStmts
 120         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 121         ; dflags <- getDynFlags
 122
 123         ; case (getCallMethod dflags fun_name lf_info (length arg_amodes)) of
 124
 125             -- Node must always point to things we enter
 126             EnterIt -> do
 127                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 128                 ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
 129                 ; doFinalJump sp False (stmtC (CmmJump target [])) }
 130
 131             -- A function, but we have zero arguments.  It is already in WHNF,
 132             -- so we can just return it.
 133             -- As with any return, Node must point to it.
 134             ReturnIt -> do
 135                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 136                 ; doFinalJump sp False emitDirectReturnInstr }
 137
 138             -- A real constructor.  Don't bother entering it,
 139             -- just do the right sort of return instead.
 140             -- As with any return, Node must point to it.
 141             ReturnCon con -> do
 142                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 143                 ; doFinalJump sp False (emitKnownConReturnCode con) }
 144
 145             JumpToIt lbl -> do
 146                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 147                 ; doFinalJump sp False (jumpToLbl lbl) }
 148
 149             -- A slow function call via the RTS apply routines
 150             -- Node must definitely point to the thing
 151             SlowCall -> do
 152                 { let (apply_lbl, new_amodes) = constructSlowCall arg_amodes
 153
 154                     -- Fill in all the arguments on the stack
 155                 ; (final_sp,stk_assts) <- mkStkAmodes sp new_amodes
 156
 157                 ; emitSimultaneously (node_asst `plusStmts` stk_assts
 158                                                 `plusStmts` pending_assts)
 159
 160                 ; when (not (null arg_amodes)) $ do
 161                    { if (isKnownFun lf_info)
 162                         then tickyKnownCallTooFewArgs
 163                         else tickyUnknownCall
 164                    ; tickySlowCallPat (map fst arg_amodes)
 165                   }
 166
 167                 ; doFinalJump (final_sp + 1)
 168                         -- Add one, because the stg_ap functions
 169                         -- expect there to be a free slot on the stk
 170                       False (jumpToLbl apply_lbl)
 171                 }
 172
 173             -- A direct function call (possibly with some left-over arguments)
 174             DirectEntry lbl arity -> do
 175                 { let
 176                      -- The args beyond the arity go straight on the stack
 177                      (arity_args, extra_stk_args) = splitAt arity arg_amodes
 178
 179                      -- First chunk of args go in registers
 180                      (reg_arg_amodes, stk_args) = assignCallRegs arity_args
 181
 182                      -- Any "extra" arguments are placed in frames on the
 183                      -- stack after the other arguments.
 184                      slow_stk_args = slowArgs extra_stk_args
 185
 186                      reg_assts = assignToRegs reg_arg_amodes
 187
 188                 ; if null slow_stk_args
 189                         then tickyKnownCallExact
 190                         else do tickyKnownCallExtraArgs
 191                                 tickySlowCallPat (map fst extra_stk_args)
 192
 193                 ; (final_sp, stk_assts) <- mkStkAmodes sp
 194                                                 (stk_args ++ slow_stk_args)
 195
 196                 ; emitSimultaneously (opt_node_asst `plusStmts`
 197                                       reg_assts     `plusStmts`
 198                                       stk_assts     `plusStmts`
 199                                       pending_assts)
 200
 201                 ; doFinalJump final_sp False (jumpToLbl lbl) }
 202         }
 203   where
 204     fun_name  = idName (cgIdInfoId fun_info)
 205     lf_info   = cgIdInfoLF fun_info
 206
 207
 208
 209 -- -----------------------------------------------------------------------------
 210 -- The final clean-up before we do a jump at the end of a basic block.
 211 -- This code is shared by tail-calls and returns.
 212
 213 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 214 doFinalJump final_sp is_let_no_escape jump_code
 215   = do  { -- Adjust the high-water mark if necessary
 216           adjustStackHW final_sp
 217
 218         -- Push a return address if necessary (after the assignments
 219         -- above, in case we clobber a live stack location)
 220         --
 221         -- DONT push the return address when we're about to jump to a
 222         -- let-no-escape: the final tail call in the let-no-escape
 223         -- will do this.
 224         ; eob <- getEndOfBlockInfo
 225         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 226
 227             -- Final adjustment of Sp/Hp
 228         ; adjustSpAndHp final_sp
 229
 230             -- and do the jump
 231         ; jump_code }
 232
 233 -- -----------------------------------------------------------------------------
 234 -- A general return (just a special case of doFinalJump, above)
 235
 236 performReturn :: Code           -- The code to execute to actually do the return
 237               -> Code
 238
 239 performReturn finish_code
 240   = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
 241         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 242
 243 -- -----------------------------------------------------------------------------
 244 -- Primitive Returns
 245 -- Just load the return value into the right register, and return.
 246
 247 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 248                   -> Code
 249 performPrimReturn rep amode
 250   =  do { whenC (not (isVoidArg rep))
 251                 (stmtC (CmmAssign ret_reg amode))
 252         ; performReturn emitDirectReturnInstr }
 253   where
 254     ret_reg = dataReturnConvPrim rep
 255
 256 -- -----------------------------------------------------------------------------
 257 -- Algebraic constructor returns
 258
 259 -- Constructor is built on the heap; Node is set.
 260 -- All that remains is to do the right sort of jump.
 261
 262 emitKnownConReturnCode :: DataCon -> Code
 263 emitKnownConReturnCode con
 264   = emitAlgReturnCode (dataConTyCon con)
 265                       (CmmLit (mkIntCLit (dataConTagZ con)))
 266                         -- emitAlgReturnCode requires zero-indexed tag
 267
 268 emitAlgReturnCode :: TyCon -> CmmExpr -> Code
 269 -- emitAlgReturnCode is used both by emitKnownConReturnCode,
 270 -- and by by PrimOps that return enumerated types (i.e.
 271 -- all the comparison operators).
 272 emitAlgReturnCode tycon tag
 273  =  do  { case ctrlReturnConvAlg tycon of
 274             VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
 275                                         ; emitVectoredReturnInstr tag }
 276             UnvectoredReturn _    -> emitDirectReturnInstr
 277         }
 278
 279
 280 -- ---------------------------------------------------------------------------
 281 -- Unboxed tuple returns
 282
 283 -- These are a bit like a normal tail call, except that:
 284 --
 285 --   - The tail-call target is an info table on the stack
 286 --
 287 --   - We separate stack arguments into pointers and non-pointers,
 288 --     to make it easier to leave things in a sane state for a heap check.
 289 --     This is OK because we can never partially-apply an unboxed tuple,
 290 --     unlike a function.  The same technique is used when calling
 291 --     let-no-escape functions, because they also can't be partially
 292 --     applied.
 293
 294 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 295 returnUnboxedTuple amodes
 296   = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
 297         ; tickyUnboxedTupleReturn (length amodes)
 298         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 299         ; emitSimultaneously assts
 300         ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
 301
 302 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 303                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 304                  -> FCode (VirtualSpOffset,     -- final Sp
 305                            CmmStmts)            -- assignments (regs+stack)
 306
 307 pushUnboxedTuple sp []
 308   = return (sp, noStmts)
 309 pushUnboxedTuple sp amodes
 310   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 311
 312                 -- separate the rest of the args into pointers and non-pointers
 313                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 314                 reg_arg_assts = assignToRegs reg_arg_amodes
 315
 316             -- push ptrs, then nonptrs, on the stack
 317         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 318         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 319
 320         ; returnFC (final_sp,
 321                     reg_arg_assts `plusStmts`
 322                     ptr_assts `plusStmts` nptr_assts) }
 323
 324
 325 -- -----------------------------------------------------------------------------
 326 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 327 -- we want to do things in a slightly different order to normal:
 328 --
 329 --              - push return address
 330 --              - adjust stack pointer
 331 --              - r = call(args...)
 332 --              - assign regs for unboxed tuple (usually just R1 = r)
 333 --              - return to continuation
 334 --
 335 -- The return address (i.e. stack frame) must be on the stack before
 336 -- doing the call in case the call ends up in the garbage collector.
 337 --
 338 -- Sadly, the information about the continuation is lost after we push it
 339 -- (in order to avoid pushing it again), so we end up doing a needless
 340 -- indirect jump (ToDo).
 341
 342 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 343 ccallReturnUnboxedTuple amodes before_jump
 344   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 345
 346         -- Push a return address if necessary
 347         ; pushReturnAddress eob
 348         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 349             (do { adjustSpAndHp args_sp
 350                 ; before_jump
 351                 ; returnUnboxedTuple amodes })
 352     }
 353
 354 -- -----------------------------------------------------------------------------
 355 -- Calling an out-of-line primop
 356
 357 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 358 tailCallPrimOp op args
 359  = do   {       -- We're going to perform a normal-looking tail call,
 360                 -- except that *all* the arguments will be in registers.
 361                 -- Hence the ASSERT( null leftovers )
 362           arg_amodes <- getArgAmodes args
 363         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 364               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 365
 366         ; ASSERT(null leftovers) -- no stack-resident args
 367           emitSimultaneously (assignToRegs arg_regs)
 368
 369         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 370         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 371
 372 -- -----------------------------------------------------------------------------
 373 -- Return Addresses
 374
 375 -- | We always push the return address just before performing a tail call
 376 -- or return.  The reason we leave it until then is because the stack
 377 -- slot that the return address is to go into might contain something
 378 -- useful.
 379 --
 380 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 381 -- case expression and the return address is still to be pushed.
 382 --
 383 -- There are cases where it doesn't look necessary to push the return
 384 -- address: for example, just before doing a return to a known
 385 -- continuation.  However, the continuation will expect to find the
 386 -- return address on the stack in case it needs to do a heap check.
 387
 388 pushReturnAddress :: EndOfBlockInfo -> Code
 389
 390 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
 391   = do  { sp_rel <- getSpRelOffset args_sp
 392         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 393
 394 -- For a polymorphic case, we have two return addresses to push: the case
 395 -- return, and stg_seq_frame_info which turns a possible vectored return
 396 -- into a direct one.
 397 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
 398   = do  { sp_rel <- getSpRelOffset (args_sp-1)
 399         ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
 400         ; sp_rel <- getSpRelOffset args_sp
 401         ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
 402
 403 pushReturnAddress _ = nopC
 404
 405 -- -----------------------------------------------------------------------------
 406 -- Misc.
 407
 408 jumpToLbl :: CLabel -> Code
 409 -- Passes no argument to the destination procedure
 410 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 411
 412 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 413 assignToRegs reg_args
 414   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 415             | (expr, reg_id) <- reg_args ]
 416 \end{code}
 417
 418
 419 %************************************************************************
 420 %*                                                                      *
 421 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 422 %*                                                                      *
 423 %************************************************************************
 424
 425 This function adjusts the stack and heap pointers just before a tail
 426 call or return.  The stack pointer is adjusted to its final position
 427 (i.e. to point to the last argument for a tail call, or the activation
 428 record for a return).  The heap pointer may be moved backwards, in
 429 cases where we overallocated at the beginning of the basic block (see
 430 CgCase.lhs for discussion).
 431
 432 These functions {\em do not} deal with high-water-mark adjustment.
 433 That's done by functions which allocate stack space.
 434
 435 \begin{code}
 436 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 437               -> Code
 438 adjustSpAndHp newRealSp
 439   = do  { -- Adjust stack, if necessary.
 440           -- NB: the conditional on the monad-carried realSp
 441           --     is out of line (via codeOnly), to avoid a black hole
 442         ; new_sp <- getSpRelOffset newRealSp
 443         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 444         ; setRealSp newRealSp                   -- where realSp==newRealSp
 445
 446           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 447           -- because the latter was advanced to deal with the worst-case branch
 448           -- of the code, and we may be in a better-case branch.  In that case,
 449           -- move the real Hp *back* and retract some ticky allocation count.
 450         ; hp_usg <- getHpUsage
 451         ; let rHp = realHp hp_usg
 452               vHp = virtHp hp_usg
 453         ; new_hp <- getHpRelOffset vHp
 454         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 455         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 456         ; setRealHp vHp
 457         }
 458 \end{code}