compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 module CgTailCall (
   9         cgTailCall, performTailCall,
  10         performReturn, performPrimReturn,
  11         emitKnownConReturnCode, emitAlgReturnCode,
  12         returnUnboxedTuple, ccallReturnUnboxedTuple,
  13         pushUnboxedTuple,
  14         tailCallPrimOp,
  15
  16         pushReturnAddress
  17     ) where
  18
  19 #include "HsVersions.h"
  20
  21 import CgMonad
  22 import CgBindery
  23 import CgInfoTbls
  24 import CgCallConv
  25 import CgStackery
  26 import CgHeapery
  27 import CgUtils
  28 import CgTicky
  29 import ClosureInfo
  30 import SMRep
  31 import Cmm
  32 import CmmUtils
  33 import CLabel
  34 import Type
  35 import Id
  36 import DataCon
  37 import StgSyn
  38 import TyCon
  39 import PrimOp
  40 import Outputable
  41
  42 import Control.Monad
  43
  44 -----------------------------------------------------------------------------
  45 -- Tail Calls
  46
  47 cgTailCall :: Id -> [StgArg] -> Code
  48
  49 -- Here's the code we generate for a tail call.  (NB there may be no
  50 -- arguments, in which case this boils down to just entering a variable.)
  51 --
  52 --    * Put args in the top locations of the stack.
  53 --    * Adjust the stack ptr
  54 --    * Make R1 point to the function closure if necessary.
  55 --    * Perform the call.
  56 --
  57 -- Things to be careful about:
  58 --
  59 --    * Don't overwrite stack locations before you have finished with
  60 --      them (remember you need the function and the as-yet-unmoved
  61 --      arguments).
  62 --    * Preferably, generate no code to replace x by x on the stack (a
  63 --      common situation in tail-recursion).
  64 --    * Adjust the stack high water mark appropriately.
  65 --
  66 -- Treat unboxed locals exactly like literals (above) except use the addr
  67 -- mode for the local instead of (CLit lit) in the assignment.
  68
  69 cgTailCall fun args
  70   = do  { fun_info <- getCgIdInfo fun
  71
  72         ; if isUnLiftedType (idType fun)
  73           then  -- Primitive return
  74                 ASSERT( null args )
  75             do  { fun_amode <- idInfoToAmode fun_info
  76                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  77
  78           else -- Normal case, fun is boxed
  79             do  { arg_amodes <- getArgAmodes args
  80                 ; performTailCall fun_info arg_amodes noStmts }
  81         }
  82
  83
  84 -- -----------------------------------------------------------------------------
  85 -- The guts of a tail-call
  86
  87 performTailCall
  88         :: CgIdInfo             -- The function
  89         -> [(CgRep,CmmExpr)]    -- Args
  90         -> CmmStmts             -- Pending simultaneous assignments
  91                                 --  *** GUARANTEED to contain only stack assignments.
  92         -> Code
  93
  94 performTailCall fun_info arg_amodes pending_assts
  95   | Just join_sp <- maybeLetNoEscape fun_info
  96   =        -- A let-no-escape is slightly different, because we
  97            -- arrange the stack arguments into pointers and non-pointers
  98            -- to make the heap check easier.  The tail-call sequence
  99            -- is very similar to returning an unboxed tuple, so we
 100            -- share some code.
 101      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
 102         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 103         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 104         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 105
 106   | otherwise
 107   = do  { fun_amode <- idInfoToAmode fun_info
 108         ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
 109               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 110                             | otherwise                 = noStmts
 111         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 112         ; this_pkg <- getThisPackage
 113
 114         ; case (getCallMethod this_pkg fun_name lf_info (length arg_amodes)) of
 115
 116             -- Node must always point to things we enter
 117             EnterIt -> do
 118                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 119                 ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
 120                 ; doFinalJump sp False (stmtC (CmmJump target [])) }
 121
 122             -- A function, but we have zero arguments.  It is already in WHNF,
 123             -- so we can just return it.
 124             -- As with any return, Node must point to it.
 125             ReturnIt -> do
 126                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 127                 ; doFinalJump sp False emitDirectReturnInstr }
 128
 129             -- A real constructor.  Don't bother entering it,
 130             -- just do the right sort of return instead.
 131             -- As with any return, Node must point to it.
 132             ReturnCon con -> do
 133                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 134                 ; doFinalJump sp False (emitKnownConReturnCode con) }
 135
 136             JumpToIt lbl -> do
 137                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 138                 ; doFinalJump sp False (jumpToLbl lbl) }
 139
 140             -- A slow function call via the RTS apply routines
 141             -- Node must definitely point to the thing
 142             SlowCall -> do
 143                 {  when (not (null arg_amodes)) $ do
 144                    { if (isKnownFun lf_info)
 145                         then tickyKnownCallTooFewArgs
 146                         else tickyUnknownCall
 147                    ; tickySlowCallPat (map fst arg_amodes)
 148                    }
 149
 150                 ; let (apply_lbl, args, extra_args)
 151                         = constructSlowCall arg_amodes
 152
 153                 ; directCall sp apply_lbl args extra_args
 154                         (node_asst `plusStmts` pending_assts)
 155                 }
 156
 157             -- A direct function call (possibly with some left-over arguments)
 158             DirectEntry lbl arity -> do
 159                 { if arity == length arg_amodes
 160                         then tickyKnownCallExact
 161                         else do tickyKnownCallExtraArgs
 162                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 163
 164                 ; let
 165                      -- The args beyond the arity go straight on the stack
 166                      (arity_args, extra_args) = splitAt arity arg_amodes
 167
 168                 ; directCall sp lbl arity_args extra_args
 169                         (opt_node_asst `plusStmts` pending_assts)
 170                 }
 171         }
 172   where
 173     fun_name  = idName (cgIdInfoId fun_info)
 174     lf_info   = cgIdInfoLF fun_info
 175
 176
 177
 178 directCall sp lbl args extra_args assts = do
 179   let
 180         -- First chunk of args go in registers
 181         (reg_arg_amodes, stk_args) = assignCallRegs args
 182
 183         -- Any "extra" arguments are placed in frames on the
 184         -- stack after the other arguments.
 185         slow_stk_args = slowArgs extra_args
 186
 187         reg_assts = assignToRegs reg_arg_amodes
 188   --
 189   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 190
 191   emitSimultaneously (reg_assts     `plusStmts`
 192                       stk_assts     `plusStmts`
 193                       assts)
 194
 195   doFinalJump final_sp False (jumpToLbl lbl)
 196
 197 -- -----------------------------------------------------------------------------
 198 -- The final clean-up before we do a jump at the end of a basic block.
 199 -- This code is shared by tail-calls and returns.
 200
 201 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 202 doFinalJump final_sp is_let_no_escape jump_code
 203   = do  { -- Adjust the high-water mark if necessary
 204           adjustStackHW final_sp
 205
 206         -- Push a return address if necessary (after the assignments
 207         -- above, in case we clobber a live stack location)
 208         --
 209         -- DONT push the return address when we're about to jump to a
 210         -- let-no-escape: the final tail call in the let-no-escape
 211         -- will do this.
 212         ; eob <- getEndOfBlockInfo
 213         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 214
 215             -- Final adjustment of Sp/Hp
 216         ; adjustSpAndHp final_sp
 217
 218             -- and do the jump
 219         ; jump_code }
 220
 221 -- -----------------------------------------------------------------------------
 222 -- A general return (just a special case of doFinalJump, above)
 223
 224 performReturn :: Code           -- The code to execute to actually do the return
 225               -> Code
 226
 227 performReturn finish_code
 228   = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
 229         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 230
 231 -- -----------------------------------------------------------------------------
 232 -- Primitive Returns
 233 -- Just load the return value into the right register, and return.
 234
 235 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 236                   -> Code
 237 performPrimReturn rep amode
 238   =  do { whenC (not (isVoidArg rep))
 239                 (stmtC (CmmAssign ret_reg amode))
 240         ; performReturn emitDirectReturnInstr }
 241   where
 242     ret_reg = dataReturnConvPrim rep
 243
 244 -- -----------------------------------------------------------------------------
 245 -- Algebraic constructor returns
 246
 247 -- Constructor is built on the heap; Node is set.
 248 -- All that remains is to do the right sort of jump.
 249
 250 emitKnownConReturnCode :: DataCon -> Code
 251 emitKnownConReturnCode con
 252   = emitAlgReturnCode (dataConTyCon con)
 253                       (CmmLit (mkIntCLit (dataConTagZ con)))
 254                         -- emitAlgReturnCode requires zero-indexed tag
 255
 256 emitAlgReturnCode :: TyCon -> CmmExpr -> Code
 257 -- emitAlgReturnCode is used both by emitKnownConReturnCode,
 258 -- and by by PrimOps that return enumerated types (i.e.
 259 -- all the comparison operators).
 260 emitAlgReturnCode tycon tag
 261  =  do  { case ctrlReturnConvAlg tycon of
 262             VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
 263                                         ; emitVectoredReturnInstr tag }
 264             UnvectoredReturn _    -> emitDirectReturnInstr
 265         }
 266
 267
 268 -- ---------------------------------------------------------------------------
 269 -- Unboxed tuple returns
 270
 271 -- These are a bit like a normal tail call, except that:
 272 --
 273 --   - The tail-call target is an info table on the stack
 274 --
 275 --   - We separate stack arguments into pointers and non-pointers,
 276 --     to make it easier to leave things in a sane state for a heap check.
 277 --     This is OK because we can never partially-apply an unboxed tuple,
 278 --     unlike a function.  The same technique is used when calling
 279 --     let-no-escape functions, because they also can't be partially
 280 --     applied.
 281
 282 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 283 returnUnboxedTuple amodes
 284   = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
 285         ; tickyUnboxedTupleReturn (length amodes)
 286         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 287         ; emitSimultaneously assts
 288         ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
 289
 290 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 291                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 292                  -> FCode (VirtualSpOffset,     -- final Sp
 293                            CmmStmts)            -- assignments (regs+stack)
 294
 295 pushUnboxedTuple sp []
 296   = return (sp, noStmts)
 297 pushUnboxedTuple sp amodes
 298   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 299
 300                 -- separate the rest of the args into pointers and non-pointers
 301                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 302                 reg_arg_assts = assignToRegs reg_arg_amodes
 303
 304             -- push ptrs, then nonptrs, on the stack
 305         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 306         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 307
 308         ; returnFC (final_sp,
 309                     reg_arg_assts `plusStmts`
 310                     ptr_assts `plusStmts` nptr_assts) }
 311
 312
 313 -- -----------------------------------------------------------------------------
 314 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 315 -- we want to do things in a slightly different order to normal:
 316 --
 317 --              - push return address
 318 --              - adjust stack pointer
 319 --              - r = call(args...)
 320 --              - assign regs for unboxed tuple (usually just R1 = r)
 321 --              - return to continuation
 322 --
 323 -- The return address (i.e. stack frame) must be on the stack before
 324 -- doing the call in case the call ends up in the garbage collector.
 325 --
 326 -- Sadly, the information about the continuation is lost after we push it
 327 -- (in order to avoid pushing it again), so we end up doing a needless
 328 -- indirect jump (ToDo).
 329
 330 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 331 ccallReturnUnboxedTuple amodes before_jump
 332   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 333
 334         -- Push a return address if necessary
 335         ; pushReturnAddress eob
 336         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 337             (do { adjustSpAndHp args_sp
 338                 ; before_jump
 339                 ; returnUnboxedTuple amodes })
 340     }
 341
 342 -- -----------------------------------------------------------------------------
 343 -- Calling an out-of-line primop
 344
 345 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 346 tailCallPrimOp op args
 347  = do   {       -- We're going to perform a normal-looking tail call,
 348                 -- except that *all* the arguments will be in registers.
 349                 -- Hence the ASSERT( null leftovers )
 350           arg_amodes <- getArgAmodes args
 351         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 352               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 353
 354         ; ASSERT(null leftovers) -- no stack-resident args
 355           emitSimultaneously (assignToRegs arg_regs)
 356
 357         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 358         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 359
 360 -- -----------------------------------------------------------------------------
 361 -- Return Addresses
 362
 363 -- We always push the return address just before performing a tail call
 364 -- or return.  The reason we leave it until then is because the stack
 365 -- slot that the return address is to go into might contain something
 366 -- useful.
 367 --
 368 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 369 -- case expression and the return address is still to be pushed.
 370 --
 371 -- There are cases where it doesn't look necessary to push the return
 372 -- address: for example, just before doing a return to a known
 373 -- continuation.  However, the continuation will expect to find the
 374 -- return address on the stack in case it needs to do a heap check.
 375
 376 pushReturnAddress :: EndOfBlockInfo -> Code
 377
 378 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
 379   = do  { sp_rel <- getSpRelOffset args_sp
 380         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 381
 382 -- For a polymorphic case, we have two return addresses to push: the case
 383 -- return, and stg_seq_frame_info which turns a possible vectored return
 384 -- into a direct one.
 385 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
 386   = do  { sp_rel <- getSpRelOffset (args_sp-1)
 387         ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
 388         ; sp_rel <- getSpRelOffset args_sp
 389         ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
 390
 391 pushReturnAddress _ = nopC
 392
 393 -- -----------------------------------------------------------------------------
 394 -- Misc.
 395
 396 jumpToLbl :: CLabel -> Code
 397 -- Passes no argument to the destination procedure
 398 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 399
 400 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 401 assignToRegs reg_args
 402   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 403             | (expr, reg_id) <- reg_args ]
 404 \end{code}
 405
 406
 407 %************************************************************************
 408 %*                                                                      *
 409 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 410 %*                                                                      *
 411 %************************************************************************
 412
 413 This function adjusts the stack and heap pointers just before a tail
 414 call or return.  The stack pointer is adjusted to its final position
 415 (i.e. to point to the last argument for a tail call, or the activation
 416 record for a return).  The heap pointer may be moved backwards, in
 417 cases where we overallocated at the beginning of the basic block (see
 418 CgCase.lhs for discussion).
 419
 420 These functions {\em do not} deal with high-water-mark adjustment.
 421 That's done by functions which allocate stack space.
 422
 423 \begin{code}
 424 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 425               -> Code
 426 adjustSpAndHp newRealSp
 427   = do  { -- Adjust stack, if necessary.
 428           -- NB: the conditional on the monad-carried realSp
 429           --     is out of line (via codeOnly), to avoid a black hole
 430         ; new_sp <- getSpRelOffset newRealSp
 431         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 432         ; setRealSp newRealSp                   -- where realSp==newRealSp
 433
 434           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 435           -- because the latter was advanced to deal with the worst-case branch
 436           -- of the code, and we may be in a better-case branch.  In that case,
 437           -- move the real Hp *back* and retract some ticky allocation count.
 438         ; hp_usg <- getHpUsage
 439         ; let rHp = realHp hp_usg
 440               vHp = virtHp hp_usg
 441         ; new_hp <- getHpRelOffset vHp
 442         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 443         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 444         ; setRealHp vHp
 445         }
 446 \end{code}