compiler/codeGen/CgTailCall.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   4 %
   5 % Code generation for tail calls.
   6
   7 \begin{code}
   8 {-# OPTIONS -w #-}
   9 -- The above warning supression flag is a temporary kludge.
  10 -- While working on this module you are encouraged to remove it and fix
  11 -- any warnings in the module. See
  12 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
  13 -- for details
  14
  15 module CgTailCall (
  16         cgTailCall, performTailCall,
  17         performReturn, performPrimReturn,
  18         returnUnboxedTuple, ccallReturnUnboxedTuple,
  19         pushUnboxedTuple,
  20         tailCallPrimOp,
  21
  22         pushReturnAddress
  23     ) where
  24
  25 #include "HsVersions.h"
  26
  27 import CgMonad
  28 import CgBindery
  29 import CgInfoTbls
  30 import CgCallConv
  31 import CgStackery
  32 import CgHeapery
  33 import CgUtils
  34 import CgTicky
  35 import ClosureInfo
  36 import SMRep
  37 import Cmm
  38 import CmmUtils
  39 import CLabel
  40 import Type
  41 import Id
  42 import StgSyn
  43 import PrimOp
  44 import Outputable
  45
  46 import Control.Monad
  47
  48 -----------------------------------------------------------------------------
  49 -- Tail Calls
  50
  51 cgTailCall :: Id -> [StgArg] -> Code
  52
  53 -- Here's the code we generate for a tail call.  (NB there may be no
  54 -- arguments, in which case this boils down to just entering a variable.)
  55 --
  56 --    * Put args in the top locations of the stack.
  57 --    * Adjust the stack ptr
  58 --    * Make R1 point to the function closure if necessary.
  59 --    * Perform the call.
  60 --
  61 -- Things to be careful about:
  62 --
  63 --    * Don't overwrite stack locations before you have finished with
  64 --      them (remember you need the function and the as-yet-unmoved
  65 --      arguments).
  66 --    * Preferably, generate no code to replace x by x on the stack (a
  67 --      common situation in tail-recursion).
  68 --    * Adjust the stack high water mark appropriately.
  69 --
  70 -- Treat unboxed locals exactly like literals (above) except use the addr
  71 -- mode for the local instead of (CLit lit) in the assignment.
  72
  73 cgTailCall fun args
  74   = do  { fun_info <- getCgIdInfo fun
  75
  76         ; if isUnLiftedType (idType fun)
  77           then  -- Primitive return
  78                 ASSERT( null args )
  79             do  { fun_amode <- idInfoToAmode fun_info
  80                 ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
  81
  82           else -- Normal case, fun is boxed
  83             do  { arg_amodes <- getArgAmodes args
  84                 ; performTailCall fun_info arg_amodes noStmts }
  85         }
  86
  87
  88 -- -----------------------------------------------------------------------------
  89 -- The guts of a tail-call
  90
  91 performTailCall
  92         :: CgIdInfo             -- The function
  93         -> [(CgRep,CmmExpr)]    -- Args
  94         -> CmmStmts             -- Pending simultaneous assignments
  95                                 --  *** GUARANTEED to contain only stack assignments.
  96         -> Code
  97
  98 performTailCall fun_info arg_amodes pending_assts
  99   | Just join_sp <- maybeLetNoEscape fun_info
 100   =        -- A let-no-escape is slightly different, because we
 101            -- arrange the stack arguments into pointers and non-pointers
 102            -- to make the heap check easier.  The tail-call sequence
 103            -- is very similar to returning an unboxed tuple, so we
 104            -- share some code.
 105      do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
 106         ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
 107         ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
 108         ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
 109
 110   | otherwise
 111   = do  { fun_amode <- idInfoToAmode fun_info
 112         ; let assignSt  = CmmAssign nodeReg fun_amode
 113               node_asst = oneStmt assignSt
 114               opt_node_asst | nodeMustPointToIt lf_info = node_asst
 115                             | otherwise                 = noStmts
 116         ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
 117         ; this_pkg <- getThisPackage
 118
 119         ; case (getCallMethod fun_name lf_info (length arg_amodes)) of
 120
 121             -- Node must always point to things we enter
 122             EnterIt -> do
 123                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 124                 ; let target     = entryCode (closureInfoPtr (CmmReg nodeReg))
 125                       enterClosure = stmtC (CmmJump target [])
 126                       -- If this is a scrutinee
 127                       -- let's check if the closure is a constructor
 128                       -- so we can directly jump to the alternatives switch
 129                       -- statement.
 130                       jumpInstr = getEndOfBlockInfo >>=
 131                                   maybeSwitchOnCons enterClosure
 132                 ; doFinalJump sp False jumpInstr }
 133
 134             -- A function, but we have zero arguments.  It is already in WHNF,
 135             -- so we can just return it.
 136             -- As with any return, Node must point to it.
 137             ReturnIt -> do
 138                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 139                 ; doFinalJump sp False emitReturnInstr }
 140
 141             -- A real constructor.  Don't bother entering it,
 142             -- just do the right sort of return instead.
 143             -- As with any return, Node must point to it.
 144             ReturnCon con -> do
 145                 { emitSimultaneously (node_asst `plusStmts` pending_assts)
 146                 ; doFinalJump sp False emitReturnInstr }
 147
 148             JumpToIt lbl -> do
 149                 { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
 150                 ; doFinalJump sp False (jumpToLbl lbl) }
 151
 152             -- A slow function call via the RTS apply routines
 153             -- Node must definitely point to the thing
 154             SlowCall -> do
 155                 {  when (not (null arg_amodes)) $ do
 156                    { if (isKnownFun lf_info)
 157                         then tickyKnownCallTooFewArgs
 158                         else tickyUnknownCall
 159                    ; tickySlowCallPat (map fst arg_amodes)
 160                    }
 161
 162                 ; let (apply_lbl, args, extra_args)
 163                         = constructSlowCall arg_amodes
 164
 165                 ; directCall sp apply_lbl args extra_args
 166                         (node_asst `plusStmts` pending_assts)
 167
 168                 }
 169
 170             -- A direct function call (possibly with some left-over arguments)
 171             DirectEntry lbl arity -> do
 172                 { if arity == length arg_amodes
 173                         then tickyKnownCallExact
 174                         else do tickyKnownCallExtraArgs
 175                                 tickySlowCallPat (map fst (drop arity arg_amodes))
 176
 177                 ; let
 178                      -- The args beyond the arity go straight on the stack
 179                      (arity_args, extra_args) = splitAt arity arg_amodes
 180
 181                 ; directCall sp lbl arity_args extra_args
 182                         (opt_node_asst `plusStmts` pending_assts)
 183                 }
 184         }
 185   where
 186     fun_name  = idName (cgIdInfoId fun_info)
 187     lf_info   = cgIdInfoLF fun_info
 188     untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
 189     -- Test if closure is a constructor
 190     maybeSwitchOnCons enterClosure eob
 191               | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob
 192               = do { is_constr <- newLabelC
 193                    -- Is the pointer tagged?
 194                    -- Yes, jump to switch statement
 195                    ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
 196                                 is_constr)
 197                    -- No, enter the closure.
 198                    ; enterClosure
 199                    ; labelC is_constr
 200                    ; stmtC (CmmJump (entryCode $ CmmLit (CmmLabel lbl)) [])
 201                    }
 202 {-
 203               -- This is a scrutinee for a case expression
 204               -- so let's see if we can directly inspect the closure
 205               | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
 206               = do { no_cons <- newLabelC
 207                    -- Both the NCG and gcc optimize away the temp
 208                    ; z <- newTemp  wordRep
 209                    ; stmtC (CmmAssign z tag_expr)
 210                    ; let tag = CmmReg z
 211                    -- Is the closure a cons?
 212                    ; stmtC (CmmCondBranch (cond1 tag) no_cons)
 213                    ; stmtC (CmmCondBranch (cond2 tag) no_cons)
 214                    -- Yes, jump to switch statement
 215                    ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
 216                    ; labelC no_cons
 217                    -- No, enter the closure.
 218                    ; enterClosure
 219                    }
 220 -}
 221               -- No case expression involved, enter the closure.
 222               | otherwise
 223               = do { stmtC untag_node
 224                    ; enterClosure
 225                    }
 226         where
 227           --cond1 tag  = cmmULtWord tag lowCons
 228           -- More efficient than the above?
 229 {-
 230           tag_expr   = cmmGetClosureType (CmmReg nodeReg)
 231           cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
 232           cond2 tag  = cmmUGtWord tag highCons
 233           lowCons    = CmmLit (mkIntCLit 1)
 234             -- CONSTR
 235           highCons   = CmmLit (mkIntCLit 8)
 236             -- CONSTR_NOCAF_STATIC (from ClosureType.h)
 237 -}
 238
 239
 240 directCall sp lbl args extra_args assts = do
 241   let
 242         -- First chunk of args go in registers
 243         (reg_arg_amodes, stk_args) = assignCallRegs args
 244
 245         -- Any "extra" arguments are placed in frames on the
 246         -- stack after the other arguments.
 247         slow_stk_args = slowArgs extra_args
 248
 249         reg_assts = assignToRegs reg_arg_amodes
 250   --
 251   (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 252
 253   emitSimultaneously (reg_assts     `plusStmts`
 254                       stk_assts     `plusStmts`
 255                       assts)
 256
 257   doFinalJump final_sp False (jumpToLbl lbl)
 258
 259 -- -----------------------------------------------------------------------------
 260 -- The final clean-up before we do a jump at the end of a basic block.
 261 -- This code is shared by tail-calls and returns.
 262
 263 doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
 264 doFinalJump final_sp is_let_no_escape jump_code
 265   = do  { -- Adjust the high-water mark if necessary
 266           adjustStackHW final_sp
 267
 268         -- Push a return address if necessary (after the assignments
 269         -- above, in case we clobber a live stack location)
 270         --
 271         -- DONT push the return address when we're about to jump to a
 272         -- let-no-escape: the final tail call in the let-no-escape
 273         -- will do this.
 274         ; eob <- getEndOfBlockInfo
 275         ; whenC (not is_let_no_escape) (pushReturnAddress eob)
 276
 277             -- Final adjustment of Sp/Hp
 278         ; adjustSpAndHp final_sp
 279
 280             -- and do the jump
 281         ; jump_code }
 282
 283 -- ----------------------------------------------------------------------------
 284 -- A general return (just a special case of doFinalJump, above)
 285
 286 performReturn :: Code   -- The code to execute to actually do the return
 287               -> Code
 288
 289 performReturn finish_code
 290   = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
 291         ; doFinalJump args_sp False{-not a LNE-} finish_code }
 292
 293 -- ----------------------------------------------------------------------------
 294 -- Primitive Returns
 295 -- Just load the return value into the right register, and return.
 296
 297 performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
 298                   -> Code
 299 performPrimReturn rep amode
 300   =  do { whenC (not (isVoidArg rep))
 301                 (stmtC (CmmAssign ret_reg amode))
 302         ; performReturn emitReturnInstr }
 303   where
 304     ret_reg = dataReturnConvPrim rep
 305
 306 -- ---------------------------------------------------------------------------
 307 -- Unboxed tuple returns
 308
 309 -- These are a bit like a normal tail call, except that:
 310 --
 311 --   - The tail-call target is an info table on the stack
 312 --
 313 --   - We separate stack arguments into pointers and non-pointers,
 314 --     to make it easier to leave things in a sane state for a heap check.
 315 --     This is OK because we can never partially-apply an unboxed tuple,
 316 --     unlike a function.  The same technique is used when calling
 317 --     let-no-escape functions, because they also can't be partially
 318 --     applied.
 319
 320 returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
 321 returnUnboxedTuple amodes
 322   = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
 323         ; tickyUnboxedTupleReturn (length amodes)
 324         ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
 325         ; emitSimultaneously assts
 326         ; doFinalJump final_sp False{-not a LNE-} emitReturnInstr }
 327
 328 pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
 329                  -> [(CgRep, CmmExpr)]          -- amodes of the components
 330                  -> FCode (VirtualSpOffset,     -- final Sp
 331                            CmmStmts)            -- assignments (regs+stack)
 332
 333 pushUnboxedTuple sp []
 334   = return (sp, noStmts)
 335 pushUnboxedTuple sp amodes
 336   = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 337
 338                 -- separate the rest of the args into pointers and non-pointers
 339                 (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
 340                 reg_arg_assts = assignToRegs reg_arg_amodes
 341
 342             -- push ptrs, then nonptrs, on the stack
 343         ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
 344         ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
 345
 346         ; returnFC (final_sp,
 347                     reg_arg_assts `plusStmts`
 348                     ptr_assts `plusStmts` nptr_assts) }
 349
 350
 351 -- -----------------------------------------------------------------------------
 352 -- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
 353 -- we want to do things in a slightly different order to normal:
 354 --
 355 --              - push return address
 356 --              - adjust stack pointer
 357 --              - r = call(args...)
 358 --              - assign regs for unboxed tuple (usually just R1 = r)
 359 --              - return to continuation
 360 --
 361 -- The return address (i.e. stack frame) must be on the stack before
 362 -- doing the call in case the call ends up in the garbage collector.
 363 --
 364 -- Sadly, the information about the continuation is lost after we push it
 365 -- (in order to avoid pushing it again), so we end up doing a needless
 366 -- indirect jump (ToDo).
 367
 368 ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
 369 ccallReturnUnboxedTuple amodes before_jump
 370   = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
 371
 372         -- Push a return address if necessary
 373         ; pushReturnAddress eob
 374         ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
 375             (do { adjustSpAndHp args_sp
 376                 ; before_jump
 377                 ; returnUnboxedTuple amodes })
 378     }
 379
 380 -- -----------------------------------------------------------------------------
 381 -- Calling an out-of-line primop
 382
 383 tailCallPrimOp :: PrimOp -> [StgArg] -> Code
 384 tailCallPrimOp op args
 385  = do   {       -- We're going to perform a normal-looking tail call,
 386                 -- except that *all* the arguments will be in registers.
 387                 -- Hence the ASSERT( null leftovers )
 388           arg_amodes <- getArgAmodes args
 389         ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
 390               jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
 391
 392         ; ASSERT(null leftovers) -- no stack-resident args
 393           emitSimultaneously (assignToRegs arg_regs)
 394
 395         ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
 396         ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
 397
 398 -- -----------------------------------------------------------------------------
 399 -- Return Addresses
 400
 401 -- We always push the return address just before performing a tail call
 402 -- or return.  The reason we leave it until then is because the stack
 403 -- slot that the return address is to go into might contain something
 404 -- useful.
 405 --
 406 -- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
 407 -- case expression and the return address is still to be pushed.
 408 --
 409 -- There are cases where it doesn't look necessary to push the return
 410 -- address: for example, just before doing a return to a known
 411 -- continuation.  However, the continuation will expect to find the
 412 -- return address on the stack in case it needs to do a heap check.
 413
 414 pushReturnAddress :: EndOfBlockInfo -> Code
 415
 416 pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _))
 417   = do  { sp_rel <- getSpRelOffset args_sp
 418         ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 419
 420 pushReturnAddress _ = nopC
 421
 422 -- -----------------------------------------------------------------------------
 423 -- Misc.
 424
 425 jumpToLbl :: CLabel -> Code
 426 -- Passes no argument to the destination procedure
 427 jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 428
 429 assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
 430 assignToRegs reg_args
 431   = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
 432             | (expr, reg_id) <- reg_args ]
 433 \end{code}
 434
 435
 436 %************************************************************************
 437 %*                                                                      *
 438 \subsection[CgStackery-adjust]{Adjusting the stack pointers}
 439 %*                                                                      *
 440 %************************************************************************
 441
 442 This function adjusts the stack and heap pointers just before a tail
 443 call or return.  The stack pointer is adjusted to its final position
 444 (i.e. to point to the last argument for a tail call, or the activation
 445 record for a return).  The heap pointer may be moved backwards, in
 446 cases where we overallocated at the beginning of the basic block (see
 447 CgCase.lhs for discussion).
 448
 449 These functions {\em do not} deal with high-water-mark adjustment.
 450 That's done by functions which allocate stack space.
 451
 452 \begin{code}
 453 adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
 454               -> Code
 455 adjustSpAndHp newRealSp
 456   = do  { -- Adjust stack, if necessary.
 457           -- NB: the conditional on the monad-carried realSp
 458           --     is out of line (via codeOnly), to avoid a black hole
 459         ; new_sp <- getSpRelOffset newRealSp
 460         ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
 461         ; setRealSp newRealSp                   -- where realSp==newRealSp
 462
 463           -- Adjust heap.  The virtual heap pointer may be less than the real Hp
 464           -- because the latter was advanced to deal with the worst-case branch
 465           -- of the code, and we may be in a better-case branch.  In that case,
 466           -- move the real Hp *back* and retract some ticky allocation count.
 467         ; hp_usg <- getHpUsage
 468         ; let rHp = realHp hp_usg
 469               vHp = virtHp hp_usg
 470         ; new_hp <- getHpRelOffset vHp
 471         ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
 472         ; tickyAllocHeap (vHp - rHp)            -- ...ditto
 473         ; setRealHp vHp
 474         }
 475 \end{code}
 476
 477 Some things are unused.
 478 \begin{code}
 479 _unused :: FS.FastString
 480 _unused = undefined
 481 \end{code}