%
-% (c) The GRASP/AQUA Project, Glasgow University, 1992-1995
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgTailCall.lhs,v 1.40 2004/09/30 10:35:50 simonpj Exp $
%
%********************************************************
%* *
%********************************************************
\begin{code}
-#include "HsVersions.h"
-
module CgTailCall (
- cgTailCall,
- performReturn,
- mkStaticAlgReturnCode, mkDynamicAlgReturnCode,
- mkPrimReturnCode,
-
- tailCallBusiness
-
- -- and to make the interface self-sufficient...
+ cgTailCall, performTailCall,
+ performReturn, performPrimReturn,
+ emitKnownConReturnCode, emitAlgReturnCode,
+ returnUnboxedTuple, ccallReturnUnboxedTuple,
+ pushUnboxedTuple,
+ tailCallPrimOp,
+
+ pushReturnAddress
) where
-IMPORT_Trace
-import Pretty -- Pretty/Outputable: rm (debugging only) ToDo
-import Outputable
+#include "HsVersions.h"
-import StgSyn
import CgMonad
-import AbsCSyn
-
-import Type ( isPrimType, Type )
-import CgBindery ( getAtomAmodes, getCAddrMode, getCAddrModeAndInfo )
-import CgCompInfo ( oTHER_TAG, iND_TAG )
-import CgRetConv ( dataReturnConvPrim, ctrlReturnConvAlg, dataReturnConvAlg,
- mkLiveRegsBitMask,
- CtrlReturnConvention(..), DataReturnConvention(..)
- )
-import CgStackery ( adjustRealSps, mkStkAmodes )
-import CgUsages ( getSpARelOffset, getSpBRelOffset )
-import CLabel ( CLabel, mkStdUpdCodePtrVecLabel, mkConUpdCodePtrVecLabel )
-import ClosureInfo ( nodeMustPointToIt, getEntryConvention, EntryConvention(..) )
-import CmdLineOpts ( GlobalSwitch(..) )
-import Id ( getDataConTyCon, getDataConTag,
- idType, getIdPrimRep, fIRST_TAG, Id,
- ConTag(..)
- )
-import Maybes ( assocMaybe, maybeToBool, Maybe(..) )
-import PrimRep ( retPrimRepSize )
-import Util
-\end{code}
-
-%************************************************************************
-%* *
-\subsection[tailcall-doc]{Documentation}
-%* *
-%************************************************************************
-
-\begin{code}
-cgTailCall :: StgArg -> [StgArg] -> StgLiveVars -> Code
-\end{code}
-
-Here's the code we generate for a tail call. (NB there may be no
-arguments, in which case this boils down to just entering a variable.)
-
-\begin{itemize}
-\item Adjust the stack ptr to \tr{tailSp + #args}.
-\item Put args in the top locations of the resulting stack.
-\item Make Node point to the function closure.
-\item Enter the function closure.
-\end{itemize}
-
-Things to be careful about:
-\begin{itemize}
-\item Don't overwrite stack locations before you have finished with
- them (remember you need the function and the as-yet-unmoved
- arguments).
-\item Preferably, generate no code to replace x by x on the stack (a
- common situation in tail-recursion).
-\item Adjust the stack high water mark appropriately.
-\end{itemize}
-
-Literals are similar to constructors; they return by putting
-themselves in an appropriate register and returning to the address on
-top of the B stack.
-
-\begin{code}
-cgTailCall (StgLitArg lit) [] live_vars
- = performPrimReturn (CLit lit) live_vars
-\end{code}
-
-Treat unboxed locals exactly like literals (above) except use the addr
-mode for the local instead of (CLit lit) in the assignment.
-
-Case for unboxed @Ids@ first:
-\begin{code}
-cgTailCall atom@(StgVarArg fun) [] live_vars
- | isPrimType (idType fun)
- = getCAddrMode fun `thenFC` \ amode ->
- performPrimReturn amode live_vars
-\end{code}
-
-The general case (@fun@ is boxed):
-\begin{code}
-cgTailCall (StgVarArg fun) args live_vars = performTailCall fun args live_vars
-\end{code}
-
-%************************************************************************
-%* *
-\subsection[return-and-tail-call]{Return and tail call}
-%* *
-%************************************************************************
-
-ADR-HACK
-
- A quick bit of hacking to try to solve my void#-leaking blues...
-
- I think I'm getting bitten by this stuff because code like
-
- \begin{pseudocode}
- case ds.s12 :: IoWorld of {
- -- lvs: [ds.s12]; rhs lvs: []; uniq: c0
- IoWorld ds.s13# -> ds.s13#;
- } :: Universe#
- \end{pseudocode}
-
- causes me to try to allocate a register to return the result in. The
- hope is that the following will avoid such problems (and that Will
- will do this in a cleaner way when he hits the same problem).
-
-KCAH-RDA
-
-\begin{code}
-performPrimReturn :: CAddrMode -- The thing to return
- -> StgLiveVars
- -> Code
-
-performPrimReturn amode live_vars
- = let
- kind = getAmodeRep amode
- ret_reg = dataReturnConvPrim kind
-
- assign_possibly = case kind of
- VoidRep -> AbsCNop
- kind -> (CAssign (CReg ret_reg) amode)
- in
- performReturn assign_possibly mkPrimReturnCode live_vars
-
-mkPrimReturnCode :: Sequel -> Code
-mkPrimReturnCode (UpdateCode _) = panic "mkPrimReturnCode: Upd"
-mkPrimReturnCode sequel = sequelToAmode sequel `thenFC` \ dest_amode ->
- absC (CReturn dest_amode DirectReturn)
- -- Direct, no vectoring
-
--- All constructor arguments in registers; Node and InfoPtr are set.
--- All that remains is
--- (a) to set TagReg, if necessary
--- (b) to set InfoPtr to the info ptr, if necessary
--- (c) to do the right sort of jump.
-
-mkStaticAlgReturnCode :: Id -- The constructor
- -> Maybe CLabel -- The info ptr, if it isn't already set
- -> Sequel -- where to return to
- -> Code
-
-mkStaticAlgReturnCode con maybe_info_lbl sequel
- = -- Generate profiling code if necessary
- (case return_convention of
- VectoredReturn sz -> profCtrC SLIT("VEC_RETURN") [mkIntCLit sz]
- other -> nopC
- ) `thenC`
-
- -- Set tag if necessary
- -- This is done by a macro, because if we are short of registers
- -- we don't set TagReg; instead the continuation gets the tag
- -- by indexing off the info ptr
- (case return_convention of
-
- UnvectoredReturn no_of_constrs
- | no_of_constrs > 1
- -> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag])
-
- other -> nopC
- ) `thenC`
-
- -- Generate the right jump or return
- (case sequel of
- UpdateCode _ -> -- Ha! We know the constructor,
- -- so we can go direct to the correct
- -- update code for that constructor
-
- -- Set the info pointer, and jump
- set_info_ptr `thenC`
- getIntSwitchChkrC `thenFC` \ isw_chkr ->
- absC (CJump (CLbl (update_label isw_chkr) CodePtrRep))
-
- CaseAlts _ (Just (alts, _)) -> -- Ho! We know the constructor so
- -- we can go right to the alternative
-
- -- No need to set info ptr when returning to a
- -- known join point. After all, the code at
- -- the destination knows what constructor it
- -- is going to handle.
-
- case assocMaybe alts tag of
- Just (alt_absC, join_lbl) -> absC (CJump (CLbl join_lbl CodePtrRep))
- Nothing -> panic "mkStaticAlgReturnCode: default"
- -- The Nothing case should never happen; it's the subject
- -- of a wad of special-case code in cgReturnCon
-
- other -> -- OnStack, or (CaseAlts) ret_amode Nothing)
- -- Set the info pointer, and jump
- set_info_ptr `thenC`
- sequelToAmode sequel `thenFC` \ ret_amode ->
- absC (CReturn ret_amode return_info)
- )
+import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
+ idInfoToAmode, cgIdInfoId, cgIdInfoLF,
+ cgIdInfoArgRep )
+import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ,
+ emitVectoredReturnInstr, closureInfoPtr )
+import CgCallConv
+import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW,
+ getSpRelOffset )
+import CgHeapery ( setRealHp, getHpRelOffset )
+import CgUtils ( emitSimultaneously )
+import CgTicky
+import ClosureInfo
+import SMRep ( CgRep, isVoidArg, separateByPtrFollowness )
+import Cmm
+import CmmUtils
+import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
+import Type ( isUnLiftedType )
+import Id ( Id, idName, idUnique, idType )
+import DataCon ( DataCon, dataConTyCon )
+import StgSyn ( StgArg )
+import TyCon ( TyCon )
+import PrimOp ( PrimOp )
+import Outputable
+import Monad ( when )
+
+-----------------------------------------------------------------------------
+-- Tail Calls
+
+cgTailCall :: Id -> [StgArg] -> Code
+
+-- Here's the code we generate for a tail call. (NB there may be no
+-- arguments, in which case this boils down to just entering a variable.)
+--
+-- * Put args in the top locations of the stack.
+-- * Adjust the stack ptr
+-- * Make R1 point to the function closure if necessary.
+-- * Perform the call.
+--
+-- Things to be careful about:
+--
+-- * Don't overwrite stack locations before you have finished with
+-- them (remember you need the function and the as-yet-unmoved
+-- arguments).
+-- * Preferably, generate no code to replace x by x on the stack (a
+-- common situation in tail-recursion).
+-- * Adjust the stack high water mark appropriately.
+--
+-- Treat unboxed locals exactly like literals (above) except use the addr
+-- mode for the local instead of (CLit lit) in the assignment.
+
+cgTailCall fun args
+ = do { fun_info <- getCgIdInfo fun
+
+ ; if isUnLiftedType (idType fun)
+ then -- Primitive return
+ ASSERT( null args )
+ do { fun_amode <- idInfoToAmode fun_info
+ ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
+
+ else -- Normal case, fun is boxed
+ do { arg_amodes <- getArgAmodes args
+ ; performTailCall fun_info arg_amodes noStmts }
+ }
+
+
+-- -----------------------------------------------------------------------------
+-- The guts of a tail-call
+
+performTailCall
+ :: CgIdInfo -- The function
+ -> [(CgRep,CmmExpr)] -- Args
+ -> CmmStmts -- Pending simultaneous assignments
+ -- *** GUARANTEED to contain only stack assignments.
+ -> Code
+
+performTailCall fun_info arg_amodes pending_assts
+ | Just join_sp <- maybeLetNoEscape fun_info
+ = -- A let-no-escape is slightly different, because we
+ -- arrange the stack arguments into pointers and non-pointers
+ -- to make the heap check easier. The tail-call sequence
+ -- is very similar to returning an unboxed tuple, so we
+ -- share some code.
+ do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
+ ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
+ ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
+ ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
+
+ | otherwise
+ = do { fun_amode <- idInfoToAmode fun_info
+ ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
+ opt_node_asst | nodeMustPointToIt lf_info = node_asst
+ | otherwise = noStmts
+ ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
+
+ ; case (getCallMethod fun_name lf_info (length arg_amodes)) of
+
+ -- Node must always point to things we enter
+ EnterIt -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
+ ; doFinalJump sp False (stmtC (CmmJump target [])) }
+
+ -- A function, but we have zero arguments. It is already in WHNF,
+ -- so we can just return it.
+ -- As with any return, Node must point to it.
+ ReturnIt -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False emitDirectReturnInstr }
+
+ -- A real constructor. Don't bother entering it,
+ -- just do the right sort of return instead.
+ -- As with any return, Node must point to it.
+ ReturnCon con -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False (emitKnownConReturnCode con) }
+
+ JumpToIt lbl -> do
+ { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False (jumpToLbl lbl) }
+
+ -- A slow function call via the RTS apply routines
+ -- Node must definitely point to the thing
+ SlowCall -> do
+ { let (apply_lbl, new_amodes) = constructSlowCall arg_amodes
+
+ -- Fill in all the arguments on the stack
+ ; (final_sp,stk_assts) <- mkStkAmodes sp new_amodes
+
+ ; emitSimultaneously (node_asst `plusStmts` stk_assts
+ `plusStmts` pending_assts)
+
+ ; when (not (null arg_amodes)) $ do
+ { if (isKnownFun lf_info)
+ then tickyKnownCallTooFewArgs
+ else tickyUnknownCall
+ ; tickySlowCallPat (map fst arg_amodes)
+ }
+
+ ; doFinalJump (final_sp + 1)
+ -- Add one, because the stg_ap functions
+ -- expect there to be a free slot on the stk
+ False (jumpToLbl apply_lbl)
+ }
+
+ -- A direct function call (possibly with some left-over arguments)
+ DirectEntry lbl arity -> do
+ { let
+ -- The args beyond the arity go straight on the stack
+ (arity_args, extra_stk_args) = splitAt arity arg_amodes
+
+ -- First chunk of args go in registers
+ (reg_arg_amodes, stk_args) = assignCallRegs arity_args
+
+ -- Any "extra" arguments are placed in frames on the
+ -- stack after the other arguments.
+ slow_stk_args = slowArgs extra_stk_args
+
+ reg_assts = assignToRegs reg_arg_amodes
+
+ ; if null slow_stk_args
+ then tickyKnownCallExact
+ else do tickyKnownCallExtraArgs
+ tickySlowCallPat (map fst extra_stk_args)
+
+ ; (final_sp, stk_assts) <- mkStkAmodes sp
+ (stk_args ++ slow_stk_args)
+
+ ; emitSimultaneously (opt_node_asst `plusStmts`
+ reg_assts `plusStmts`
+ stk_assts `plusStmts`
+ pending_assts)
+
+ ; doFinalJump final_sp False (jumpToLbl lbl) }
+ }
where
- tag = getDataConTag con
- tycon = getDataConTyCon con
- return_convention = ctrlReturnConvAlg tycon
- zero_indexed_tag = tag - fIRST_TAG -- Adjust tag to be zero-indexed
- -- cf AbsCUtils.mkAlgAltsCSwitch
-
- update_label isw_chkr
- = case (dataReturnConvAlg isw_chkr con) of
- ReturnInHeap -> mkStdUpdCodePtrVecLabel tycon tag
- ReturnInRegs _ -> mkConUpdCodePtrVecLabel tycon tag
+ fun_name = idName (cgIdInfoId fun_info)
+ lf_info = cgIdInfoLF fun_info
- return_info = case return_convention of
- UnvectoredReturn _ -> DirectReturn
- VectoredReturn _ -> StaticVectoredReturn zero_indexed_tag
- set_info_ptr = case maybe_info_lbl of
- Nothing -> nopC
- Just info_lbl -> absC (CAssign (CReg infoptr) (CLbl info_lbl DataPtrRep))
+-- -----------------------------------------------------------------------------
+-- The final clean-up before we do a jump at the end of a basic block.
+-- This code is shared by tail-calls and returns.
-mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code
+doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
+doFinalJump final_sp is_let_no_escape jump_code
+ = do { -- Adjust the high-water mark if necessary
+ adjustStackHW final_sp
-mkDynamicAlgReturnCode tycon dyn_tag sequel
- = case ctrlReturnConvAlg tycon of
- VectoredReturn sz ->
-
- profCtrC SLIT("VEC_RETURN") [mkIntCLit sz] `thenC`
- sequelToAmode sequel `thenFC` \ ret_addr ->
- absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag))
-
- UnvectoredReturn no_of_constrs ->
+ -- Push a return address if necessary (after the assignments
+ -- above, in case we clobber a live stack location)
+ --
+ -- DONT push the return address when we're about to jump to a
+ -- let-no-escape: the final tail call in the let-no-escape
+ -- will do this.
+ ; eob <- getEndOfBlockInfo
+ ; whenC (not is_let_no_escape) (pushReturnAddress eob)
- -- Set tag if necessary
- -- This is done by a macro, because if we are short of registers
- -- we don't set TagReg; instead the continuation gets the tag
- -- by indexing off the info ptr
- (if no_of_constrs > 1 then
- absC (CMacroStmt SET_TAG [dyn_tag])
- else
- nopC
- ) `thenC`
+ -- Final adjustment of Sp/Hp
+ ; adjustSpAndHp final_sp
+ -- and do the jump
+ ; jump_code }
- sequelToAmode sequel `thenFC` \ ret_addr ->
- -- Generate the right jump or return
- absC (CReturn ret_addr DirectReturn)
-\end{code}
+-- -----------------------------------------------------------------------------
+-- A general return (just a special case of doFinalJump, above)
-\begin{code}
-performReturn :: AbstractC -- Simultaneous assignments to perform
- -> (Sequel -> Code) -- The code to execute to actually do
- -- the return, given an addressing mode
- -- for the return address
- -> StgLiveVars
+performReturn :: Code -- The code to execute to actually do the return
-> Code
-performReturn sim_assts finish_code live_vars
- = getEndOfBlockInfo `thenFC` \ (EndOfBlockInfo args_spa args_spb sequel) ->
-
- -- Do the simultaneous assignments,
- doSimAssts args_spa live_vars sim_assts `thenC`
-
- -- Adjust stack pointers
- adjustRealSps args_spa args_spb `thenC`
-
- -- Do the return
- finish_code sequel -- "sequel" is `robust' in that it doesn't
- -- depend on stk-ptr values
-\end{code}
-
-\begin{code}
-performTailCall :: Id -- Function
- -> [StgArg] -- Args
- -> StgLiveVars
- -> Code
-
-performTailCall fun args live_vars
- = -- Get all the info we have about the function and args and go on to
- -- the business end
- getCAddrModeAndInfo fun `thenFC` \ (fun_amode, lf_info) ->
- getAtomAmodes args `thenFC` \ arg_amodes ->
-
- tailCallBusiness
- fun fun_amode lf_info arg_amodes
- live_vars AbsCNop {- No pending assignments -}
-
-
-tailCallBusiness :: Id -> CAddrMode -- Function and its amode
- -> LambdaFormInfo -- Info about the function
- -> [CAddrMode] -- Arguments
- -> StgLiveVars -- Live in continuation
-
- -> AbstractC -- Pending simultaneous assignments
- -- *** GUARANTEED to contain only stack assignments.
- -- In ptic, we don't need to look in here to
- -- discover all live regs
-
- -> Code
-
-tailCallBusiness fun fun_amode lf_info arg_amodes live_vars pending_assts
- = isSwitchSetC EmitArityChecks `thenFC` \ do_arity_chks ->
-
- nodeMustPointToIt lf_info `thenFC` \ node_points ->
- getEntryConvention fun lf_info
- (map getAmodeRep arg_amodes) `thenFC` \ entry_conv ->
-
- getEndOfBlockInfo `thenFC` \ (EndOfBlockInfo args_spa args_spb sequel) ->
-
- let
- node_asst
- = if node_points then
- CAssign (CReg node) fun_amode
- else
- AbsCNop
-
- (arg_regs, finish_code)
- = case entry_conv of
- ViaNode ->
- ([],
- mkAbstractCs [
- CCallProfCtrMacro SLIT("ENT_VIA_NODE") [],
- CAssign (CReg infoptr)
-
- (CMacroExpr DataPtrRep INFO_PTR [CReg node]),
- CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr])
- ])
- StdEntry lbl Nothing -> ([], CJump (CLbl lbl CodePtrRep))
- StdEntry lbl (Just itbl) -> ([], CAssign (CReg infoptr) (CLbl itbl DataPtrRep)
- `mkAbsCStmts`
- CJump (CLbl lbl CodePtrRep))
- DirectEntry lbl arity regs ->
- (regs, (if do_arity_chks
- then CMacroStmt SET_ARITY [mkIntCLit arity]
- else AbsCNop)
- `mkAbsCStmts` CJump (CLbl lbl CodePtrRep))
-
- no_of_args = length arg_amodes
-
- (reg_arg_assts, stk_arg_amodes)
- = (mkAbstractCs (zipWithEqual assign_to_reg arg_regs arg_amodes),
- drop (length arg_regs) arg_amodes) -- No regs, or
- -- args beyond arity
-
- assign_to_reg reg_id amode = CAssign (CReg reg_id) amode
- in
- case fun_amode of
- CJoinPoint join_spa join_spb -> -- Ha! A let-no-escape thingy
-
- ASSERT(not (args_spa > join_spa) || (args_spb > join_spb))
- -- If ASSERTion fails: Oops: the join point has *lower*
- -- stack ptrs than the continuation Note that we take
- -- the SpB point without the return address here. The
- -- return address is put on by the let-no-escapey thing
- -- when it finishes.
-
- mkStkAmodes join_spa join_spb stk_arg_amodes
- `thenFC` \ (final_spa, final_spb, stk_arg_assts) ->
-
- -- Do the simultaneous assignments,
- doSimAssts join_spa live_vars
- (mkAbstractCs [pending_assts, reg_arg_assts, stk_arg_assts])
- `thenC`
-
- -- Adjust stack ptrs
- adjustRealSps final_spa final_spb `thenC`
-
- -- Jump to join point
- absC finish_code
-
- _ -> -- else: not a let-no-escape (the common case)
-
- -- Make instruction to save return address
- loadRetAddrIntoRetReg sequel `thenFC` \ ret_asst ->
-
- mkStkAmodes args_spa args_spb stk_arg_amodes
- `thenFC`
- \ (final_spa, final_spb, stk_arg_assts) ->
-
- -- The B-stack space for the pushed return addess, with any args pushed
- -- on top, is recorded in final_spb.
-
- -- Do the simultaneous assignments,
- doSimAssts args_spa live_vars
- (mkAbstractCs [pending_assts, node_asst, ret_asst,
- reg_arg_assts, stk_arg_assts])
- `thenC`
-
- -- Final adjustment of stack pointers
- adjustRealSps final_spa final_spb `thenC`
-
- -- Now decide about semi-tagging
- isSwitchSetC DoSemiTagging `thenFC` \ semi_tagging_on ->
- case (semi_tagging_on, arg_amodes, node_points, sequel) of
+performReturn finish_code
+ = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
+ ; doFinalJump args_sp False{-not a LNE-} finish_code }
- --
- -- *************** The semi-tagging case ***************
- --
- ( True, [], True, CaseAlts _ (Just (st_alts, maybe_deflt_join_details))) ->
-
- -- Whoppee! Semi-tagging rules OK!
- -- (a) semi-tagging is switched on
- -- (b) there are no arguments,
- -- (c) Node points to the closure
- -- (d) we have a case-alternative sequel with
- -- some visible alternatives
-
- -- Why is test (c) necessary?
- -- Usually Node will point to it at this point, because we're
- -- scrutinsing something which is either a thunk or a
- -- constructor.
- -- But not always! The example I came across is when we have
- -- a top-level Double:
- -- lit.3 = D# 3.000
- -- ... (case lit.3 of ...) ...
- -- Here, lit.3 is built as a re-entrant thing, which you must enter.
- -- (OK, the simplifier should have eliminated this, but it's
- -- easy to deal with the case anyway.)
- let
- join_details_to_code (load_regs_and_profiling_code, join_lbl)
- = load_regs_and_profiling_code `mkAbsCStmts`
- CJump (CLbl join_lbl CodePtrRep)
-
- semi_tagged_alts = [ (mkMachInt (toInteger (tag - fIRST_TAG)),
- join_details_to_code join_details)
- | (tag, join_details) <- st_alts
- ]
-
- enter_jump
- -- Enter Node (we know infoptr will have the info ptr in it)!
- = mkAbstractCs [
- CCallProfCtrMacro SLIT("RET_SEMI_FAILED")
- [CMacroExpr IntRep INFO_TAG [CReg infoptr]],
- CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr]) ]
- in
- -- Final switch
- absC (mkAbstractCs [
- CAssign (CReg infoptr)
- (CVal (NodeRel zeroOff) DataPtrRep),
-
- case maybe_deflt_join_details of
- Nothing ->
- CSwitch (CMacroExpr IntRep INFO_TAG [CReg infoptr])
- (semi_tagged_alts)
- (enter_jump)
- Just (_, details) ->
- CSwitch (CMacroExpr IntRep EVAL_TAG [CReg infoptr])
- [(mkMachInt 0, enter_jump)]
- (CSwitch
- (CMacroExpr IntRep INFO_TAG [CReg infoptr])
- (semi_tagged_alts)
- (join_details_to_code details))
- ])
+-- -----------------------------------------------------------------------------
+-- Primitive Returns
+-- Just load the return value into the right register, and return.
- --
- -- *************** The non-semi-tagging case ***************
- --
- other -> absC finish_code
+performPrimReturn :: CgRep -> CmmExpr -- The thing to return
+ -> Code
+performPrimReturn rep amode
+ = do { whenC (not (isVoidArg rep))
+ (stmtC (CmmAssign ret_reg amode))
+ ; performReturn emitDirectReturnInstr }
+ where
+ ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+ = emitAlgReturnCode (dataConTyCon con)
+ (CmmLit (mkIntCLit (dataConTagZ con)))
+ -- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ = do { case ctrlReturnConvAlg tycon of
+ VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+ ; emitVectoredReturnInstr tag }
+ UnvectoredReturn _ -> emitDirectReturnInstr
+ }
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+-- - The tail-call target is an info table on the stack
+--
+-- - We separate stack arguments into pointers and non-pointers,
+-- to make it easier to leave things in a sane state for a heap check.
+-- This is OK because we can never partially-apply an unboxed tuple,
+-- unlike a function. The same technique is used when calling
+-- let-no-escape functions, because they also can't be partially
+-- applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+ = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+ ; tickyUnboxedTupleReturn (length amodes)
+ ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+ ; emitSimultaneously assts
+ ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing
+ -> [(CgRep, CmmExpr)] -- amodes of the components
+ -> FCode (VirtualSpOffset, -- final Sp
+ CmmStmts) -- assignments (regs+stack)
+
+pushUnboxedTuple sp []
+ = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+ = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+
+ -- separate the rest of the args into pointers and non-pointers
+ (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+ reg_arg_assts = assignToRegs reg_arg_amodes
+
+ -- push ptrs, then nonptrs, on the stack
+ ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args
+ ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+ ; returnFC (final_sp,
+ reg_arg_assts `plusStmts`
+ ptr_assts `plusStmts` nptr_assts) }
+
+
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+--
+-- - push return address
+-- - adjust stack pointer
+-- - r = call(args...)
+-- - assign regs for unboxed tuple (usually just R1 = r)
+-- - return to continuation
+--
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+--
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+
+ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
+ccallReturnUnboxedTuple amodes before_jump
+ = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
+
+ -- Push a return address if necessary
+ ; pushReturnAddress eob
+ ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
+ (do { adjustSpAndHp args_sp
+ ; before_jump
+ ; returnUnboxedTuple amodes })
+ }
+
+-- -----------------------------------------------------------------------------
+-- Calling an out-of-line primop
+
+tailCallPrimOp :: PrimOp -> [StgArg] -> Code
+tailCallPrimOp op args
+ = do { -- We're going to perform a normal-looking tail call,
+ -- except that *all* the arguments will be in registers.
+ -- Hence the ASSERT( null leftovers )
+ arg_amodes <- getArgAmodes args
+ ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
+ jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
+
+ ; ASSERT(null leftovers) -- no stack-resident args
+ emitSimultaneously (assignToRegs arg_regs)
+
+ ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
+ ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
+
+-- -----------------------------------------------------------------------------
+-- Return Addresses
+
+-- | We always push the return address just before performing a tail call
+-- or return. The reason we leave it until then is because the stack
+-- slot that the return address is to go into might contain something
+-- useful.
+--
+-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
+-- case expression and the return address is still to be pushed.
+--
+-- There are cases where it doesn't look necessary to push the return
+-- address: for example, just before doing a return to a known
+-- continuation. However, the continuation will expect to find the
+-- return address on the stack in case it needs to do a heap check.
+
+pushReturnAddress :: EndOfBlockInfo -> Code
+
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
+ = do { sp_rel <- getSpRelOffset args_sp
+ ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
+
+-- For a polymorphic case, we have two return addresses to push: the case
+-- return, and stg_seq_frame_info which turns a possible vectored return
+-- into a direct one.
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
+ = do { sp_rel <- getSpRelOffset (args_sp-1)
+ ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
+ ; sp_rel <- getSpRelOffset args_sp
+ ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
+
+pushReturnAddress _ = nopC
+
+-- -----------------------------------------------------------------------------
+-- Misc.
+
+jumpToLbl :: CLabel -> Code
+-- Passes no argument to the destination procedure
+jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
+
+assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
+assignToRegs reg_args
+ = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
+ | (expr, reg_id) <- reg_args ]
\end{code}
-\begin{code}
-loadRetAddrIntoRetReg :: Sequel -> FCode AbstractC
-
-loadRetAddrIntoRetReg InRetReg
- = returnFC AbsCNop -- Return address already there
-
-loadRetAddrIntoRetReg sequel
- = sequelToAmode sequel `thenFC` \ amode ->
- returnFC (CAssign (CReg RetReg) amode)
-
-\end{code}
%************************************************************************
%* *
-\subsection[doSimAssts]{@doSimAssts@}
+\subsection[CgStackery-adjust]{Adjusting the stack pointers}
%* *
%************************************************************************
-@doSimAssts@ happens at the end of every block of code.
-They are separate because we sometimes do some jiggery-pokery in between.
+This function adjusts the stack and heap pointers just before a tail
+call or return. The stack pointer is adjusted to its final position
+(i.e. to point to the last argument for a tail call, or the activation
+record for a return). The heap pointer may be moved backwards, in
+cases where we overallocated at the beginning of the basic block (see
+CgCase.lhs for discussion).
+
+These functions {\em do not} deal with high-water-mark adjustment.
+That's done by functions which allocate stack space.
\begin{code}
-doSimAssts :: VirtualSpAOffset -- tail_spa: SpA as seen by continuation
- -> StgLiveVars -- Live in continuation
- -> AbstractC
- -> Code
-
-doSimAssts tail_spa live_vars sim_assts
- = -- Do the simultaneous assignments
- absC (CSimultaneous sim_assts) `thenC`
-
- -- Stub any unstubbed slots; the only live variables are indicated in
- -- the end-of-block info in the monad
- nukeDeadBindings live_vars `thenC`
- getUnstubbedAStackSlots tail_spa `thenFC` \ a_slots ->
- -- Passing in tail_spa here should actually be redundant, because
- -- the stack should be trimmed (by nukeDeadBindings) to
- -- exactly the tail_spa position anyhow.
-
- -- Emit code to stub dead regs; this only generates actual
- -- machine instructions in in the DEBUG version
- -- *** NOT DONE YET ***
-
- (if (null a_slots)
- then nopC
- else profCtrC SLIT("A_STK_STUB") [mkIntCLit (length a_slots)] `thenC`
- mapCs stub_A_slot a_slots
- )
- where
- stub_A_slot :: VirtualSpAOffset -> Code
- stub_A_slot offset = getSpARelOffset offset `thenFC` \ spa_rel ->
- absC (CAssign (CVal spa_rel PtrRep)
- (CReg StkStubReg))
+adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr
+ -> Code
+adjustSpAndHp newRealSp
+ = do { -- Adjust stack, if necessary.
+ -- NB: the conditional on the monad-carried realSp
+ -- is out of line (via codeOnly), to avoid a black hole
+ ; new_sp <- getSpRelOffset newRealSp
+ ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case
+ ; setRealSp newRealSp -- where realSp==newRealSp
+
+ -- Adjust heap. The virtual heap pointer may be less than the real Hp
+ -- because the latter was advanced to deal with the worst-case branch
+ -- of the code, and we may be in a better-case branch. In that case,
+ -- move the real Hp *back* and retract some ticky allocation count.
+ ; hp_usg <- getHpUsage
+ ; let rHp = realHp hp_usg
+ vHp = virtHp hp_usg
+ ; new_hp <- getHpRelOffset vHp
+ ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp
+ ; tickyAllocHeap (vHp - rHp) -- ...ditto
+ ; setRealHp vHp
+ }
\end{code}