X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FcodeGen%2FCgTailCall.lhs;h=dd7327b745f1d6ad41fb89414b049da1cb25c301;hb=04db0e9fa47ce4dfbcb73ec1752d94195f3b394e;hp=8dfd5f484a0f29a772334740b9746b46158f8e54;hpb=13386b66f4fcc1fbf2f7df13e8687510e857c848;p=ghc-hetmet.git

diff --git a/ghc/compiler/codeGen/CgTailCall.lhs b/ghc/compiler/codeGen/CgTailCall.lhs
index 8dfd5f4..dd7327b 100644
--- a/ghc/compiler/codeGen/CgTailCall.lhs
+++ b/ghc/compiler/codeGen/CgTailCall.lhs
@@ -1,7 +1,7 @@
 %
 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
 %
-% $Id: CgTailCall.lhs,v 1.31 2001/10/25 05:07:32 sof Exp $
+% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $
 %
 %********************************************************
 %*							*
@@ -11,15 +11,12 @@
 
 \begin{code}
 module CgTailCall (
-	cgTailCall,
+	cgTailCall, performTailCall,
 	performReturn, performPrimReturn,
-	mkStaticAlgReturnCode, mkDynamicAlgReturnCode,
-	mkUnboxedTupleReturnCode, returnUnboxedTuple,
-	mkPrimReturnCode,
-
-	tailCallFun,
+	emitKnownConReturnCode, emitAlgReturnCode,
+	returnUnboxedTuple, ccallReturnUnboxedTuple,
+	pushUnboxedTuple,
 	tailCallPrimOp,
-	doTailCall,
 
 	pushReturnAddress
     ) where
@@ -27,593 +24,432 @@ module CgTailCall (
 #include "HsVersions.h"
 
 import CgMonad
-import AbsCSyn
-import PprAbsC		( pprAmode )
-
-import AbsCUtils	( mkAbstractCs, getAmodeRep )
-import CgBindery	( getArgAmodes, getCAddrMode, getCAddrModeAndInfo )
-import CgRetConv	( dataReturnConvPrim,
-			  ctrlReturnConvAlg, CtrlReturnConvention(..),
-			  assignAllRegs, assignRegs
-			)
-import CgStackery	( mkTaggedStkAmodes, adjustStackHW )
-import CgUsages		( getSpRelOffset, adjustSpAndHp )
-import CgUpdate		( pushSeqFrame )
-import CLabel		( mkUpdInfoLabel, mkRtsPrimOpLabel )
-import ClosureInfo	( nodeMustPointToIt,
-			  getEntryConvention, EntryConvention(..), LambdaFormInfo
-			)
-import CmdLineOpts	( opt_DoSemiTagging )
-import Id		( Id, idType, idName )
-import DataCon		( DataCon, dataConTyCon, dataConTag, fIRST_TAG )
-import Maybes		( maybeToBool )
-import PrimRep		( PrimRep(..) )
-import StgSyn		( StgArg )
+import CgBindery	( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
+			  idInfoToAmode, cgIdInfoId, cgIdInfoLF,
+			  cgIdInfoArgRep )
+import CgInfoTbls	( entryCode, emitDirectReturnInstr, dataConTagZ,
+			  emitVectoredReturnInstr, closureInfoPtr )
+import CgCallConv
+import CgStackery	( setRealSp, mkStkAmodes, adjustStackHW,
+			  getSpRelOffset )
+import CgHeapery	( setRealHp, getHpRelOffset )
+import CgUtils		( emitSimultaneously )
+import CgTicky
+import ClosureInfo
+import SMRep		( CgRep, isVoidArg, separateByPtrFollowness )
+import Cmm	
+import CmmUtils
+import CLabel		( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
 import Type		( isUnLiftedType )
+import Id		( Id, idName, idUnique, idType )
+import DataCon		( DataCon, dataConTyCon )
+import StgSyn		( StgArg )
 import TyCon            ( TyCon )
 import PrimOp		( PrimOp )
-import Util		( zipWithEqual, splitAtList )
-import ListSetOps	( assocMaybe )
 import Outputable
-import Panic		( panic, assertPanic )
-\end{code}
 
-%************************************************************************
-%*									*
-\subsection[tailcall-doc]{Documentation}
-%*									*
-%************************************************************************
-
-\begin{code}
-cgTailCall :: Id -> [StgArg] -> Code
-\end{code}
-
-Here's the code we generate for a tail call.  (NB there may be no
-arguments, in which case this boils down to just entering a variable.)
-
-\begin{itemize}
-\item	Adjust the stack ptr to \tr{tailSp + #args}.
-\item	Put args in the top locations of the resulting stack.
-\item	Make Node point to the function closure.
-\item	Enter the function closure.
-\end{itemize}
-
-Things to be careful about:
-\begin{itemize}
-\item	Don't overwrite stack locations before you have finished with
-	them (remember you need the function and the as-yet-unmoved
-	arguments).
-\item	Preferably, generate no code to replace x by x on the stack (a
-	common situation in tail-recursion).
-\item	Adjust the stack high water mark appropriately.
-\end{itemize}
-
-Treat unboxed locals exactly like literals (above) except use the addr
-mode for the local instead of (CLit lit) in the assignment.
-
-Case for unboxed @Ids@ first:
-\begin{code}
-cgTailCall fun []
-  | isUnLiftedType (idType fun)
-  = getCAddrMode fun		`thenFC` \ amode ->
-    performPrimReturn (ppr fun) amode
-\end{code}
-
-The general case (@fun@ is boxed):
-\begin{code}
-cgTailCall fun args = performTailCall fun args
-\end{code}
-
-%************************************************************************
-%*									*
-\subsection[return-and-tail-call]{Return and tail call}
-%*									*
-%************************************************************************
+import Monad		( when )
 
-\begin{code}
-performPrimReturn :: SDoc	-- Just for debugging (sigh)
-		  -> CAddrMode	-- The thing to return
-		  -> Code
+-----------------------------------------------------------------------------
+-- Tail Calls
 
-performPrimReturn doc amode
-  = let
-	kind = getAmodeRep amode
-	ret_reg = WARN( case kind of { PtrRep -> True; other -> False }, text "primRet" <+> doc <+> pprAmode amode )
-		  dataReturnConvPrim kind
-
-	assign_possibly = case kind of
-	  VoidRep -> AbsCNop
-	  kind -> (CAssign (CReg ret_reg) amode)
-    in
-    performReturn assign_possibly (mkPrimReturnCode doc)
-
-mkPrimReturnCode :: SDoc 		-- Debugging only
-		 -> Sequel
-		 -> Code
-mkPrimReturnCode doc UpdateCode	= pprPanic "mkPrimReturnCode: Upd" doc
-mkPrimReturnCode doc sequel	= sequelToAmode sequel	`thenFC` \ dest_amode ->
-				  absC (CReturn dest_amode DirectReturn)
-				  -- Direct, no vectoring
+cgTailCall :: Id -> [StgArg] -> Code
 
--- Constructor is built on the heap; Node is set.
--- All that remains is
---	(a) to set TagReg, if necessary
---	(c) to do the right sort of jump.
-
-mkStaticAlgReturnCode :: DataCon	-- The constructor
-		      -> Sequel		-- where to return to
-		      -> Code
-
-mkStaticAlgReturnCode con sequel
-  =	-- Generate profiling code if necessary
-    (case return_convention of
-	VectoredReturn sz -> profCtrC SLIT("TICK_VEC_RETURN") [mkIntCLit sz]
-	other		  -> nopC
-    )					`thenC`
-
-	-- Set tag if necessary
-	-- This is done by a macro, because if we are short of registers
-	-- we don't set TagReg; instead the continuation gets the tag
-	-- by indexing off the info ptr
-    (case return_convention of
-
-	UnvectoredReturn no_of_constrs
-	 | no_of_constrs > 1
-		-> absC (CMacroStmt SET_TAG [mkIntCLit zero_indexed_tag])
-
-	other	-> nopC
-    )					`thenC`
-
-	-- Generate the right jump or return
-    (case sequel of
-	UpdateCode ->	-- Ha!  We can go direct to the update code,
-			-- (making sure to jump to the *correct* update
-			--  code.)
-    			absC (CReturn (CLbl mkUpdInfoLabel CodePtrRep)
-				      return_info)
-
-	CaseAlts _ (Just (alts, _)) ->	-- Ho! We know the constructor so
-					-- we can go right to the alternative
-
-		case assocMaybe alts tag of
-		   Just (alt_absC, join_lbl) -> 
-			absC (CJump (CLbl join_lbl CodePtrRep))
-		   Nothing -> panic "mkStaticAlgReturnCode: default"
-				-- The Nothing case should never happen; 
-				-- it's the subject of a wad of special-case 
-				-- code in cgReturnCon
-
-	-- can't be a SeqFrame, because we're returning a constructor
-
-	other ->	-- OnStack, or (CaseAlts ret_amode Nothing)
-		    sequelToAmode sequel	`thenFC` \ ret_amode ->
-		    absC (CReturn ret_amode return_info)
-    )
+-- Here's the code we generate for a tail call.  (NB there may be no
+-- arguments, in which case this boils down to just entering a variable.)
+-- 
+--    *	Put args in the top locations of the stack.
+--    *	Adjust the stack ptr
+--    *	Make R1 point to the function closure if necessary.
+--    *	Perform the call.
+--
+-- Things to be careful about:
+--
+--    *	Don't overwrite stack locations before you have finished with
+-- 	them (remember you need the function and the as-yet-unmoved
+-- 	arguments).
+--    *	Preferably, generate no code to replace x by x on the stack (a
+-- 	common situation in tail-recursion).
+--    *	Adjust the stack high water mark appropriately.
+-- 
+-- Treat unboxed locals exactly like literals (above) except use the addr
+-- mode for the local instead of (CLit lit) in the assignment.
+
+cgTailCall fun args
+  = do	{ fun_info <- getCgIdInfo fun
+
+	; if isUnLiftedType (idType fun)
+	  then 	-- Primitive return
+		ASSERT( null args )
+	    do	{ fun_amode <- idInfoToAmode fun_info
+		; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } 
+
+	  else -- Normal case, fun is boxed
+	    do  { arg_amodes <- getArgAmodes args
+		; performTailCall fun_info arg_amodes noStmts }
+	}
+		
+
+-- -----------------------------------------------------------------------------
+-- The guts of a tail-call
+
+performTailCall 
+	:: CgIdInfo		-- The function
+	-> [(CgRep,CmmExpr)]	-- Args
+	-> CmmStmts		-- Pending simultaneous assignments
+				--  *** GUARANTEED to contain only stack assignments.
+	-> Code
 
+performTailCall fun_info arg_amodes pending_assts
+  | Just join_sp <- maybeLetNoEscape fun_info
+  = 	   -- A let-no-escape is slightly different, because we
+	   -- arrange the stack arguments into pointers and non-pointers
+	   -- to make the heap check easier.  The tail-call sequence
+	   -- is very similar to returning an unboxed tuple, so we
+	   -- share some code.
+     do	{ (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
+	; emitSimultaneously (pending_assts `plusStmts` arg_assts)
+	; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
+	; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
+
+  | otherwise
+  = do 	{ fun_amode <- idInfoToAmode fun_info
+	; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
+	      opt_node_asst | nodeMustPointToIt lf_info = node_asst
+			    | otherwise		        = noStmts
+	; EndOfBlockInfo sp _ <- getEndOfBlockInfo
+	; hmods <- getHomeModules
+
+	; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of
+
+	    -- Node must always point to things we enter
+	    EnterIt -> do
+		{ emitSimultaneously (node_asst `plusStmts` pending_assts) 
+		; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
+		; doFinalJump sp False (stmtC (CmmJump target [])) }
+    
+	    -- A function, but we have zero arguments.  It is already in WHNF,
+	    -- so we can just return it.  
+	    -- As with any return, Node must point to it.
+	    ReturnIt -> do
+		{ emitSimultaneously (node_asst `plusStmts` pending_assts)
+		; doFinalJump sp False emitDirectReturnInstr }
+    
+	    -- A real constructor.  Don't bother entering it, 
+	    -- just do the right sort of return instead.
+	    -- As with any return, Node must point to it.
+	    ReturnCon con -> do
+		{ emitSimultaneously (node_asst `plusStmts` pending_assts)
+		; doFinalJump sp False (emitKnownConReturnCode con) }
+
+	    JumpToIt lbl -> do
+		{ emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
+		; doFinalJump sp False (jumpToLbl lbl) }
+    
+	    -- A slow function call via the RTS apply routines
+	    -- Node must definitely point to the thing
+	    SlowCall -> do 
+		{  when (not (null arg_amodes)) $ do
+		   { if (isKnownFun lf_info) 
+			then tickyKnownCallTooFewArgs
+			else tickyUnknownCall
+		   ; tickySlowCallPat (map fst arg_amodes) 
+		   }
+
+		; let (apply_lbl, args, extra_args) 
+			= constructSlowCall arg_amodes
+
+		; directCall sp apply_lbl args extra_args 
+			(node_asst `plusStmts` pending_assts)
+		}
+    
+	    -- A direct function call (possibly with some left-over arguments)
+	    DirectEntry lbl arity -> do
+		{ if arity == length arg_amodes
+			then tickyKnownCallExact
+			else do tickyKnownCallExtraArgs
+				tickySlowCallPat (map fst (drop arity arg_amodes))
+
+ 		; let
+		     -- The args beyond the arity go straight on the stack
+		     (arity_args, extra_args) = splitAt arity arg_amodes
+     
+		; directCall sp lbl arity_args extra_args
+			(opt_node_asst `plusStmts` pending_assts)
+	        }
+	}
   where
-    tag		      = dataConTag   con
-    tycon	      = dataConTyCon con
-    return_convention = ctrlReturnConvAlg tycon
-    zero_indexed_tag  = tag - fIRST_TAG	      -- Adjust tag to be zero-indexed
-					      -- cf AbsCUtils.mkAlgAltsCSwitch
-
-    return_info = 
-       case return_convention of
-		UnvectoredReturn _ -> DirectReturn
-		VectoredReturn _   -> StaticVectoredReturn zero_indexed_tag
+    fun_name  = idName (cgIdInfoId fun_info)
+    lf_info   = cgIdInfoLF fun_info
 
-mkUnboxedTupleReturnCode :: Sequel -> Code
-mkUnboxedTupleReturnCode sequel
-    = case sequel of
-	-- can't update with an unboxed tuple!
-	UpdateCode -> panic "mkUnboxedTupleReturnCode"
 
-	CaseAlts _ (Just ([(_,(alt_absC,join_lbl))], _)) ->
-			absC (CJump (CLbl join_lbl CodePtrRep))
 
-	-- can't be a SeqFrame
+directCall sp lbl args extra_args assts = do
+  let
+	-- First chunk of args go in registers
+	(reg_arg_amodes, stk_args) = assignCallRegs args
+     
+	-- Any "extra" arguments are placed in frames on the
+	-- stack after the other arguments.
+	slow_stk_args = slowArgs extra_args
 
-	other ->	-- OnStack, or (CaseAlts ret_amode something)
-		    sequelToAmode sequel	`thenFC` \ ret_amode ->
-		    absC (CReturn ret_amode DirectReturn)
+	reg_assts = assignToRegs reg_arg_amodes
+  --
+  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
 
--- This function is used by PrimOps that return enumerated types (i.e.
--- all the comparison operators).
+  emitSimultaneously (reg_assts     `plusStmts`
+		      stk_assts     `plusStmts`
+		      assts)
 
-mkDynamicAlgReturnCode :: TyCon -> CAddrMode -> Sequel -> Code
+  doFinalJump final_sp False (jumpToLbl lbl)
 
-mkDynamicAlgReturnCode tycon dyn_tag sequel
-  = case ctrlReturnConvAlg tycon of
-	VectoredReturn sz ->
+-- -----------------------------------------------------------------------------
+-- The final clean-up before we do a jump at the end of a basic block.
+-- This code is shared by tail-calls and returns.
 
-		profCtrC SLIT("TICK_VEC_RETURN") [mkIntCLit sz] `thenC`
-		sequelToAmode sequel		`thenFC` \ ret_addr ->
-		absC (CReturn ret_addr (DynamicVectoredReturn dyn_tag))
+doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code 
+doFinalJump final_sp is_let_no_escape jump_code
+  = do	{ -- Adjust the high-water mark if necessary
+	  adjustStackHW final_sp
 
-	UnvectoredReturn no_of_constrs ->
+	-- Push a return address if necessary (after the assignments
+	-- above, in case we clobber a live stack location)
+	--
+	-- DONT push the return address when we're about to jump to a
+	-- let-no-escape: the final tail call in the let-no-escape
+	-- will do this.
+	; eob <- getEndOfBlockInfo
+	; whenC (not is_let_no_escape) (pushReturnAddress eob)
 
-		-- Set tag if necessary
-		-- This is done by a macro, because if we are short of registers
-		-- we don't set TagReg; instead the continuation gets the tag
-		-- by indexing off the info ptr
-		(if no_of_constrs > 1 then
-			absC (CMacroStmt SET_TAG [dyn_tag])
-		else
-			nopC
-		)			`thenC`
+	    -- Final adjustment of Sp/Hp
+	; adjustSpAndHp final_sp
 
+	    -- and do the jump
+	; jump_code }
 
-		sequelToAmode sequel		`thenFC` \ ret_addr ->
-		-- Generate the right jump or return
-		absC (CReturn ret_addr DirectReturn)
-\end{code}
+-- -----------------------------------------------------------------------------
+-- A general return (just a special case of doFinalJump, above)
 
-\begin{code}
-performReturn :: AbstractC	    -- Simultaneous assignments to perform
-	      -> (Sequel -> Code)   -- The code to execute to actually do
-				    -- the return, given an addressing mode
-				    -- for the return address
+performReturn :: Code		-- The code to execute to actually do the return
 	      -> Code
 
--- this is just a special case of doTailCall, later.
-performReturn sim_assts finish_code
-  = getEndOfBlockInfo	`thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
-
-	-- Do the simultaneous assignments,
-    doSimAssts sim_assts	 	`thenC`
-
-	-- push a return address if necessary
-	-- (after the assignments above, in case we clobber a live
-	--  stack location)
-    pushReturnAddress eob		`thenC`
-
-	-- Adjust Sp/Hp
-    adjustSpAndHp args_sp		`thenC`
-
-	-- Do the return
-    finish_code sequel		-- "sequel" is `robust' in that it doesn't
-				-- depend on stk-ptr values
-\end{code}
-
-Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
-we want to do things in a slightly different order to normal:
-
-		- push return address
-		- adjust stack pointer
-		- r = call(args...)
-		- assign regs for unboxed tuple (usually just R1 = r)
-		- return to continuation
-
-The return address (i.e. stack frame) must be on the stack before
-doing the call in case the call ends up in the garbage collector.
-
-Sadly, the information about the continuation is lost after we push it
-(in order to avoid pushing it again), so we end up doing a needless
-indirect jump (ToDo).
+performReturn finish_code
+  = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
+	; doFinalJump args_sp False{-not a LNE-} finish_code }
 
-\begin{code}
-returnUnboxedTuple :: [CAddrMode] -> Code -> Code
-returnUnboxedTuple amodes before_jump
-  = getEndOfBlockInfo	`thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
-
-	-- push a return address if necessary
-    pushReturnAddress eob		`thenC`
-    setEndOfBlockInfo (EndOfBlockInfo args_sp (OnStack args_sp)) (
-
-	-- Adjust Sp/Hp
-    adjustSpAndHp args_sp		`thenC`
-
-    before_jump				`thenC`
-
-    let (ret_regs, leftovers) = assignRegs [] (map getAmodeRep amodes)
-    in
-
-    profCtrC SLIT("TICK_RET_UNBOXED_TUP") [mkIntCLit (length amodes)] `thenC`
+-- -----------------------------------------------------------------------------
+-- Primitive Returns
+-- Just load the return value into the right register, and return.
 
-    doTailCall amodes ret_regs
-		mkUnboxedTupleReturnCode
-		(length leftovers)  {- fast args arity -}
-		AbsCNop {-no pending assigments-}
-		Nothing {-not a let-no-escape-}
-		False   {-node doesn't point-}
-     )
-\end{code}
+performPrimReturn :: CgRep -> CmmExpr	-- The thing to return
+		  -> Code
+performPrimReturn rep amode
+  =  do { whenC (not (isVoidArg rep))
+		(stmtC (CmmAssign ret_reg amode))
+	; performReturn emitDirectReturnInstr }
+  where
+    ret_reg = dataReturnConvPrim rep
 
-\begin{code}
-performTailCall :: Id -> [StgArg] -> Code
-performTailCall fun args
-  = getCAddrModeAndInfo fun			`thenFC` \ (fun', fun_amode, lf_info) ->
-    getArgAmodes args				`thenFC` \ arg_amodes ->
-    tailCallFun fun' fun_amode lf_info arg_amodes AbsCNop{- No pending assignments -}
-\end{code}
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
 
-Generating code for a tail call to a function (or closure)
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
 
-\begin{code}
-tailCallFun
-	 :: Id				-- Function
-	 -> CAddrMode
-	 -> LambdaFormInfo
-	 -> [CAddrMode]			-- Arguments
-	 -> AbstractC			-- Pending simultaneous assignments
-					  -- *** GUARANTEED to contain only stack 
-					  -- assignments.
-					-- In ptic, we don't need to look in 
-					-- here to discover all live regs
-	 -> Code
-
-tailCallFun fun fun_amode lf_info arg_amodes pending_assts
-  = nodeMustPointToIt lf_info			`thenFC` \ node_points ->
-	-- we use the name of fun', the Id from the environment, rather than
-	-- fun from the STG tree, in case it is a top-level name that we globalised
-	-- (see cgTopRhsClosure).
-    getEntryConvention (idName fun) lf_info
-	(map getAmodeRep arg_amodes)		`thenFC` \ entry_conv ->
-    let
-	node_asst
-	  = if node_points then
-		CAssign (CReg node) fun_amode
-	    else
-		AbsCNop
-
-	(arg_regs, finish_code, arity)
-	  = case entry_conv of
-	      ViaNode ->
-		([],
-		     profCtrC SLIT("TICK_ENT_VIA_NODE") [] `thenC`
-		     absC (CJump (CMacroExpr CodePtrRep ENTRY_CODE 
-			        [CVal (nodeRel 0) DataPtrRep]))
-		     , 0)
-	      StdEntry lbl -> ([], absC (CJump (CLbl lbl CodePtrRep)), 0)
-	      DirectEntry lbl arity regs  ->
-		(regs,	 absC (CJump (CLbl lbl CodePtrRep)), 
-	         arity - length regs)
-
-	-- set up for a let-no-escape if necessary
-	join_sp = case fun_amode of
-			CJoinPoint sp -> Just sp
-			other         -> Nothing
-    in
-    doTailCall arg_amodes arg_regs (const finish_code) arity
-		(mkAbstractCs [node_asst,pending_assts]) join_sp node_points
-
-
--- this generic tail call code is used for both function calls and returns.
-
-doTailCall 
-	:: [CAddrMode] 			-- args to pass to function
-	-> [MagicId]			-- registers to use
-	-> (Sequel->Code)		-- code to perform jump
-	-> Int				-- number of "fast" stack arguments
-	-> AbstractC			-- pending assignments
-	-> Maybe VirtualSpOffset	-- sp offset to trim stack to: 
-					-- USED iff destination is a let-no-escape
-	-> Bool				-- node points to the closure to enter
-	-> Code
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+  = emitAlgReturnCode (dataConTyCon con)
+		      (CmmLit (mkIntCLit (dataConTagZ con)))
+			-- emitAlgReturnCode requires zero-indexed tag
 
-doTailCall arg_amodes arg_regs finish_code arity pending_assts
-		maybe_join_sp node_points
-  = getEndOfBlockInfo	`thenFC` \ eob@(EndOfBlockInfo args_sp sequel) ->
-
-    let
-	(reg_arg_amodes, stk_arg_amodes) = splitAtList arg_regs arg_amodes
-	    -- We get some stk_arg_amodes if (a) no regs, or 
-	    --				     (b) args beyond arity
-
-	reg_arg_assts
-	  = mkAbstractCs (zipWithEqual "assign_to_reg2" 
-				assign_to_reg arg_regs reg_arg_amodes)
-
-	assign_to_reg reg_id amode = CAssign (CReg reg_id) amode
-
-	join_sp = case maybe_join_sp of
-			Just sp -> ASSERT(not (args_sp > sp)) sp
-	      -- If ASSERTion fails: Oops: the join point has *lower*
-	      -- stack ptrs than the continuation Note that we take
-	      -- the Sp point without the return address here.	 The
-	      -- return address is put on by the let-no-escapey thing
-	      -- when it finishes.
-			Nothing -> args_sp
-
-	(fast_stk_amodes, tagged_stk_amodes) = 
-		splitAt arity stk_arg_amodes
-
-	-- eager blackholing, at the end of the basic block.
-	(r1_tmp_asst, bh_asst)
-	 = case sequel of
-#if 0
-	-- no: UpdateCode doesn't tell us that we're in a thunk's entry code.
-	-- we might be in a case continuation later down the line.  Also,
-	-- we might have pushed a return address on the stack, if we're in
-	-- a case scrut, and still be in the thunk's entry code.
-		UpdateCode -> 
-		   (CAssign node_save nodeReg,
-		    CAssign (CVal (CIndex node_save (mkIntCLit 0) PtrRep) 
-				  PtrRep)
-			    (CLbl mkBlackHoleInfoTableLabel DataPtrRep))
-		   where
-		     node_save = CTemp (mkPseudoUnique1 2) DataPtrRep
-#endif
-		_ -> (AbsCNop, AbsCNop)
-    in
-	-- We can omit tags on the arguments passed to the fast entry point, 
-	-- but we have to be careful to fill in the tags on any *extra*
-	-- arguments we're about to push on the stack.
-
-	mkTaggedStkAmodes join_sp tagged_stk_amodes `thenFC`
-			    \ (fast_sp, tagged_arg_assts, tag_assts) ->
-
-	mkTaggedStkAmodes fast_sp fast_stk_amodes `thenFC`
-			    \ (final_sp, fast_arg_assts, _) ->
-
-	-- adjust the high-water mark if necessary
-	adjustStackHW final_sp	`thenC`
-
-		-- The stack space for the pushed return addess, 
-		-- with any args pushed on top, is recorded in final_sp.
-	
-			-- Do the simultaneous assignments,
-	doSimAssts (mkAbstractCs [r1_tmp_asst,
-				  pending_assts,
-			          reg_arg_assts, 
-				  fast_arg_assts, 
-				  tagged_arg_assts,
-			          tag_assts])	`thenC`
-	absC bh_asst `thenC`
-	
-		-- push a return address if necessary
-		-- (after the assignments above, in case we clobber a live
-		--  stack location)
-
-		-- DONT push the return address when we're about
-		-- to jump to a let-no-escape: the final tail call
-		-- in the let-no-escape will do this.
-	(if (maybeToBool maybe_join_sp)
-		then nopC
-		else pushReturnAddress eob)		`thenC`
-
-		-- Final adjustment of Sp/Hp
-	adjustSpAndHp final_sp		`thenC`
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ =  do	{ case ctrlReturnConvAlg tycon of
+	    VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+	 			     	; emitVectoredReturnInstr tag }
+	    UnvectoredReturn _    -> emitDirectReturnInstr 
+	}
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+--   - The tail-call target is an info table on the stack
+--
+--   - We separate stack arguments into pointers and non-pointers,
+--     to make it easier to leave things in a sane state for a heap check.
+--     This is OK because we can never partially-apply an unboxed tuple,
+--     unlike a function.  The same technique is used when calling
+--     let-no-escape functions, because they also can't be partially
+--     applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+  = do 	{ eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+	; tickyUnboxedTupleReturn (length amodes)
+	; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+	; emitSimultaneously assts
+	; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset		-- Sp at which to start pushing
+		 -> [(CgRep, CmmExpr)]		-- amodes of the components
+		 -> FCode (VirtualSpOffset,	-- final Sp
+			   CmmStmts)		-- assignments (regs+stack)
+
+pushUnboxedTuple sp [] 
+  = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+  = do	{ let	(reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
 	
-		-- Now decide about semi-tagging
-	let
-		semi_tagging_on = opt_DoSemiTagging
-	in
-	case (semi_tagging_on, arg_amodes, node_points, sequel) of
+		-- separate the rest of the args into pointers and non-pointers
+		(ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+		reg_arg_assts = assignToRegs reg_arg_amodes
+		
+	    -- push ptrs, then nonptrs, on the stack
+	; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
+	; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+	; returnFC (final_sp,
+	  	    reg_arg_assts `plusStmts` 
+		    ptr_assts `plusStmts` nptr_assts) }
+    
+		  
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+-- 
+-- 		- push return address
+-- 		- adjust stack pointer
+-- 		- r = call(args...)
+-- 		- assign regs for unboxed tuple (usually just R1 = r)
+-- 		- return to continuation
+-- 
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+-- 
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+
+ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
+ccallReturnUnboxedTuple amodes before_jump
+  = do 	{ eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
+
+	-- Push a return address if necessary
+	; pushReturnAddress eob
+	; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
+	    (do	{ adjustSpAndHp args_sp
+		; before_jump
+  		; returnUnboxedTuple amodes })
+    }
+
+-- -----------------------------------------------------------------------------
+-- Calling an out-of-line primop
 
-	--
-	-- *************** The semi-tagging case ***************
-	--
-	{- XXX leave this out for now.
-	      (	  True,		   [],		True,	     CaseAlts _ (Just (st_alts, maybe_deflt_join_details))) ->
-
-		-- Whoppee!  Semi-tagging rules OK!
-		-- (a) semi-tagging is switched on
-		-- (b) there are no arguments,
-		-- (c) Node points to the closure
-		-- (d) we have a case-alternative sequel with
-		--	some visible alternatives
-
-		-- Why is test (c) necessary?
-		-- Usually Node will point to it at this point, because we're
-		-- scrutinsing something which is either a thunk or a
-		-- constructor.
-		-- But not always!  The example I came across is when we have
-		-- a top-level Double:
-		--	lit.3 = D# 3.000
-		--	... (case lit.3 of ...) ...
-		-- Here, lit.3 is built as a re-entrant thing, which you must enter.
-		-- (OK, the simplifier should have eliminated this, but it's
-		--  easy to deal with the case anyway.)
-		let
-		    join_details_to_code (load_regs_and_profiling_code, join_lbl)
-			= load_regs_and_profiling_code		`mkAbsCStmts`
-			  CJump (CLbl join_lbl CodePtrRep)
-
-		    semi_tagged_alts = [ (mkMachInt (fromInt (tag - fIRST_TAG)),
-					  join_details_to_code join_details)
-				       | (tag, join_details) <- st_alts
-				       ]
-
-		    enter_jump
-		      -- Enter Node (we know infoptr will have the info ptr in it)!
-		      = mkAbstractCs [
-			CCallProfCtrMacro SLIT("RET_SEMI_FAILED")
-					[CMacroExpr IntRep INFO_TAG [CReg infoptr]],
-			CJump (CMacroExpr CodePtrRep ENTRY_CODE [CReg infoptr]) ]
-		in
-			-- Final switch
-		absC (mkAbstractCs [
-			    CAssign (CReg infoptr)
-				    (CVal (NodeRel zeroOff) DataPtrRep),
-
-			    case maybe_deflt_join_details of
-				Nothing ->
-				    CSwitch (CMacroExpr IntRep INFO_TAG [CReg infoptr])
-					(semi_tagged_alts)
-					(enter_jump)
-				Just (_, details) ->
-				    CSwitch (CMacroExpr IntRep EVAL_TAG [CReg infoptr])
-				     [(mkMachInt 0, enter_jump)]
-				     (CSwitch
-					 (CMacroExpr IntRep INFO_TAG [CReg infoptr])
-					 (semi_tagged_alts)
-					 (join_details_to_code details))
-		])
-		-}
+tailCallPrimOp :: PrimOp -> [StgArg] -> Code
+tailCallPrimOp op args
+ = do	{	-- We're going to perform a normal-looking tail call, 
+		-- except that *all* the arguments will be in registers.
+		-- Hence the ASSERT( null leftovers )
+	  arg_amodes <- getArgAmodes args
+	; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
+	      jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
+
+	; ASSERT(null leftovers) -- no stack-resident args
+ 	  emitSimultaneously (assignToRegs arg_regs)
+
+	; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
+	; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
+
+-- -----------------------------------------------------------------------------
+-- Return Addresses
+
+-- We always push the return address just before performing a tail call
+-- or return.  The reason we leave it until then is because the stack
+-- slot that the return address is to go into might contain something
+-- useful.
+-- 
+-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
+-- case expression and the return address is still to be pushed.
+-- 
+-- There are cases where it doesn't look necessary to push the return
+-- address: for example, just before doing a return to a known
+-- continuation.  However, the continuation will expect to find the
+-- return address on the stack in case it needs to do a heap check.
 
-	--
-	-- *************** The non-semi-tagging case ***************
-	--
-	      other -> finish_code sequel
-\end{code}
+pushReturnAddress :: EndOfBlockInfo -> Code
 
-%************************************************************************
-%*									*
-\subsection[tailCallPrimOp]{@tailCallPrimOp@}
-%*									*
-%************************************************************************
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
+  = do	{ sp_rel <- getSpRelOffset args_sp
+	; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
 
-\begin{code}
-tailCallPrimOp :: PrimOp -> [StgArg] -> Code
-tailCallPrimOp op args =
-    -- we're going to perform a normal-looking tail call, 
-    -- except that *all* the arguments will be in registers.
-    getArgAmodes args		`thenFC` \ arg_amodes ->
-    let (arg_regs, leftovers) = assignAllRegs [] (map getAmodeRep arg_amodes)
-    in
-    ASSERT(null leftovers) -- no stack-resident args
-    doTailCall arg_amodes arg_regs 
-	(const (absC (CJump (CLbl (mkRtsPrimOpLabel op) CodePtrRep))))
-	0       {- arity shouldn't matter, all args in regs -}
-	AbsCNop {- no pending assignments -}
-	Nothing {- not a let-no-escape -}
-	False   {- node doesn't point -}
-\end{code}
+-- For a polymorphic case, we have two return addresses to push: the case
+-- return, and stg_seq_frame_info which turns a possible vectored return
+-- into a direct one.
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
+  = do	{ sp_rel <- getSpRelOffset (args_sp-1)
+	; stmtC (CmmStore sp_rel (mkLblExpr lbl))
+	; sp_rel <- getSpRelOffset args_sp
+	; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
 
-%************************************************************************
-%*									*
-\subsection[doSimAssts]{@doSimAssts@}
-%*									*
-%************************************************************************
+pushReturnAddress _ = nopC
 
-@doSimAssts@ happens at the end of every block of code.
-They are separate because we sometimes do some jiggery-pokery in between.
+-- -----------------------------------------------------------------------------
+-- Misc.
 
-\begin{code}
-doSimAssts :: AbstractC -> Code
+jumpToLbl :: CLabel -> Code
+-- Passes no argument to the destination procedure
+jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
 
-doSimAssts sim_assts
-  = absC (CSimultaneous sim_assts)
+assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
+assignToRegs reg_args 
+  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
+	    | (expr, reg_id) <- reg_args ] 
 \end{code}
 
+
 %************************************************************************
 %*									*
-\subsection[retAddr]{@Return Addresses@}
+\subsection[CgStackery-adjust]{Adjusting the stack pointers}
 %*									*
 %************************************************************************
 
-We always push the return address just before performing a tail call
-or return.  The reason we leave it until then is because the stack
-slot that the return address is to go into might contain something
-useful.
-
-If the end of block info is CaseAlts, then we're in the scrutinee of a
-case expression and the return address is still to be pushed.
+This function adjusts the stack and heap pointers just before a tail
+call or return.  The stack pointer is adjusted to its final position
+(i.e. to point to the last argument for a tail call, or the activation
+record for a return).  The heap pointer may be moved backwards, in
+cases where we overallocated at the beginning of the basic block (see
+CgCase.lhs for discussion).
 
-There are cases where it doesn't look necessary to push the return
-address: for example, just before doing a return to a known
-continuation.  However, the continuation will expect to find the
-return address on the stack in case it needs to do a heap check.
+These functions {\em do not} deal with high-water-mark adjustment.
+That's done by functions which allocate stack space.
 
 \begin{code}
-pushReturnAddress :: EndOfBlockInfo -> Code
-pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts amode _)) =
-    getSpRelOffset args_sp			 `thenFC` \ sp_rel ->
-    absC (CAssign (CVal sp_rel RetRep) amode)
-pushReturnAddress (EndOfBlockInfo args_sp sequel@(SeqFrame amode _)) =
-    pushSeqFrame args_sp			 `thenFC` \ ret_sp ->
-    getSpRelOffset ret_sp			 `thenFC` \ sp_rel ->
-    absC (CAssign (CVal sp_rel RetRep) amode)
-pushReturnAddress _ = nopC
+adjustSpAndHp :: VirtualSpOffset 	-- New offset for Arg stack ptr
+	      -> Code
+adjustSpAndHp newRealSp 
+  = do	{ -- Adjust stack, if necessary.
+	  -- NB: the conditional on the monad-carried realSp
+	  --     is out of line (via codeOnly), to avoid a black hole
+	; new_sp <- getSpRelOffset newRealSp
+	; checkedAbsC (CmmAssign spReg new_sp)	-- Will generate no code in the case
+	; setRealSp newRealSp			-- where realSp==newRealSp
+
+	  -- Adjust heap.  The virtual heap pointer may be less than the real Hp
+	  -- because the latter was advanced to deal with the worst-case branch
+	  -- of the code, and we may be in a better-case branch.  In that case,
+ 	  -- move the real Hp *back* and retract some ticky allocation count.
+	; hp_usg <- getHpUsage
+	; let rHp = realHp hp_usg
+	      vHp = virtHp hp_usg
+	; new_hp <- getHpRelOffset vHp
+	; checkedAbsC (CmmAssign hpReg new_hp)	-- Generates nothing when vHp==rHp
+	; tickyAllocHeap (vHp - rHp)		-- ...ditto
+	; setRealHp vHp
+	}
 \end{code}