%
% (c) The AQUA Project, Glasgow University, 1994
%
%************************************************************************
%*                                                                      *
\section[StgThreads.lhc]{Threaded Threads Support}
%*									*
%************************************************************************

Some of the threads support is done in threaded code.  How's that for ambiguous
overloading?

\begin{code}

#ifdef CONCURRENT

#define MAIN_REG_MAP	    /* STG world */
#include "rtsdefs.h"

#if 0
#ifdef PAR
#include "Statistics.h"
#endif
#endif

\end{code}

%************************************************************************
%*                                                                      *
\subsection[thread-objects]{Special objects for thread support}
%*									*
%************************************************************************

TSO's are Thread State Objects, where the thread context is stored when the
thread is sleeping, and where we have slots for STG registers that don't 
live in real machine registers.

\begin{code}

TSO_ITBL();

STGFUN(TSO_entry)
{
    FB_
    fflush(stdout);
    fprintf(stderr, "TSO Entry: panic");
    abort();
    FE_
}

\end{code}

Stack objects are chunks of stack words allocated out of the heap and
linked together in a chain.

\begin{code}

STKO_ITBL();

STGFUN(StkO_entry)
{
    FB_
    fflush(stdout);
    fprintf(stderr, "StkO Entry: panic");
    abort();
    FE_

}

#ifndef PAR

STKO_STATIC_ITBL();

STGFUN(StkO_static_entry)
{
    FB_
    fflush(stdout);
    fprintf(stderr, "StkO_static Entry: panic");
    abort();
    FE_

}

#endif

\end{code}

Blocking queues are essentially black holes with threads attached.  These
are the threads to be awakened when the closure is updated.

\begin{code}

EXTFUN(EnterNodeCode);

STGFUN(BQ_entry)
{   
    FB_

#if defined(GRAN)
    /* Before overwriting TSO_LINK */
    STGCALL3(void,(),GranSimBlock,CurrentTSO,CurrentProc,Node);	
#endif

    TSO_LINK(CurrentTSO) = (P_) BQ_ENTRIES(Node);
    BQ_ENTRIES(Node) = (W_) CurrentTSO;

    LivenessReg = LIVENESS_R1;
    SaveAllStgRegs();
    TSO_PC1(CurrentTSO) = EnterNodeCode;

    if (DO_QP_PROF) {
    	QP_Event1("GR", CurrentTSO);
    }
#ifdef PAR
    if(RTSflags.ParFlags.granSimStats) {
        /* Note that CURRENT_TIME may perform an unsafe call */
	TIME now = CURRENT_TIME;
        TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
        TSO_BLOCKCOUNT(CurrentTSO)++;
	TSO_QUEUE(CurrentTSO) = Q_BLOCKED;
        TSO_BLOCKEDAT(CurrentTSO) = now;
        DumpGranEvent(GR_BLOCK, CurrentTSO);
    }
#endif
#if defined(GRAN)
    ReSchedule(SAME_THREAD); /* NB: GranSimBlock activated next thread */
#else
    ReSchedule(0);
#endif
    FE_
}

BQ_ITBL();

\end{code}

Revertible black holes are needed in the parallel world, to handle
negative acknowledgements of messages containing updatable closures.
The idea is that when the original message is transmitted, the closure
is turned into a revertible black hole...an object which acts like a
black hole when local threads try to enter it, but which can be
reverted back to the original closure if necessary.

It's actually a lot like a blocking queue (BQ) entry, because
revertible black holes are initially set up with an empty blocking
queue.

The combination of GrAnSim with revertible black holes has not been
checked, yet. -- HWL

\begin{code}

#if defined(PAR) || defined(GRAN)

STGFUN(RBH_entry)
{
    FB_

#  if defined(GRAN)
    /* Before overwriting TSO_LINK */
    STGCALL3(void,(),GranSimBlock,CurrentTSO,CurrentProc,Node);	
#  endif

    switch (INFO_TYPE(InfoPtr)) {
    case INFO_SPEC_RBH_TYPE:
	TSO_LINK(CurrentTSO) = (P_) SPEC_RBH_BQ(Node);
	SPEC_RBH_BQ(Node) = (W_) CurrentTSO;
	break;
    case INFO_GEN_RBH_TYPE:
	TSO_LINK(CurrentTSO) = (P_) GEN_RBH_BQ(Node);
	GEN_RBH_BQ(Node) = (W_) CurrentTSO;
	break;
    default:
	fflush(stdout);
	fprintf(stderr, "Panic: non-{SPEC,GEN} RBH %#lx (IP %#lx)\n", Node, InfoPtr);
	EXIT(EXIT_FAILURE);
    }

    LivenessReg = LIVENESS_R1;
    SaveAllStgRegs();
    TSO_PC1(CurrentTSO) = EnterNodeCode;

    if (DO_QP_PROF) {
	QP_Event1("GR", CurrentTSO);
    }

#  ifdef PAR
    if(RTSflags.ParFlags.granSimStats) {
        /* Note that CURRENT_TIME may perform an unsafe call */
	TIME now = CURRENT_TIME;
        TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO);
        TSO_BLOCKCOUNT(CurrentTSO)++;
	TSO_QUEUE(CurrentTSO) = Q_BLOCKED;
        TSO_BLOCKEDAT(CurrentTSO) = now;
        DumpGranEvent(GR_BLOCK, CurrentTSO);
    }
#  endif
#  if defined(GRAN)
    ReSchedule(SAME_THREAD);  /* NB: GranSimBlock activated next thread */
#  else
    ReSchedule(0);
#  endif

    FE_
}

#endif

\end{code}

%************************************************************************
%*                                                                      *
\subsection[thread-entrypoints]{Scheduler-Thread Interfaces}
%*									*
%************************************************************************

The normal way of entering a thread is through \tr{resumeThread},
which short-circuits any indirections to the TSO and StkO, sets up STG
registers, and jumps to the saved PC.

\begin{code}
STGFUN(resumeThread)
{
    FB_

    while(IS_INDIRECTION(INFO_PTR(CurrentTSO))) {
    	CurrentTSO = (P_) IND_CLOSURE_PTR(CurrentTSO);
    }

#ifdef PAR
    if (RTSflags.ParFlags.granSimStats) {
	TSO_QUEUE(CurrentTSO) = Q_RUNNING;
	/* Note that CURRENT_TIME may perform an unsafe call */
        TSO_BLOCKEDAT(CurrentTSO) = CURRENT_TIME;
    }
#endif

    CurrentRegTable = TSO_INTERNAL_PTR(CurrentTSO);

    while(IS_INDIRECTION(INFO_PTR(SAVE_StkO))) {
    	SAVE_StkO = (P_) IND_CLOSURE_PTR(SAVE_StkO);
    }
    RestoreAllStgRegs();

    SET_TASK_ACTIVITY(ST_REDUCING);
    RESTORE_CCC(TSO_CCC(CurrentTSO));
    JMP_(TSO_PC1(CurrentTSO));
    FE_
}
\end{code}

Since we normally context switch during a heap check, it is possible
that we will return to a previously suspended thread without
sufficient heap for the thread to continue.  However, we have cleverly
stashed away the heap requirements in @TSO_ARG1@ so that we can decide
whether or not to perform a garbage collection before resuming the
thread.  The actual thread resumption address (either @EnterNodeCode@
or elsewhere) is stashed in @TSO_PC2@.

\begin{code}
STGFUN(CheckHeapCode)
{
    FB_

    ALLOC_HEAP(TSO_ARG1(CurrentTSO)); /* ticky profiling */
    if ((Hp += TSO_ARG1(CurrentTSO)) > HpLim) {
    	ReallyPerformThreadGC(TSO_ARG1(CurrentTSO), rtsFalse);
    	JMP_(resumeThread);
    }
    SET_TASK_ACTIVITY(ST_REDUCING);
    RESUME_(TSO_PC2(CurrentTSO));
    FE_
}
\end{code}

Often, a thread starts (or rather, resumes) by entering the closure
that Node points to.  Here's a tiny code fragment to do just that.
The saved PC in the TSO can be set to @EnterNodeCode@ whenever we
want this to happen upon resumption of the thread.

\begin{code}
STGFUN(EnterNodeCode)
{
    FB_
    ENT_VIA_NODE();
    InfoPtr=(D_)(INFO_PTR(Node));
    JMP_(ENTRY_CODE(InfoPtr));
    FE_
}
\end{code}

Then, there are the occasions when we just want to pick up where we
left off.  We use \tr{RESUME_} here instead of \tr{JMP_}, because when
we return to a call site, the Alpha is going to try to load \tr{%gp}
from \tr{%ra} rather than \tr{%pv}, and \tr{JMP_} only sets \tr{%pv}.
Resuming to the start of a function is currently okay, but an
extremely bad practice.  As we add support for more architectures, we
can expect the difference between \tr{RESUME_} and \tr{JMP_} to become
more acute.

\begin{code}
STGFUN(Continue)
{
    FB_

    SET_TASK_ACTIVITY(ST_REDUCING);
    RESUME_(TSO_PC2(CurrentTSO));
    FE_
}
\end{code}

%************************************************************************
%*									*
\subsection[stack-chunk-underflow-code]{Underflow code for stack chunks}
%*									*
%************************************************************************

\begin{code}
#ifndef PAR
\end{code}

On a uniprocessor, stack underflow causes us no great headaches.  The
old value of RetReg is squirreled away at the base of the top stack
object (the one that's about to get blown away).  We just yank it
outta there and perform the same kind of return that got us here in
the first place.

This simplicity is due to the fact that we never have to fetch a stack
object on underflow.

\begin{code}

#define DO_RETURN_TEMPLATE(label, cont)	    	\
    STGFUN(label)   	    	    	    	\
    {				    	    	\
      P_ temp;	    	    	    	    	\
      FB_	    	    	    	    	\
      temp = STKO_LINK(StkOReg);  	    	\
      RetReg = STKO_RETURN(StkOReg);	    	\
      StkOReg = temp;	    	    	    	\
      RestoreStackStgRegs();	    	    	\
      JMP_(cont);		    	    	\
      FE_	    	    	    	    	\
    }

DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg)))
DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)])
DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)])
DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)])
DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)])
DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)])

DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)])
DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)])
DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)])

DO_RETURN_TEMPLATE(StackUnderflowEnterNode, EnterNodeCode)

#else

\end{code}

In the parallel world, we may have to fetch the StkO from a remote
location before we can load up the stack registers and perform the
return.  Our convention is that we load RetReg up with the exact
continuation address (after a vector table lookup, if necessary),
and tail-call the code to fetch the stack object.  (Of course, if
the stack object is already local, we then just jump to the 
continuation address.)

\begin{code}

STGFUN(CommonUnderflow)
{
    P_ temp;

    FB_
    temp = STKO_LINK(StkOReg);

    /* fprintf(stderr,"Stk Underflow from: %lx to: %lx size abandoned: %d\n",StkOReg,temp,STKO_CLOSURE_CTS_SIZE(StkOReg)); */

    /* change the guy we are abandoning into something
       that will not be "interesting" on the mutables
       list.  (As long as it is there, it will be
       scavenged in GC, and we cannot guarantee that
       it is still a "sane" StkO object).  (And, besides,
       why continue to keep it [and all it pts to] alive?)
       Will & Phil 95/10
    */
    FREEZE_MUT_HDR(StkOReg, ImMutArrayOfPtrs_info);
    MUTUPLE_CLOSURE_SIZE(StkOReg) = MUTUPLE_VHS;

    StkOReg = temp;
    /* ToDo: Fetch the remote stack object here! */
    RestoreStackStgRegs();
    JMP_(RetReg);
    FE_
}

#define DO_RETURN_TEMPLATE(label, cont)	    	\
    STGFUN(label)   	    	    	    	\
    {				    	    	\
      FB_	    	    	    	    	\
      RetReg = STKO_RETURN(StkOReg);	    	\
      RetReg = (StgRetAddr)(cont);		\
      LivenessReg = INFO_LIVENESS(InfoPtr);	\
      JMP_(CommonUnderflow);	    	    	\
      FE_	    	    	    	    	\
    }

DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg)))
DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)])
DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)])
DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)])
DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)])
DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)])
DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)])
DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)])
DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)])

STGFUN(PrimUnderflow)
{
    FB_
    RetReg = STKO_RETURN(StkOReg);
    RetReg = (StgRetAddr)DIRECT(((P_)RetReg));
    LivenessReg = NO_LIVENESS;
    JMP_(CommonUnderflow);
    FE_
}

/* 
 * This one is similar, but isn't part of the return vector.  It's only used
 * when we fall off of a stack chunk and want to enter Node rather than
 * returning through RetReg.  (This occurs during UpdatePAP, when the updatee
 * isn't on the current stack chunk.)  It can't be done with the template,
 * because R2 is dead, and R1 points to a PAP.  Only R1 is live.
 */

STGFUN(StackUnderflowEnterNode)
{
    FB_
    RetReg = (StgRetAddr)(EnterNodeCode);
    LivenessReg = LIVENESS_R1;
    JMP_(CommonUnderflow);
    FE_
}

#endif

const W_
vtbl_Underflow[] = {
    /* "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */
    (W_) UnderflowVect0,
    (W_) UnderflowVect1,
    (W_) UnderflowVect2,
    (W_) UnderflowVect3,
    (W_) UnderflowVect4,
    (W_) UnderflowVect5,
    (W_) UnderflowVect6,
    (W_) UnderflowVect7
};

\end{code}

\begin{code}

IFN_(seqDirectReturn) {
    void *cont;

    FB_
    RetReg = (StgRetAddr) SpB[BREL(0)];
    cont = (void *) SpB[BREL(1)];
    /* SpB += BREL(2); */
    JMP_(cont);
    FE_
}

/*
   NB: For direct returns to work properly, the name of the routine must be
   the same as the name of the vector table with vtbl_ removed and DirectReturn
   appended.  This is all the mangler understands.
 */

const W_
vtbl_seq[] = {
    (W_) seqDirectReturn,
    (W_) seqDirectReturn,
    (W_) seqDirectReturn,
    (W_) seqDirectReturn,
    (W_) seqDirectReturn,
    (W_) seqDirectReturn,
    (W_) seqDirectReturn,
    (W_) seqDirectReturn
};

#endif /* CONCURRENT */
\end{code}