% % (c) The AQUA Project, Glasgow University, 1994 % %************************************************************************ %* * \section[StgThreads.lhc]{Threaded Threads Support} %* * %************************************************************************ Some of the threads support is done in threaded code. How's that for ambiguous overloading? \begin{code} #ifdef CONCURRENT #define MAIN_REG_MAP /* STG world */ #include "rtsdefs.h" #if 0 #ifdef PAR #include "Statistics.h" #endif #endif \end{code} %************************************************************************ %* * \subsection[thread-objects]{Special objects for thread support} %* * %************************************************************************ TSO's are Thread State Objects, where the thread context is stored when the thread is sleeping, and where we have slots for STG registers that don't live in real machine registers. \begin{code} TSO_ITBL(); STGFUN(TSO_entry) { FB_ fflush(stdout); fprintf(stderr, "TSO Entry: panic"); abort(); FE_ } \end{code} Stack objects are chunks of stack words allocated out of the heap and linked together in a chain. \begin{code} STKO_ITBL(); STGFUN(StkO_entry) { FB_ fflush(stdout); fprintf(stderr, "StkO Entry: panic"); abort(); FE_ } #ifndef PAR STKO_STATIC_ITBL(); STGFUN(StkO_static_entry) { FB_ fflush(stdout); fprintf(stderr, "StkO_static Entry: panic"); abort(); FE_ } #endif \end{code} Blocking queues are essentially black holes with threads attached. These are the threads to be awakened when the closure is updated. \begin{code} EXTFUN(EnterNodeCode); STGFUN(BQ_entry) { FB_ #if defined(GRAN) /* Before overwriting TSO_LINK */ STGCALL3(void,(),GranSimBlock,CurrentTSO,CurrentProc,Node); #endif TSO_LINK(CurrentTSO) = (P_) BQ_ENTRIES(Node); BQ_ENTRIES(Node) = (W_) CurrentTSO; LivenessReg = LIVENESS_R1; SaveAllStgRegs(); TSO_PC1(CurrentTSO) = EnterNodeCode; if (DO_QP_PROF) { QP_Event1("GR", CurrentTSO); } #ifdef PAR if(RTSflags.ParFlags.granSimStats) { /* Note that CURRENT_TIME may perform an unsafe call */ TIME now = CURRENT_TIME; TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); TSO_BLOCKCOUNT(CurrentTSO)++; TSO_QUEUE(CurrentTSO) = Q_BLOCKED; TSO_BLOCKEDAT(CurrentTSO) = now; DumpGranEvent(GR_BLOCK, CurrentTSO); } #endif #if defined(GRAN) ReSchedule(SAME_THREAD); /* NB: GranSimBlock activated next thread */ #else ReSchedule(0); #endif FE_ } BQ_ITBL(); \end{code} Revertible black holes are needed in the parallel world, to handle negative acknowledgements of messages containing updatable closures. The idea is that when the original message is transmitted, the closure is turned into a revertible black hole...an object which acts like a black hole when local threads try to enter it, but which can be reverted back to the original closure if necessary. It's actually a lot like a blocking queue (BQ) entry, because revertible black holes are initially set up with an empty blocking queue. The combination of GrAnSim with revertible black holes has not been checked, yet. -- HWL \begin{code} #if defined(PAR) || defined(GRAN) STGFUN(RBH_entry) { FB_ # if defined(GRAN) /* Before overwriting TSO_LINK */ STGCALL3(void,(),GranSimBlock,CurrentTSO,CurrentProc,Node); # endif switch (INFO_TYPE(InfoPtr)) { case INFO_SPEC_RBH_TYPE: TSO_LINK(CurrentTSO) = (P_) SPEC_RBH_BQ(Node); SPEC_RBH_BQ(Node) = (W_) CurrentTSO; break; case INFO_GEN_RBH_TYPE: TSO_LINK(CurrentTSO) = (P_) GEN_RBH_BQ(Node); GEN_RBH_BQ(Node) = (W_) CurrentTSO; break; default: fflush(stdout); fprintf(stderr, "Panic: non-{SPEC,GEN} RBH %#lx (IP %#lx)\n", Node, InfoPtr); EXIT(EXIT_FAILURE); } LivenessReg = LIVENESS_R1; SaveAllStgRegs(); TSO_PC1(CurrentTSO) = EnterNodeCode; if (DO_QP_PROF) { QP_Event1("GR", CurrentTSO); } # ifdef PAR if(RTSflags.ParFlags.granSimStats) { /* Note that CURRENT_TIME may perform an unsafe call */ TIME now = CURRENT_TIME; TSO_EXECTIME(CurrentTSO) += now - TSO_BLOCKEDAT(CurrentTSO); TSO_BLOCKCOUNT(CurrentTSO)++; TSO_QUEUE(CurrentTSO) = Q_BLOCKED; TSO_BLOCKEDAT(CurrentTSO) = now; DumpGranEvent(GR_BLOCK, CurrentTSO); } # endif # if defined(GRAN) ReSchedule(SAME_THREAD); /* NB: GranSimBlock activated next thread */ # else ReSchedule(0); # endif FE_ } #endif \end{code} %************************************************************************ %* * \subsection[thread-entrypoints]{Scheduler-Thread Interfaces} %* * %************************************************************************ The normal way of entering a thread is through \tr{resumeThread}, which short-circuits any indirections to the TSO and StkO, sets up STG registers, and jumps to the saved PC. \begin{code} STGFUN(resumeThread) { FB_ while(IS_INDIRECTION(INFO_PTR(CurrentTSO))) { CurrentTSO = (P_) IND_CLOSURE_PTR(CurrentTSO); } #ifdef PAR if (RTSflags.ParFlags.granSimStats) { TSO_QUEUE(CurrentTSO) = Q_RUNNING; /* Note that CURRENT_TIME may perform an unsafe call */ TSO_BLOCKEDAT(CurrentTSO) = CURRENT_TIME; } #endif CurrentRegTable = TSO_INTERNAL_PTR(CurrentTSO); while(IS_INDIRECTION(INFO_PTR(SAVE_StkO))) { SAVE_StkO = (P_) IND_CLOSURE_PTR(SAVE_StkO); } RestoreAllStgRegs(); SET_TASK_ACTIVITY(ST_REDUCING); RESTORE_CCC(TSO_CCC(CurrentTSO)); JMP_(TSO_PC1(CurrentTSO)); FE_ } \end{code} Since we normally context switch during a heap check, it is possible that we will return to a previously suspended thread without sufficient heap for the thread to continue. However, we have cleverly stashed away the heap requirements in @TSO_ARG1@ so that we can decide whether or not to perform a garbage collection before resuming the thread. The actual thread resumption address (either @EnterNodeCode@ or elsewhere) is stashed in @TSO_PC2@. \begin{code} STGFUN(CheckHeapCode) { FB_ ALLOC_HEAP(TSO_ARG1(CurrentTSO)); /* ticky profiling */ if ((Hp += TSO_ARG1(CurrentTSO)) > HpLim) { ReallyPerformThreadGC(TSO_ARG1(CurrentTSO), rtsFalse); JMP_(resumeThread); } SET_TASK_ACTIVITY(ST_REDUCING); RESUME_(TSO_PC2(CurrentTSO)); FE_ } \end{code} Often, a thread starts (or rather, resumes) by entering the closure that Node points to. Here's a tiny code fragment to do just that. The saved PC in the TSO can be set to @EnterNodeCode@ whenever we want this to happen upon resumption of the thread. \begin{code} STGFUN(EnterNodeCode) { FB_ ENT_VIA_NODE(); InfoPtr=(D_)(INFO_PTR(Node)); JMP_(ENTRY_CODE(InfoPtr)); FE_ } \end{code} Then, there are the occasions when we just want to pick up where we left off. We use \tr{RESUME_} here instead of \tr{JMP_}, because when we return to a call site, the Alpha is going to try to load \tr{%gp} from \tr{%ra} rather than \tr{%pv}, and \tr{JMP_} only sets \tr{%pv}. Resuming to the start of a function is currently okay, but an extremely bad practice. As we add support for more architectures, we can expect the difference between \tr{RESUME_} and \tr{JMP_} to become more acute. \begin{code} STGFUN(Continue) { FB_ SET_TASK_ACTIVITY(ST_REDUCING); RESUME_(TSO_PC2(CurrentTSO)); FE_ } \end{code} %************************************************************************ %* * \subsection[stack-chunk-underflow-code]{Underflow code for stack chunks} %* * %************************************************************************ \begin{code} #ifndef PAR \end{code} On a uniprocessor, stack underflow causes us no great headaches. The old value of RetReg is squirreled away at the base of the top stack object (the one that's about to get blown away). We just yank it outta there and perform the same kind of return that got us here in the first place. This simplicity is due to the fact that we never have to fetch a stack object on underflow. \begin{code} #define DO_RETURN_TEMPLATE(label, cont) \ STGFUN(label) \ { \ P_ temp; \ FB_ \ temp = STKO_LINK(StkOReg); \ RetReg = STKO_RETURN(StkOReg); \ StkOReg = temp; \ RestoreStackStgRegs(); \ JMP_(cont); \ FE_ \ } DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg))) DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)]) DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)]) DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)]) DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)]) DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)]) DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)]) DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)]) DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)]) DO_RETURN_TEMPLATE(StackUnderflowEnterNode, EnterNodeCode) #else \end{code} In the parallel world, we may have to fetch the StkO from a remote location before we can load up the stack registers and perform the return. Our convention is that we load RetReg up with the exact continuation address (after a vector table lookup, if necessary), and tail-call the code to fetch the stack object. (Of course, if the stack object is already local, we then just jump to the continuation address.) \begin{code} STGFUN(CommonUnderflow) { P_ temp; FB_ temp = STKO_LINK(StkOReg); /* fprintf(stderr,"Stk Underflow from: %lx to: %lx size abandoned: %d\n",StkOReg,temp,STKO_CLOSURE_CTS_SIZE(StkOReg)); */ /* change the guy we are abandoning into something that will not be "interesting" on the mutables list. (As long as it is there, it will be scavenged in GC, and we cannot guarantee that it is still a "sane" StkO object). (And, besides, why continue to keep it [and all it pts to] alive?) Will & Phil 95/10 */ FREEZE_MUT_HDR(StkOReg, ImMutArrayOfPtrs_info); MUTUPLE_CLOSURE_SIZE(StkOReg) = MUTUPLE_VHS; StkOReg = temp; /* ToDo: Fetch the remote stack object here! */ RestoreStackStgRegs(); JMP_(RetReg); FE_ } #define DO_RETURN_TEMPLATE(label, cont) \ STGFUN(label) \ { \ FB_ \ RetReg = STKO_RETURN(StkOReg); \ RetReg = (StgRetAddr)(cont); \ LivenessReg = INFO_LIVENESS(InfoPtr); \ JMP_(CommonUnderflow); \ FE_ \ } DO_RETURN_TEMPLATE(UnderflowDirectReturn, DIRECT(((P_)RetReg))) DO_RETURN_TEMPLATE(UnderflowVect0, ((P_)RetReg)[RVREL(0)]) DO_RETURN_TEMPLATE(UnderflowVect1, ((P_)RetReg)[RVREL(1)]) DO_RETURN_TEMPLATE(UnderflowVect2, ((P_)RetReg)[RVREL(2)]) DO_RETURN_TEMPLATE(UnderflowVect3, ((P_)RetReg)[RVREL(3)]) DO_RETURN_TEMPLATE(UnderflowVect4, ((P_)RetReg)[RVREL(4)]) DO_RETURN_TEMPLATE(UnderflowVect5, ((P_)RetReg)[RVREL(5)]) DO_RETURN_TEMPLATE(UnderflowVect6, ((P_)RetReg)[RVREL(6)]) DO_RETURN_TEMPLATE(UnderflowVect7, ((P_)RetReg)[RVREL(7)]) STGFUN(PrimUnderflow) { FB_ RetReg = STKO_RETURN(StkOReg); RetReg = (StgRetAddr)DIRECT(((P_)RetReg)); LivenessReg = NO_LIVENESS; JMP_(CommonUnderflow); FE_ } /* * This one is similar, but isn't part of the return vector. It's only used * when we fall off of a stack chunk and want to enter Node rather than * returning through RetReg. (This occurs during UpdatePAP, when the updatee * isn't on the current stack chunk.) It can't be done with the template, * because R2 is dead, and R1 points to a PAP. Only R1 is live. */ STGFUN(StackUnderflowEnterNode) { FB_ RetReg = (StgRetAddr)(EnterNodeCode); LivenessReg = LIVENESS_R1; JMP_(CommonUnderflow); FE_ } #endif const W_ vtbl_Underflow[] = { /* "MAX_VECTORED_RTN" elements (see GhcConstants.lh) */ (W_) UnderflowVect0, (W_) UnderflowVect1, (W_) UnderflowVect2, (W_) UnderflowVect3, (W_) UnderflowVect4, (W_) UnderflowVect5, (W_) UnderflowVect6, (W_) UnderflowVect7 }; \end{code} \begin{code} IFN_(seqDirectReturn) { void *cont; FB_ RetReg = (StgRetAddr) SpB[BREL(0)]; cont = (void *) SpB[BREL(1)]; /* SpB += BREL(2); */ JMP_(cont); FE_ } /* NB: For direct returns to work properly, the name of the routine must be the same as the name of the vector table with vtbl_ removed and DirectReturn appended. This is all the mangler understands. */ const W_ vtbl_seq[] = { (W_) seqDirectReturn, (W_) seqDirectReturn, (W_) seqDirectReturn, (W_) seqDirectReturn, (W_) seqDirectReturn, (W_) seqDirectReturn, (W_) seqDirectReturn, (W_) seqDirectReturn }; #endif /* CONCURRENT */ \end{code}