+++ /dev/null
-%
-% (c) The GRASP/AQUA Project, Glasgow University, 1995 - 1996
-% Hans Wolfgang Loidl
-%
-% Time-stamp: <Sun Oct 19 1997 23:39:59 Stardate: [-30]0119.72 hwloidl>
-%
-%************************************************************************
-%* *
-\section[GranSim.lc]{Granularity Simulator Routines}
-%* *
-%************************************************************************
-
-Macros for dealing with the new and improved GA field for simulating
-parallel execution. Based on @CONCURRENT@ package. The GA field now
-contains a mask, where the n-th bit stands for the n-th processor,
-where this data can be found. In case of multiple copies, several bits
-are set. The total number of processors is bounded by @MAX_PROC@,
-which should be <= the length of a word in bits. -- HWL
-
-\begin{code}
-#if defined(GRAN) || defined(PAR)
-
-#ifndef _AIX
-#define NON_POSIX_SOURCE /* gettimeofday */
-#endif
-
-#include "rtsdefs.h"
-
-/* qaStaH nuq Sovpu' ngoqvam ghItlhpu'bogh nuv 'e' vItul */
-# if defined(HAVE_GETCLOCK)
-# if defined(HAVE_SYS_TIMERS_H)
-# define POSIX_4D9 1
-# include <sys/timers.h>
-# endif
-# else
-# if defined(HAVE_GETTIMEOFDAY)
-# if defined(HAVE_SYS_TIME_H)
-# include <sys/time.h>
-# endif
-# else
-# ifdef HAVE_TIME_H
-# include <time.h>
-# endif
-# endif
-# endif
-\end{code}
-
-
-%****************************************************************
-%* *
-\subsection[GranSim-data-types]{Basic data types and set-up variables for GranSim}
-%* *
-%****************************************************************
-
-\begin{code}
-
-/* See GranSim.lh for the definition of the enum gran_event_types */
-char *gran_event_names[] = {
- "START", "START(Q)",
- "STEALING", "STOLEN", "STOLEN(Q)",
- "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
- "SCHEDULE", "DESCHEDULE",
- "END",
- "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
- "ALLOC",
- "TERMINATE",
- "SYSTEM_START", "SYSTEM_END", /* only for debugging */
- "??"
-};
-
-#if defined(GRAN)
-char *proc_status_names[] = {
- "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
- "UnknownProcStatus"
-};
-
-#define RAND_MAX 0x7fffffff /* 2^31-1 = 0x80000000 - 1 (see lrand48(3) */
-
-unsigned CurrentProc = 0;
-rtsBool IgnoreEvents = rtsFalse; /* HACK only for testing */
-
-#if 0 && (defined(GCap) || defined(GCgn))
-closq ex_RBH_q = NULL;
-#endif
-#endif /* GRAN */
-\end{code}
-
-The following variables control the behaviour of GrAnSim. In general, there
-is one RTS option for enabling each of these features. In getting the
-desired setup of GranSim the following questions have to be answered:
-\begin{itemize}
-\item {\em Which scheduling algorithm} to use (@RTSflags.GranFlags.DoFairSchedule@)?
- Currently only unfair scheduling is supported.
-\item What to do when remote data is fetched (@RTSflags.GranFlags.DoReScheduleOnFetch@)?
- Either block and wait for the
- data or reschedule and do some other work.
- Thus, if this variable is true, asynchronous communication is
- modelled. Block on fetch mainly makes sense for incremental fetching.
-
- There is also a simplified fetch variant available
- (@RTSflags.GranFlags.SimplifiedFetch@). This variant does not use events to model
- communication. It is faster but the results will be less accurate.
-\item How aggressive to be in getting work after a reschedule on fetch
- (@RTSflags.GranFlags.FetchStrategy@)?
- This is determined by the so-called {\em fetching
- strategy\/}. Currently, there are four possibilities:
- \begin{enumerate}
- \item Only run a runnable thread.
- \item Turn a spark into a thread, if necessary.
- \item Steal a remote spark, if necessary.
- \item Steal a runnable thread from another processor, if necessary.
- \end{itemize}
- The variable @RTSflags.GranFlags.FetchStrategy@ determines how far to go in this list
- when rescheduling on a fetch.
-\item Should sparks or threads be stolen first when looking for work
- (@RTSflags.GranFlags.DoStealThreadsFirst@)?
- The default is to steal sparks first (much cheaper).
-\item Should the RTS use a lazy thread creation scheme
- (@RTSflags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
- turned into threads when work is needed. Also note, that sparks
- can be discarded by the RTS (this is done in the case of an overflow
- of the spark pool). Setting @RTSflags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
- the creation of threads at the next possibility (i.e.\ when new work
- is demanded the next time).
-\item Should data be fetched closure-by-closure or in packets
- (@RTSflags.GranFlags.DoGUMMFetching@)? The default strategy is a GRIP-like incremental
- (i.e.\ closure-by-closure) strategy. This makes sense in a
- low-latency setting but is bad in a high-latency system. Setting
- @RTSflags.GranFlags.DoGUMMFetching@ to @True@ enables bulk (packet) fetching. Other
- parameters determine the size of the packets (@pack_buffer_size@) and the number of
- thunks that should be put into one packet (@RTSflags.GranFlags.ThunksToPack@).
-\item If there is no other possibility to find work, should runnable threads
- be moved to an idle processor (@RTSflags.GranFlags.DoThreadMigration@)? In any case, the
- RTS tried to get sparks (either local or remote ones) first. Thread
- migration is very expensive, since a whole TSO has to be transferred
- and probably data locality becomes worse in the process. Note, that
- the closure, which will be evaluated next by that TSO is not
- transferred together with the TSO (that might block another thread).
-\item Should the RTS distinguish between sparks created by local nodes and
- stolen sparks (@RTSflags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
- data locality by preferring sparks of local nodes (it is more likely
- that the data for those sparks is already on the local processor).
- However, such a distinction also imposes an overhead on the spark
- queue management, and typically a large number of sparks are
- generated during execution. By default this variable is set to @False@.
-\item Should the RTS use granularity control mechanisms? The idea of a
- granularity control mechanism is to make use of granularity
- information provided via annotation of the @par@ construct in order
- to prefer bigger threads when either turning a spark into a thread or
- when choosing the next thread to schedule. Currently, three such
- mechanisms are implemented:
- \begin{itemize}
- \item Cut-off: The granularity information is interpreted as a
- priority. If a threshold priority is given to the RTS, then
- only those sparks with a higher priority than the threshold
- are actually created. Other sparks are immediately discarded.
- This is similar to a usual cut-off mechanism often used in
- parallel programs, where parallelism is only created if the
- input data is lage enough. With this option, the choice is
- hidden in the RTS and only the threshold value has to be
- provided as a parameter to the runtime system.
- \item Priority Sparking: This mechanism keeps priorities for sparks
- and chooses the spark with the highest priority when turning
- a spark into a thread. After that the priority information is
- discarded. The overhead of this mechanism comes from
- maintaining a sorted spark queue.
- \item Priority Scheduling: This mechanism keeps the granularity
- information for threads, to. Thus, on each reschedule the
- largest thread is chosen. This mechanism has a higher
- overhead, as the thread queue is sorted, too.
- \end{itemize}
-\end{itemize}
-
-\begin{code}
-#if defined(GRAN)
-
-/* Do we need to reschedule following a fetch? */
-rtsBool NeedToReSchedule = rtsFalse;
-TIME TimeOfNextEvent, EndOfTimeSlice; /* checked from the threaded world! */
-/* I_ avoidedCS=0; */ /* Unused!! ToDo: Remake libraries and nuke this var */
-
-/* For internal use (event statistics) only */
-char *event_names[] =
- { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD",
- "MOVESPARK", "MOVETHREAD", "FINDWORK",
- "FETCHNODE", "FETCHREPLY",
- "GLOBALBLOCK", "UNBLOCKTHREAD"
- };
-
-# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
-I_ noOfEvents = 0;
-I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-I_ fetch_misses = 0;
-I_ tot_low_pri_sparks = 0;
-
-I_ rs_sp_count=0, rs_t_count=0, ntimes_total=0, fl_total=0, no_of_steals=0;
-
-/* Variables for gathering packet and queue statistics */
-I_ tot_packets = 0, tot_packet_size = 0, tot_cuts = 0, tot_thunks = 0;
-I_ tot_sq_len = 0, tot_sq_probes = 0, tot_sparks = 0, withered_sparks = 0;
-I_ tot_add_threads = 0, tot_tq_len = 0, non_end_add_threads = 0;
-# endif
-
-# if defined(GRAN_COUNT)
-/* Count the number of updates that are done. Mainly for testing, but
- could be useful for other purposes, too. */
-I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0,
- BQ_lens = 0;
-# endif
-
-/* Prototypes */
-I_ HandleFetchRequest(P_, PROC, P_);
-/* void HandleFetchRequest(P_, PROC, P_); changed for GUMMFeching */
-static I_ blockFetch(P_ tso, PROC proc, P_ bh);
-
-#endif /* GRAN */
-\end{code}
-
-%****************************************************************
-%* *
-\subsection[global-address-op]{Global Address Operations}
-%* *
-%****************************************************************
-
-These functions perform operations on the global-address (ga) part
-of a closure. The ga is the only new field (1 word) in a closure introduced
-by GrAnSim. It serves as a bitmask, indicating on which processor
-the closure is residing. Since threads are described by Thread State
-Object (TSO), which is nothing but another kind of closure, this
-scheme allows gives placement information about threads.
-
-A ga is just a bitmask, so the operations on them are mainly bitmask
-manipulating functions. Note, that there are important macros like PROCS,
-IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
-
-NOTE: In GrAnSim-light we don't maintain placement information. This
-allows to simulate an arbitrary number of processors. The price we have
-to be is the lack of costing any communication properly. In short,
-GrAnSim-light is meant to reveal the maximal parallelism in a program.
-From an implementation point of view the important thing is:
-{\em GrAnSim-light does not maintain global-addresses}.
-
-\begin{code}
-#if defined(GRAN)
-
-/* ga_to_proc returns the first processor marked in the bitmask ga.
- Normally only one bit in ga should be set. But for PLCs all bits
- are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
-
-PROC
-ga_to_proc(W_ ga)
-{
- PROC i;
- for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++);
- return (i);
-}
-
-/* NB: This takes a *node* rather than just a ga as input */
-PROC
-where_is(P_ node)
-{ return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */
-
-rtsBool
-any_idle() {
- I_ i;
- rtsBool any_idle;
- for(i=0, any_idle=rtsFalse;
- !any_idle && i<RTSflags.GranFlags.proc;
- any_idle = any_idle || IS_IDLE(i), i++)
- {} ;
-}
-
-int
-idlers() {
- I_ i, j;
- for(i=0, j=0;
- i<RTSflags.GranFlags.proc;
- j += IS_IDLE(i)?1:0, i++)
- {} ;
- return j;
-}
-#endif /* GRAN */
-\end{code}
-
-%****************************************************************
-%* *
-\subsection[event-queue]{The Global Event Queue}
-%* *
-%****************************************************************
-
-The following routines implement an ADT of an event-queue (FIFO).
-ToDo: Put that in an own file(?)
-
-\begin{code}
-#if defined(GRAN)
-
-/* Pointer to the global event queue; events are currently malloc'ed */
-eventq EventHd = NULL;
-
-eventq
-get_next_event()
-{
- static eventq entry = NULL;
-
- if(EventHd == NULL)
- {
- fprintf(stderr,"No next event. This may be caused by a circular data dependency in the program.\n");
- EXIT(EXIT_FAILURE);
- }
-
- if(entry != NULL)
- free((char *)entry);
-
-# if defined(GRAN_CHECK) && defined(GRAN)
- if (RTSflags.GranFlags.debug & 0x20) { /* count events */
- noOfEvents++;
- event_counts[EVENT_TYPE(EventHd)]++;
- }
-# endif
-
- entry = EventHd;
- EventHd = EVENT_NEXT(EventHd);
- return(entry);
-}
-
-/* When getting the time of the next event we ignore CONTINUETHREAD events:
- we don't want to be interrupted before the end of the current time slice
- unless there is something important to handle.
-*/
-TIME
-get_time_of_next_event()
-{
- eventq event = EventHd;
-
- while (event != NULL && EVENT_TYPE(event)==CONTINUETHREAD) {
- event = EVENT_NEXT(event);
- }
- if(event == NULL)
- return ((TIME) 0);
- else
- return (EVENT_TIME(event));
-}
-
-/* ToDo: replace malloc/free with a free list */
-
-static
-insert_event(newentry)
-eventq newentry;
-{
- EVTTYPE evttype = EVENT_TYPE(newentry);
- eventq event, *prev;
-
- /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
-
- /* Search the queue and insert at the right point:
- FINDWORK before everything, CONTINUETHREAD after everything.
-
- This ensures that we find any available work after all threads have
- executed the current cycle. This level of detail would normally be
- irrelevant, but matters for ridiculously low latencies...
- */
-
- /* Changed the ordering: Now FINDWORK comes after everything but
- CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
- FINDWORK. This is important when a GranSimSparkAt happens and
- DoAlwaysCreateThreads is turned on. Also important if a GC occurs
- when trying to build a new thread (see much_spark) -- HWL 02/96 */
-
- if(EventHd == NULL)
- EventHd = newentry;
- else {
- for (event = EventHd, prev=&EventHd;
- event != NULL;
- prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) {
- switch (evttype) {
- case FINDWORK: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
- ( (EVENT_TIME(event) == EVENT_TIME(newentry)) &&
- (EVENT_TYPE(event) != CONTINUETHREAD) ) )
- continue;
- else
- break;
- case CONTINUETHREAD: if ( EVENT_TIME(event) <= EVENT_TIME(newentry) )
- continue;
- else
- break;
- default: if ( EVENT_TIME(event) < EVENT_TIME(newentry) ||
- ((EVENT_TIME(event) == EVENT_TIME(newentry)) &&
- (EVENT_TYPE(event) == EVENT_TYPE(newentry))) )
- continue;
- else
- break;
- }
- /* Insert newentry here (i.e. before event) */
- *prev = newentry;
- EVENT_NEXT(newentry) = event;
- break;
- }
- if (event == NULL)
- *prev = newentry;
- }
-}
-
-void
-new_event(proc,creator,time,evttype,tso,node,spark)
-PROC proc, creator;
-TIME time;
-EVTTYPE evttype;
-P_ tso, node;
-sparkq spark;
-{
- eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "new_event");
-
- EVENT_PROC(newentry) = proc;
- EVENT_CREATOR(newentry) = creator;
- EVENT_TIME(newentry) = time;
- EVENT_TYPE(newentry) = evttype;
- EVENT_TSO(newentry) = tso;
- EVENT_NODE(newentry) = node;
- EVENT_SPARK(newentry) = spark;
- EVENT_GC_INFO(newentry) = 0;
- EVENT_NEXT(newentry) = NULL;
-
- insert_event(newentry);
-}
-
-void
-prepend_event(eventq event) /* put event at beginning of EventQueue */
-{ /* only used for GC! */
- EVENT_NEXT(event) = EventHd;
- EventHd = event;
-}
-
-eventq
-grab_event() /* undo prepend_event i.e. get the event */
-{ /* at the head of EventQ but don't free anything */
- eventq event = EventHd;
-
- if(EventHd == NULL) {
- fprintf(stderr,"No next event (in grab_event). This may be caused by a circular data dependency in the program.\n");
- EXIT(EXIT_FAILURE);
- }
-
- EventHd = EVENT_NEXT(EventHd);
- return (event);
-}
-
-void
-traverse_eventq_for_gc()
-{
- eventq event = EventHd;
- W_ bufsize;
- P_ closure, tso, buffer, bufptr;
- PROC proc, creator;
-
- /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
- orig closure (root of packed graph). This means that a graph, which is
- between processors at the time of GC is fetched again at the time when
- it would have arrived, had there been no GC. Slightly inaccurate but
- safe for GC.
- This is only needed for GUM style fetchng. */
- if (!RTSflags.GranFlags.DoGUMMFetching)
- return;
-
- for(event = EventHd; event!=NULL; event=EVENT_NEXT(event)) {
- if (EVENT_TYPE(event)==FETCHREPLY) {
- buffer = EVENT_NODE(event);
- ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */
- bufsize = buffer[PACK_SIZE_LOCN];
- closure= (P_)buffer[PACK_HDR_SIZE];
- tso = (P_)buffer[PACK_TSO_LOCN];
- proc = EVENT_PROC(event);
- creator = EVENT_CREATOR(event); /* similar to unpacking */
- for (bufptr=buffer+PACK_HDR_SIZE; bufptr<(buffer+bufsize);
- bufptr++) {
- if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
- (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
- convertFromRBH((P_)*bufptr);
- }
- }
- free(buffer);
- EVENT_TYPE(event) = FETCHNODE;
- EVENT_PROC(event) = creator;
- EVENT_CREATOR(event) = proc;
- EVENT_NODE(event) = closure;
- EVENT_TSO(event) = tso;
- EVENT_GC_INFO(event) = 0;
- }
- }
-}
-
-void
-print_event(event)
-eventq event;
-{
-
- char str_tso[16], str_node[16];
-
- sprintf(str_tso,((EVENT_TSO(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
- EVENT_TSO(event));
- sprintf(str_node,((EVENT_NODE(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
- EVENT_NODE(event));
-
- if (event==NULL)
- fprintf(stderr,"Evt: NIL\n");
- else
- fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO %s (%x), node %s\n",
- event_names[EVENT_TYPE(event)],EVENT_TYPE(event),
- EVENT_PROC(event), EVENT_CREATOR(event), EVENT_TIME(event),
- str_tso, TSO_ID(EVENT_TSO(event)), str_node
- /*, EVENT_SPARK(event), EVENT_NEXT(event)*/ );
-
-}
-
-void
-print_eventq(hd)
-eventq hd;
-{
- eventq x;
-
- fprintf(stderr,"Event Queue with root at %x:\n",hd);
- for (x=hd; x!=NULL; x=EVENT_NEXT(x)) {
- print_event(x);
- }
-}
-
-void
-print_spark(spark)
- sparkq spark;
-{
- char str[16];
-
- sprintf(str,((SPARK_NODE(spark)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"),
- (W_) SPARK_NODE(spark));
-
- if (spark==NULL)
- fprintf(stderr,"Spark: NIL\n");
- else
- fprintf(stderr,"Spark: Node %8s, Name %#6lx, Exported %5s, Prev %#6x, Next %#6x\n",
- str, SPARK_NAME(spark),
- ((SPARK_EXPORTED(spark))?"True":"False"),
- SPARK_PREV(spark), SPARK_NEXT(spark) );
-}
-
-void
-print_sparkq(hd)
-sparkq hd;
-{
- sparkq x;
-
- fprintf(stderr,"Spark Queue with root at %x:\n",hd);
- for (x=hd; x!=NULL; x=SPARK_NEXT(x)) {
- print_spark(x);
- }
-}
-
-
-#endif /* GRAN */
-\end{code}
-
-%****************************************************************************
-%
-\subsection[entry-points]{Routines directly called from Haskell world}
-%
-%****************************************************************************
-
-The @GranSim...@ routines in here are directly called via macros from the
-threaded world.
-
-First some auxiliary routines.
-
-\begin{code}
-#if defined(GRAN)
-/* Take the current thread off the thread queue and thereby activate the */
-/* next thread. It's assumed that the next ReSchedule after this uses */
-/* NEW_THREAD as param. */
-/* This fct is called from GranSimBlock and GranSimFetch */
-
-void
-ActivateNextThread (PROC proc)
-{
- ASSERT(RunnableThreadsHd[proc]!=PrelBase_Z91Z93_closure);
-
- RunnableThreadsHd[proc] = TSO_LINK(RunnableThreadsHd[proc]);
- if(RunnableThreadsHd[proc]==PrelBase_Z91Z93_closure) {
- MAKE_IDLE(proc);
- RunnableThreadsTl[proc] = PrelBase_Z91Z93_closure;
- } else {
- CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime;
- if (RTSflags.GranFlags.granSimStats &&
- (!RTSflags.GranFlags.Light || (RTSflags.GranFlags.debug & 0x20000)))
- DumpRawGranEvent(proc,0,GR_SCHEDULE,RunnableThreadsHd[proc],
- PrelBase_Z91Z93_closure,0);
- }
-}
-\end{code}
-
-Now the main stg-called routines:
-
-\begin{code}
-/* ------------------------------------------------------------------------ */
-/* The following GranSim... fcts are stg-called from the threaded world. */
-/* ------------------------------------------------------------------------ */
-
-/* Called from HEAP_CHK -- NB: node and liveness are junk here now.
- They are left temporarily to avoid complete recompilation.
- KH
-*/
-void
-GranSimAllocate(n,node,liveness)
-I_ n;
-P_ node;
-W_ liveness;
-{
- TSO_ALLOCS(CurrentTSO) += n;
- ++TSO_BASICBLOCKS(CurrentTSO);
-
- if (RTSflags.GranFlags.granSimStats_Heap) {
- DumpRawGranEvent(CurrentProc,0,GR_ALLOC,CurrentTSO,
- PrelBase_Z91Z93_closure,n);
- }
-
- TSO_EXECTIME(CurrentTSO) += RTSflags.GranFlags.gran_heapalloc_cost;
- CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_heapalloc_cost;
-}
-
-/*
- Subtract the values added above, if a heap check fails and
- so has to be redone.
-*/
-void
-GranSimUnallocate(n,node,liveness)
-W_ n;
-P_ node;
-W_ liveness;
-{
- TSO_ALLOCS(CurrentTSO) -= n;
- --TSO_BASICBLOCKS(CurrentTSO);
-
- TSO_EXECTIME(CurrentTSO) -= RTSflags.GranFlags.gran_heapalloc_cost;
- CurrentTime[CurrentProc] -= RTSflags.GranFlags.gran_heapalloc_cost;
-}
-
-/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
-void
-GranSimExec(ariths,branches,loads,stores,floats)
-W_ ariths,branches,loads,stores,floats;
-{
- W_ cost = RTSflags.GranFlags.gran_arith_cost*ariths +
- RTSflags.GranFlags.gran_branch_cost*branches +
- RTSflags.GranFlags.gran_load_cost * loads +
- RTSflags.GranFlags.gran_store_cost*stores +
- RTSflags.GranFlags.gran_float_cost*floats;
-
- TSO_EXECTIME(CurrentTSO) += cost;
- CurrentTime[CurrentProc] += cost;
-}
-
-
-/*
- Fetch the node if it isn't local
- -- result indicates whether fetch has been done.
-
- This is GRIP-style single item fetching.
-*/
-
-/* This function in Threads.lc is only needed for SimplifiedFetch */
-FetchNode PROTO((P_ node,PROC CurrentProc));
-
-I_
-GranSimFetch(node /* , liveness_mask */ )
-P_ node;
-/* I_ liveness_mask; */
-{
- if (RTSflags.GranFlags.Light) {
- /* Always reschedule in GrAnSim-Light to prevent one TSO from
- running off too far
- new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
- CONTINUETHREAD,CurrentTSO,node,NULL);
- */
- NeedToReSchedule = rtsFalse;
- return(0);
- }
-
- /* Note: once a node has been fetched, this test will be passed */
- if(!IS_LOCAL_TO(PROCS(node),CurrentProc))
- {
- /* Add mpacktime to the remote PE for the reply */
- {
- PROC p = where_is(node);
- TIME fetchtime;
-
-# ifdef GRAN_CHECK
- if ( ( RTSflags.GranFlags.debug & 0x40 ) &&
- p == CurrentProc )
- fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p);
-# endif /* GRAN_CHECK */
-
- CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
- /* NB: Fetch is counted on arrival (FETCHREPLY) */
-
- if (RTSflags.GranFlags.SimplifiedFetch)
- {
- FetchNode(node,CurrentProc);
- CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime+
- RTSflags.GranFlags.gran_fetchtime+
- RTSflags.GranFlags.gran_munpacktime;
- return(1);
- }
-
- fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) +
- RTSflags.GranFlags.gran_latency;
-
- new_event(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL);
- if (!RTSflags.GranFlags.DoReScheduleOnFetch)
- MAKE_FETCHING(CurrentProc);
- ++OutstandingFetches[CurrentProc];
-
- if (fetchtime<TimeOfNextEvent)
- TimeOfNextEvent = fetchtime;
-
- /* About to block */
- TSO_BLOCKEDAT(CurrentTSO) = CurrentTime[CurrentProc];
-
- if (RTSflags.GranFlags.DoReScheduleOnFetch)
- {
- /* Remove CurrentTSO from the queue
- -- assumes head of queue == CurrentTSO */
- if(!RTSflags.GranFlags.DoFairSchedule)
- {
- if(RTSflags.GranFlags.granSimStats)
- DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,
- node,0);
-
- ActivateNextThread(CurrentProc);
-
-# if defined(GRAN_CHECK)
- if (RTSflags.GranFlags.debug & 0x10) {
- if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
- fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
- CurrentTSO,CurrentTime[CurrentProc]);
- EXIT(EXIT_FAILURE);
- } else {
- TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
- }
- }
-# endif
- TSO_LINK(CurrentTSO) = PrelBase_Z91Z93_closure;
- /* CurrentTSO = PrelBase_Z91Z93_closure; */
-
- /* ThreadQueueHd is now the next TSO to schedule or NULL */
- /* CurrentTSO is pointed to by the FETCHNODE event */
- }
- else /* fair scheduling currently not supported -- HWL */
- {
- fprintf(stderr,"Reschedule-on-fetch is not yet compatible with fair scheduling\n");
- EXIT(EXIT_FAILURE);
- }
- }
- else /* !RTSflags.GranFlags.DoReScheduleOnFetch */
- {
- /* Note: CurrentProc is still busy as it's blocked on fetch */
- if(RTSflags.GranFlags.granSimStats)
- DumpRawGranEvent(CurrentProc,p,GR_FETCH,CurrentTSO,node,0);
-
-# if defined(GRAN_CHECK)
- if (RTSflags.GranFlags.debug & 0x04)
- BlockedOnFetch[CurrentProc] = CurrentTSO; /*- rtsTrue; -*/
- if (RTSflags.GranFlags.debug & 0x10) {
- if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
- fprintf(stderr,"FETCHNODE: TSO 0x%x has fetch-mask set @ %d\n",
- CurrentTSO,CurrentTime[CurrentProc]);
- EXIT(EXIT_FAILURE);
- } else {
- TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
- }
- CurrentTSO = PrelBase_Z91Z93_closure;
- }
-# endif
- }
- CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
-
- /* Rescheduling is necessary */
- NeedToReSchedule = rtsTrue;
-
- return(1);
- }
- }
- return(0);
-}
-
-void
-GranSimSpark(local,node)
-W_ local;
-P_ node;
-{
- /* ++SparksAvail; Nope; do that in add_to_spark_queue */
- if(RTSflags.GranFlags.granSimStats_Sparks)
- DumpRawGranEvent(CurrentProc,(PROC)0,SP_SPARK,PrelBase_Z91Z93_closure,node,
- spark_queue_len(CurrentProc,ADVISORY_POOL)-1);
-
- /* Force the PE to take notice of the spark */
- if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
- new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
- FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
- if (CurrentTime[CurrentProc]<TimeOfNextEvent)
- TimeOfNextEvent = CurrentTime[CurrentProc];
- }
-
- if(local)
- ++TSO_LOCALSPARKS(CurrentTSO);
- else
- ++TSO_GLOBALSPARKS(CurrentTSO);
-}
-
-void
-GranSimSparkAt(spark,where,identifier)
-sparkq spark;
-P_ where; /* This should be a node; alternatively could be a GA */
-I_ identifier;
-{
- PROC p = where_is(where);
- GranSimSparkAtAbs(spark,p,identifier);
-}
-
-void
-GranSimSparkAtAbs(spark,proc,identifier)
-sparkq spark;
-PROC proc;
-I_ identifier;
-{
- TIME exporttime;
-
- if ( spark == (sparkq)NULL) /* Note: Granularity control might have */
- return; /* turned a spark into a NULL. */
-
- /* ++SparksAvail; Nope; do that in add_to_spark_queue */
- if(RTSflags.GranFlags.granSimStats_Sparks)
- DumpRawGranEvent(proc,0,SP_SPARKAT,PrelBase_Z91Z93_closure,SPARK_NODE(spark),
- spark_queue_len(proc,ADVISORY_POOL));
-
- if (proc!=CurrentProc) {
- CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime;
- exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
- CurrentTime[proc]: CurrentTime[CurrentProc])
- + RTSflags.GranFlags.gran_latency;
- } else {
- exporttime = CurrentTime[CurrentProc];
- }
-
- if ( RTSflags.GranFlags.Light )
- /* Need CurrentTSO in event field to associate costs with creating
- spark even in a GrAnSim Light setup */
- new_event(proc,CurrentProc,exporttime,
- MOVESPARK,CurrentTSO,PrelBase_Z91Z93_closure,spark);
- else
- new_event(proc,CurrentProc,exporttime,
- MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark);
- /* Bit of a hack to treat placed sparks the same as stolen sparks */
- ++OutstandingFishes[proc];
-
- /* Force the PE to take notice of the spark (FINDWORK is put after a
- MOVESPARK into the sparkq!) */
- if(RTSflags.GranFlags.DoAlwaysCreateThreads) {
- new_event(CurrentProc,CurrentProc,exporttime+1,
- FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL);
- }
-
- if (exporttime<TimeOfNextEvent)
- TimeOfNextEvent = exporttime;
-
- if (proc!=CurrentProc) {
- CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime;
- ++TSO_GLOBALSPARKS(CurrentTSO);
- } else {
- ++TSO_LOCALSPARKS(CurrentTSO);
- }
-}
-
-/* This function handles local and global blocking */
-/* It's called either from threaded code (RBH_entry, BH_entry etc) or */
-/* from blockFetch when trying to fetch an BH or RBH */
-
-void
-GranSimBlock(P_ tso, PROC proc, P_ node)
-{
- PROC node_proc = where_is(node);
-
- ASSERT(tso==RunnableThreadsHd[proc]);
-
- if(RTSflags.GranFlags.granSimStats)
- DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,0);
-
- ++TSO_BLOCKCOUNT(tso);
- /* Distinction between local and global block is made in blockFetch */
- TSO_BLOCKEDAT(tso) = CurrentTime[proc];
-
- CurrentTime[proc] += RTSflags.GranFlags.gran_threadqueuetime;
- ActivateNextThread(proc);
- TSO_LINK(tso) = PrelBase_Z91Z93_closure; /* not really necessary; only for testing */
-}
-
-#endif /* GRAN */
-
-\end{code}
-
-%****************************************************************************
-%
-\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim}
-%
-%****************************************************************************
-
-Event dumping routines.
-
-\begin{code}
-
-/*
- * If you're not using GNUC and you're on a 32-bit machine, you're
- * probably out of luck here. However, since CONCURRENT currently
- * requires GNUC, I'm not too worried about it. --JSM
- */
-
-#if !defined(GRAN)
-
-static ullong startTime = 0;
-
-ullong
-msTime(STG_NO_ARGS)
-{
-# ifdef HAVE_GETCLOCK
- struct timespec tv;
-
- if (getclock(TIMEOFDAY, &tv) != 0) {
- fflush(stdout);
- fprintf(stderr, "Clock failed\n");
- EXIT(EXIT_FAILURE);
- }
- return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
-# else
-# ifdef HAVE_GETTIMEOFDAY
- struct timeval tv;
-
- if (gettimeofday(&tv, NULL) != 0) {
- fflush(stdout);
- fprintf(stderr, "Clock failed\n");
- EXIT(EXIT_FAILURE);
- }
- return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
-# else
- time_t t;
- if ((t = time(NULL)) == (time_t) -1) {
- fflush(stdout);
- fprintf(stderr, "Clock failed\n");
- EXIT(EXIT_FAILURE);
- }
- return t * LL(1000);
-# endif
-# endif
-}
-
-#endif /* !GRAN */
-
-#if defined(GRAN) || defined(PAR)
-
-void
-DumpGranEvent(name, tso)
-enum gran_event_types name;
-P_ tso;
-{
- DumpRawGranEvent(CURRENT_PROC, (PROC)0, name, tso, PrelBase_Z91Z93_closure, 0);
-}
-
-void
-DumpRawGranEvent(proc, p, name, tso, node, len)
-PROC proc, p; /* proc ... where it happens; p ... where node lives */
-enum gran_event_types name;
-P_ tso, node;
-I_ len;
-{
- W_ id;
- char time_string[500], node_str[16]; /*ToDo: kill magic constants */
- ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/);
-#if defined(GRAN)
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-#endif
-
- id = tso == NULL ? -1 : TSO_ID(tso);
- if (node==PrelBase_Z91Z93_closure)
- strcpy(node_str,"________"); /* "PrelBase_Z91Z93_closure"); */
- else
- sprintf(node_str,"0x%-6lx",node);
-
- if (name > GR_EVENT_MAX)
- name = GR_EVENT_MAX;
-
- if(GRANSIMSTATS_BINARY)
- /* ToDo: fix code for writing binary GrAnSim statistics */
- switch (name) {
- case GR_START:
- case GR_STARTQ:
- grputw(name);
- grputw(proc);
- abort(); /* die please: a single word */
- /* doesn't represent long long times */
- grputw(TIME_ON_PROC(proc));
- grputw((W_)node);
- break;
- case GR_FETCH:
- case GR_REPLY:
- case GR_BLOCK:
- grputw(name);
- grputw(proc);
- abort(); /* die please: a single word */
- /* doesn't represent long long times */
- grputw(TIME_ON_PROC(proc)); /* this line is bound to */
- grputw(id); /* do the wrong thing */
- break;
- default:
- grputw(name);
- grputw(proc);
- abort(); /* die please: a single word */
- /* doesn't represent long long times */
- grputw(TIME_ON_PROC(proc));
- grputw((W_)node);
- }
- else
- switch (name) {
- case GR_START:
- case GR_STARTQ:
- /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
- /* using spark queue length as optional argument ^^^^^^^^^ */
- fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
- /* using spark name as optional argument ^^^^^^ */
- proc,time_string,gran_event_names[name],
- id,node_str,len);
- break;
- case GR_FETCH:
- case GR_REPLY:
- case GR_BLOCK:
- case GR_STOLEN:
- case GR_STOLENQ:
- fprintf(gr_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
- proc, time_string, gran_event_names[name],
- id,node_str,p);
- break;
- case GR_RESUME:
- case GR_RESUMEQ:
- case GR_SCHEDULE:
- case GR_DESCHEDULE:
- fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx \n",
- proc,time_string,gran_event_names[name],id);
- break;
- case GR_STEALING:
- fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n",
- proc,time_string,gran_event_names[name],id,p);
- break;
- case GR_ALLOC:
- fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
- proc,time_string,gran_event_names[name],id,len);
- break;
- default:
- fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
- proc,time_string,gran_event_names[name],id,node_str,len);
- }
-}
-
-
-#if defined(GRAN)
-/* Only needed for special dynamic spark labelling support */
-void
-DumpStartEventAt(time, proc, p, name, tso, node, len)
-TIME time;
-PROC proc, p; /* proc ... where it happens; p ... where node lives */
-enum gran_event_types name;
-P_ tso, node;
-I_ len;
-{
- W_ id;
- char time_string[500], node_str[16]; /*ToDo: kill magic constants */
- ullong_format_string(time, time_string, rtsFalse/*no commas!*/);
- /* ^^^^ only important change to DumpRawGranEvent */
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-
- id = tso == NULL ? -1 : TSO_ID(tso);
- if (node==PrelBase_Z91Z93_closure)
- strcpy(node_str,"________"); /* "Z91Z93_closure"); */
- else
- sprintf(node_str,"0x%-6lx",node);
-
- if (name > GR_EVENT_MAX)
- name = GR_EVENT_MAX;
-
- if(GRANSIMSTATS_BINARY)
- /* ToDo: fix code for writing binary GrAnSim statistics */
- switch (name) {
- case GR_START:
- case GR_STARTQ:
- grputw(name);
- grputw(proc);
- abort(); /* die please: a single word */
- /* doesn't represent long long times */
- grputw(TIME_ON_PROC(proc));
- grputw((W_)node);
- break;
- default:
- fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
- gran_event_names[name]);
- }
- else
- switch (name) {
- case GR_START:
- case GR_STARTQ:
- /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */
- /* using spark queue length as optional argument ^^^^^^^^^ */
- fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n",
- /* using spark name as optional argument ^^^^^^ */
- proc,time_string,gran_event_names[name],
- id,node_str,len);
- break;
- default:
- fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n",
- gran_event_names[name]);
- }
-}
-#endif /* GRAN */
-
-void
-DumpGranInfo(proc, tso, mandatory_thread)
-PROC proc;
-P_ tso;
-rtsBool mandatory_thread;
-{
- char time_string[500]; /* ToDo: kill magic constant */
- ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
-
-#if defined(GRAN)
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-#endif
-
- if (GRANSIMSTATS_BINARY) {
- grputw(GR_END);
- grputw(proc);
- abort(); /* die please: a single word doesn't represent long long times */
- grputw(CURRENT_TIME); /* this line is bound to fail */
- grputw(TSO_ID(tso));
-#ifdef PAR
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
- grputw(0);
-#else
- grputw(TSO_SPARKNAME(tso));
- grputw(TSO_STARTEDAT(tso));
- grputw(TSO_EXPORTED(tso));
- grputw(TSO_BASICBLOCKS(tso));
- grputw(TSO_ALLOCS(tso));
- grputw(TSO_EXECTIME(tso));
- grputw(TSO_BLOCKTIME(tso));
- grputw(TSO_BLOCKCOUNT(tso));
- grputw(TSO_FETCHTIME(tso));
- grputw(TSO_FETCHCOUNT(tso));
- grputw(TSO_LOCALSPARKS(tso));
- grputw(TSO_GLOBALSPARKS(tso));
-#endif
- grputw(mandatory_thread);
- } else {
-
- /*
- * NB: DumpGranEvent cannot be used because PE may be wrong
- * (as well as the extra info)
- */
- fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n"
- ,proc
- ,time_string
- ,TSO_ID(tso)
- ,TSO_SPARKNAME(tso)
- ,TSO_STARTEDAT(tso)
- ,TSO_EXPORTED(tso) ? 'T' : 'F'
- ,TSO_BASICBLOCKS(tso)
- ,TSO_ALLOCS(tso)
- ,TSO_EXECTIME(tso)
- ,TSO_BLOCKTIME(tso)
- ,TSO_BLOCKCOUNT(tso)
- ,TSO_FETCHTIME(tso)
- ,TSO_FETCHCOUNT(tso)
- ,TSO_LOCALSPARKS(tso)
- ,TSO_GLOBALSPARKS(tso)
- ,mandatory_thread ? 'T' : 'F'
- );
- }
-}
-
-void
-DumpTSO(tso)
-P_ tso;
-{
- fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n"
- ,tso
- ,TSO_NAME(tso)
- ,TSO_ID(tso)
- ,TSO_LINK(tso)
- ,TSO_TYPE(tso)==T_MAIN?"MAIN":
- TSO_TYPE(tso)==T_FAIL?"FAIL":
- TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
- TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
- "???"
- );
-
- fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx), SWITCH %lx0x\n"
- ,TSO_PC1(tso)
- ,TSO_PC2(tso)
- ,TSO_ARG1(tso)
- /* ,TSO_ARG2(tso) */
- ,TSO_SWITCH(tso)
- );
-
- fprintf(gr_file,"TSO %lx: SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n"
- ,TSO_ID(tso)
- ,TSO_SPARKNAME(tso)
- ,TSO_STARTEDAT(tso)
- ,TSO_EXPORTED(tso)?'T':'F'
- ,TSO_BASICBLOCKS(tso)
- ,TSO_ALLOCS(tso)
- ,TSO_EXECTIME(tso)
- ,TSO_BLOCKTIME(tso)
- ,TSO_BLOCKCOUNT(tso)
- ,TSO_FETCHTIME(tso)
- ,TSO_FETCHCOUNT(tso)
- ,TSO_LOCALSPARKS(tso)
- ,TSO_GLOBALSPARKS(tso)
- );
-}
-
-/*
- Output a terminate event and an 8-byte time.
-*/
-
-void
-grterminate(v)
-TIME v;
-{
-#if defined(GRAN)
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-#endif
-
- DumpGranEvent(GR_TERMINATE, PrelBase_Z91Z93_closure);
-
- if (sizeof(TIME) == 4) {
- putc('\0', gr_file);
- putc('\0', gr_file);
- putc('\0', gr_file);
- putc('\0', gr_file);
- } else {
- putc(v >> 56l, gr_file);
- putc((v >> 48l) & 0xffl, gr_file);
- putc((v >> 40l) & 0xffl, gr_file);
- putc((v >> 32l) & 0xffl, gr_file);
- }
- putc((v >> 24l) & 0xffl, gr_file);
- putc((v >> 16l) & 0xffl, gr_file);
- putc((v >> 8l) & 0xffl, gr_file);
- putc(v & 0xffl, gr_file);
-}
-
-/*
- Length-coded output: first 3 bits contain length coding
-
- 00x 1 byte
- 01x 2 bytes
- 10x 4 bytes
- 110 8 bytes
- 111 5 or 9 bytes
-*/
-
-void
-grputw(v)
-TIME v;
-{
-#if defined(GRAN)
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-#endif
-
- if (v <= 0x3fl) { /* length v = 1 byte */
- fputc(v & 0x3f, gr_file);
- } else if (v <= 0x3fffl) { /* length v = 2 byte */
- fputc((v >> 8l) | 0x40l, gr_file);
- fputc(v & 0xffl, gr_file);
- } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
- fputc((v >> 24l) | 0x80l, gr_file);
- fputc((v >> 16l) & 0xffl, gr_file);
- fputc((v >> 8l) & 0xffl, gr_file);
- fputc(v & 0xffl, gr_file);
- } else if (sizeof(TIME) == 4) {
- fputc(0x70, gr_file);
- fputc((v >> 24l) & 0xffl, gr_file);
- fputc((v >> 16l) & 0xffl, gr_file);
- fputc((v >> 8l) & 0xffl, gr_file);
- fputc(v & 0xffl, gr_file);
- } else {
- if (v <= 0x3fffffffffffffl)
- putc((v >> 56l) | 0x60l, gr_file);
- else {
- putc(0x70, gr_file);
- putc((v >> 56l) & 0xffl, gr_file);
- }
-
- putc((v >> 48l) & 0xffl, gr_file);
- putc((v >> 40l) & 0xffl, gr_file);
- putc((v >> 32l) & 0xffl, gr_file);
- putc((v >> 24l) & 0xffl, gr_file);
- putc((v >> 16l) & 0xffl, gr_file);
- putc((v >> 8l) & 0xffl, gr_file);
- putc(v & 0xffl, gr_file);
- }
-}
-
-#endif /* GRAN || PAR */
-\end{code}
-
-%****************************************************************************
-%
-\subsection[gr-simulation]{Granularity Simulation}
-%
-%****************************************************************************
-
-General routines for GranSim. Mainly, startup and shutdown routines, called
-from @main.lc@.
-
-\begin{code}
-#if defined(GRAN)
-FILE *gr_file = NULL;
-char gr_filename[STATS_FILENAME_MAXLEN];
-/* I_ do_gr_sim = 0; */ /* In GrAnSim setup always do simulation */
-
-int
-init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
-char *prog_argv[], *rts_argv[];
-int prog_argc, rts_argc;
-{
- I_ i;
-
- char *extension = RTSflags.GranFlags.granSimStats_Binary ? "gb" : "gr";
-
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-
- sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
-
- if ((gr_file = fopen(gr_filename, "w")) == NULL) {
- fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename);
- EXIT(EXIT_FAILURE);
- }
-# if 0 /* that's obsolete now, I think -- HWL */
- if (RTSflags.GranFlags.DoReScheduleOnFetch)
- setbuf(gr_file, NULL);
-# endif
-
- fputs("Granularity Simulation for ", gr_file);
- for (i = 0; i < prog_argc; ++i) {
- fputs(prog_argv[i], gr_file);
- fputc(' ', gr_file);
- }
-
- if (rts_argc > 0) {
- fputs("+RTS ", gr_file);
-
- for (i = 0; i < rts_argc; ++i) {
- fputs(rts_argv[i], gr_file);
- fputc(' ', gr_file);
- }
- }
-
- fputs("\nStart time: ", gr_file);
- fputs(time_str(), gr_file); /* defined in main.lc */
- fputc('\n', gr_file);
-
- fputs("\n\n--------------------\n\n", gr_file);
-
- fputs("General Parameters:\n\n", gr_file);
-
- if (RTSflags.GranFlags.Light)
- fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
- RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
- RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
- RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
- RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
- RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
- "Block on Fetch");
- else
- fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
- RTSflags.GranFlags.proc,RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair",
- RTSflags.GranFlags.DoThreadMigration?"":"Don't ",
- RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
- RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" :
- RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" :
- "Block on Fetch");
-
- if (RTSflags.GranFlags.DoGUMMFetching)
- if (RTSflags.GranFlags.ThunksToPack)
- fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
- RTSflags.GranFlags.ThunksToPack,
- RTSflags.GranFlags.packBufferSize);
- else
- fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
- RTSflags.GranFlags.packBufferSize);
- else
- fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
-
- fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
- RTSflags.GranFlags.FetchStrategy,
- RTSflags.GranFlags.FetchStrategy==0 ?
- " block (block-on-fetch)":
- RTSflags.GranFlags.FetchStrategy==1 ?
- "only run runnable threads":
- RTSflags.GranFlags.FetchStrategy==2 ?
- "create threads only from local sparks":
- RTSflags.GranFlags.FetchStrategy==3 ?
- "create threads from local or global sparks":
- RTSflags.GranFlags.FetchStrategy==4 ?
- "create sparks and steal threads if necessary":
- "unknown");
-
- if (RTSflags.GranFlags.DoPrioritySparking)
- fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
-
- if (RTSflags.GranFlags.DoPriorityScheduling)
- fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
-
- fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n",
- RTSflags.GranFlags.gran_threadcreatetime,
- RTSflags.GranFlags.gran_threadqueuetime);
- fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n",
- RTSflags.GranFlags.gran_threaddescheduletime,
- RTSflags.GranFlags.gran_threadscheduletime);
- fprintf(gr_file, "Thread Context-Switch Time %lu\n",
- RTSflags.GranFlags.gran_threadcontextswitchtime);
- fputs("\n\n--------------------\n\n", gr_file);
-
- fputs("Communication Metrics:\n\n", gr_file);
- fprintf(gr_file,
- "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n",
- RTSflags.GranFlags.gran_latency,
- RTSflags.GranFlags.gran_additional_latency,
- RTSflags.GranFlags.gran_fetchtime,
- RTSflags.GranFlags.gran_gunblocktime,
- RTSflags.GranFlags.gran_lunblocktime);
- fprintf(gr_file,
- "Message Creation %lu (+ %lu after send), Message Read %lu\n",
- RTSflags.GranFlags.gran_mpacktime,
- RTSflags.GranFlags.gran_mtidytime,
- RTSflags.GranFlags.gran_munpacktime);
- fputs("\n\n--------------------\n\n", gr_file);
-
- fputs("Instruction Metrics:\n\n", gr_file);
- fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n",
- RTSflags.GranFlags.gran_arith_cost,
- RTSflags.GranFlags.gran_branch_cost,
- RTSflags.GranFlags.gran_load_cost,
- RTSflags.GranFlags.gran_store_cost,
- RTSflags.GranFlags.gran_float_cost,
- RTSflags.GranFlags.gran_heapalloc_cost);
- fputs("\n\n++++++++++++++++++++\n\n", gr_file);
-
- if (RTSflags.GranFlags.granSimStats_Binary)
- grputw(sizeof(TIME));
-
- return (0);
-}
-
-void
-end_gr_simulation(STG_NO_ARGS)
-{
- char time_string[500]; /* ToDo: kill magic constant */
- ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
-
- if (RTSflags.GranFlags.granSimStats_suppressed)
- return;
-
-#if defined(GRAN_CHECK) && defined(GRAN)
- /* Print event stats */
- if (RTSflags.GranFlags.debug & 0x20) {
- int i;
-
- fprintf(stderr,"Event statistics (number of events: %d):\n",
- noOfEvents);
- for (i=0; i<=MAX_EVENT; i++) {
- fprintf(stderr," %s (%d): \t%ld \t%f%%\t%f%%\n",
- event_names[i],i,event_counts[i],
- (float)(100*event_counts[i])/(float)(noOfEvents),
- (i==CONTINUETHREAD ? 0.0 :
- (float)(100*(event_counts[i])/(float)(noOfEvents-event_counts[CONTINUETHREAD])) ));
- }
- fprintf(stderr,"Randomized steals: %u sparks, %u threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f) \n",
- rs_sp_count, rs_t_count, no_of_steals,
- (float)ntimes_total/(float)STG_MAX(no_of_steals,1),
- (float)fl_total/(float)STG_MAX(no_of_steals,1));
- fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
- tot_sparks,withered_sparks,
- ( tot_sparks == 0 ? 0 :
- (float)(100*withered_sparks)/(float)(tot_sparks)) );
- /* Print statistics about priority sparking */
- if (RTSflags.GranFlags.DoPrioritySparking) {
- fprintf(stderr,"About Priority Sparking:\n");
- fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", tot_sq_probes, (float)tot_sq_len/(float)tot_sq_probes);
- }
- /* Print statistics about priority sparking */
- if (RTSflags.GranFlags.DoPriorityScheduling) {
- fprintf(stderr,"About Priority Scheduling:\n");
- fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
- tot_add_threads, non_end_add_threads,
- (float)tot_tq_len/(float)tot_add_threads);
- }
- /* Print packet statistics if GUMM fetching is turned on */
- if (RTSflags.GranFlags.DoGUMMFetching) {
- fprintf(stderr,"Packet statistcs:\n");
- fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", tot_packets, (float)tot_packet_size/(float)tot_packets);
- fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", tot_thunks, (float)tot_thunks/(float)tot_packets);
- fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", tot_cuts, (float)tot_cuts/(float)tot_packets);
- /*
- if (closure_queue_overflows>0)
- fprintf(stderr," Number of closure queue overflows: %u\n",
- closure_queue_overflows);
- */
- }
- }
-
- if (RTSflags.GranFlags.PrintFetchMisses)
- fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses);
-
-# if defined(GRAN_COUNT)
- fprintf(stderr,"Update count statistics:\n");
- fprintf(stderr," Total number of updates: %u\n",nUPDs);
- fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
- nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
- fprintf(stderr," Number of PAPs: %u\n",nPAPs);
-# endif
-
-#endif /* GRAN_CHECK */
-
- fprintf(stderr, "Simulation finished after @ %s @ cycles. Look at %s for details.\n",
- time_string,gr_filename);
- if (RTSflags.GranFlags.granSimStats)
- fclose(gr_file);
-}
-#elif defined(PAR)
-FILE *gr_file = NULL;
-char gr_filename[STATS_FILENAME_MAXLEN];
-
-/* I_ do_sp_profile = 0; */
-
-void
-init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv)
- char *prog_argv[], *rts_argv[];
- int prog_argc, rts_argc;
-{
- int i;
-
- char *extension = RTSflags.ParFlags.granSimStats_Binary ? "gb" : "gr";
-
- sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
-
- if ((gr_file = fopen(gr_filename, "w")) == NULL) {
- fprintf(stderr, "Can't open activity report file %s\n", gr_filename);
- EXIT(EXIT_FAILURE);
- }
-
- for (i = 0; i < prog_argc; ++i) {
- fputs(prog_argv[i], gr_file);
- fputc(' ', gr_file);
- }
-
- if (rts_argc > 0) {
- fputs("+RTS ", gr_file);
-
- for (i = 0; i < rts_argc; ++i) {
- fputs(rts_argv[i], gr_file);
- fputc(' ', gr_file);
- }
- }
- fputc('\n', gr_file);
-
- fputs("Start-Time: ", gr_file);
- fputs(time_str(), gr_file); /* defined in main.lc */
- fputc('\n', gr_file);
-
- startTime = CURRENT_TIME;
-
- if (startTime > LL(1000000000)) {
- /* This shouldn't overflow twice */
- fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
- (TIME) (startTime / LL(1000000000)),
- (TIME) (startTime % LL(1000000000)));
- } else {
- fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
- }
-
- if (RTSflags.ParFlags.granSimStats_Binary)
- grputw(sizeof(TIME));
-}
-#endif /* PAR */
-
-#endif /* GRAN || PAR */
-\end{code}
-
-