X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fruntime%2Fmain%2FGranSim.lc;h=cdaee56da61a93fcdd0ff31427e77662fcc9a6f4;hb=967cc47f37cb93a5e2b6df7822c9a646f0428247;hp=de603ddbc2cebe1a0a9654619959493a65dec20f;hpb=e7d21ee4f8ac907665a7e170c71d59e13a01da09;p=ghc-hetmet.git diff --git a/ghc/runtime/main/GranSim.lc b/ghc/runtime/main/GranSim.lc index de603dd..cdaee56 100644 --- a/ghc/runtime/main/GranSim.lc +++ b/ghc/runtime/main/GranSim.lc @@ -1,5 +1,8 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1995 +% (c) The GRASP/AQUA Project, Glasgow University, 1995 - 1996 +% Hans Wolfgang Loidl +% +% Time-stamp: % %************************************************************************ %* * @@ -17,47 +20,239 @@ which should be <= the length of a word in bits. -- HWL \begin{code} #if defined(GRAN) || defined(PAR) +#ifndef _AIX #define NON_POSIX_SOURCE /* gettimeofday */ +#endif -#include "rtsdefs.h" +#include "rtsdefs.h" + +/* qaStaH nuq Sovpu' ngoqvam ghItlhpu'bogh nuv 'e' vItul */ +# if defined(HAVE_GETCLOCK) +# if defined(HAVE_SYS_TIMERS_H) +# define POSIX_4D9 1 +# include +# endif +# else +# if defined(HAVE_GETTIMEOFDAY) +# if defined(HAVE_SYS_TIME_H) +# include +# endif +# else +# ifdef HAVE_TIME_H +# include +# endif +# endif +# endif +\end{code} -#ifdef HAVE_GETCLOCK +%**************************************************************** +%* * +\subsection[GranSim-data-types]{Basic data types and set-up variables for GranSim} +%* * +%**************************************************************** -#ifdef HAVE_SYS_TIMERS_H -#define POSIX_4D9 1 -#include -#endif +\begin{code} -#else -#ifdef HAVE_GETTIMEOFDAY +/* See GranSim.lh for the definition of the enum gran_event_types */ +char *gran_event_names[] = { + "START", "START(Q)", + "STEALING", "STOLEN", "STOLEN(Q)", + "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)", + "SCHEDULE", "DESCHEDULE", + "END", + "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED", + "ALLOC", + "TERMINATE", + "SYSTEM_START", "SYSTEM_END", /* only for debugging */ + "??" +}; -#ifdef HAVE_SYS_TIME_H -#include -#endif +#if defined(GRAN) +char *proc_status_names[] = { + "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy", + "UnknownProcStatus" +}; -#else +#define RAND_MAX 0x7fffffff /* 2^31-1 = 0x80000000 - 1 (see lrand48(3) */ -#ifdef HAVE_TIME_H -#include -#endif +unsigned CurrentProc = 0; +rtsBool IgnoreEvents = rtsFalse; /* HACK only for testing */ +#if 0 && (defined(GCap) || defined(GCgn)) +closq ex_RBH_q = NULL; #endif -#endif +#endif /* GRAN */ +\end{code} + +The following variables control the behaviour of GrAnSim. In general, there +is one RTS option for enabling each of these features. In getting the +desired setup of GranSim the following questions have to be answered: +\begin{itemize} +\item {\em Which scheduling algorithm} to use (@RTSflags.GranFlags.DoFairSchedule@)? + Currently only unfair scheduling is supported. +\item What to do when remote data is fetched (@RTSflags.GranFlags.DoReScheduleOnFetch@)? + Either block and wait for the + data or reschedule and do some other work. + Thus, if this variable is true, asynchronous communication is + modelled. Block on fetch mainly makes sense for incremental fetching. + + There is also a simplified fetch variant available + (@RTSflags.GranFlags.SimplifiedFetch@). This variant does not use events to model + communication. It is faster but the results will be less accurate. +\item How aggressive to be in getting work after a reschedule on fetch + (@RTSflags.GranFlags.FetchStrategy@)? + This is determined by the so-called {\em fetching + strategy\/}. Currently, there are four possibilities: + \begin{enumerate} + \item Only run a runnable thread. + \item Turn a spark into a thread, if necessary. + \item Steal a remote spark, if necessary. + \item Steal a runnable thread from another processor, if necessary. + \end{itemize} + The variable @RTSflags.GranFlags.FetchStrategy@ determines how far to go in this list + when rescheduling on a fetch. +\item Should sparks or threads be stolen first when looking for work + (@RTSflags.GranFlags.DoStealThreadsFirst@)? + The default is to steal sparks first (much cheaper). +\item Should the RTS use a lazy thread creation scheme + (@RTSflags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only + turned into threads when work is needed. Also note, that sparks + can be discarded by the RTS (this is done in the case of an overflow + of the spark pool). Setting @RTSflags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces + the creation of threads at the next possibility (i.e.\ when new work + is demanded the next time). +\item Should data be fetched closure-by-closure or in packets + (@RTSflags.GranFlags.DoGUMMFetching@)? The default strategy is a GRIP-like incremental + (i.e.\ closure-by-closure) strategy. This makes sense in a + low-latency setting but is bad in a high-latency system. Setting + @RTSflags.GranFlags.DoGUMMFetching@ to @True@ enables bulk (packet) fetching. Other + parameters determine the size of the packets (@pack_buffer_size@) and the number of + thunks that should be put into one packet (@RTSflags.GranFlags.ThunksToPack@). +\item If there is no other possibility to find work, should runnable threads + be moved to an idle processor (@RTSflags.GranFlags.DoThreadMigration@)? In any case, the + RTS tried to get sparks (either local or remote ones) first. Thread + migration is very expensive, since a whole TSO has to be transferred + and probably data locality becomes worse in the process. Note, that + the closure, which will be evaluated next by that TSO is not + transferred together with the TSO (that might block another thread). +\item Should the RTS distinguish between sparks created by local nodes and + stolen sparks (@RTSflags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve + data locality by preferring sparks of local nodes (it is more likely + that the data for those sparks is already on the local processor). + However, such a distinction also imposes an overhead on the spark + queue management, and typically a large number of sparks are + generated during execution. By default this variable is set to @False@. +\item Should the RTS use granularity control mechanisms? The idea of a + granularity control mechanism is to make use of granularity + information provided via annotation of the @par@ construct in order + to prefer bigger threads when either turning a spark into a thread or + when choosing the next thread to schedule. Currently, three such + mechanisms are implemented: + \begin{itemize} + \item Cut-off: The granularity information is interpreted as a + priority. If a threshold priority is given to the RTS, then + only those sparks with a higher priority than the threshold + are actually created. Other sparks are immediately discarded. + This is similar to a usual cut-off mechanism often used in + parallel programs, where parallelism is only created if the + input data is lage enough. With this option, the choice is + hidden in the RTS and only the threshold value has to be + provided as a parameter to the runtime system. + \item Priority Sparking: This mechanism keeps priorities for sparks + and chooses the spark with the highest priority when turning + a spark into a thread. After that the priority information is + discarded. The overhead of this mechanism comes from + maintaining a sorted spark queue. + \item Priority Scheduling: This mechanism keeps the granularity + information for threads, to. Thus, on each reschedule the + largest thread is chosen. This mechanism has a higher + overhead, as the thread queue is sorted, too. + \end{itemize} +\end{itemize} -void grputw PROTO((TIME v)); +\begin{code} +#if defined(GRAN) + +/* Do we need to reschedule following a fetch? */ +rtsBool NeedToReSchedule = rtsFalse; +TIME TimeOfNextEvent, EndOfTimeSlice; /* checked from the threaded world! */ +/* I_ avoidedCS=0; */ /* Unused!! ToDo: Remake libraries and nuke this var */ + +/* For internal use (event statistics) only */ +char *event_names[] = + { "STARTTHREAD", "CONTINUETHREAD", "RESUMETHREAD", + "MOVESPARK", "MOVETHREAD", "FINDWORK", + "FETCHNODE", "FETCHREPLY", + "GLOBALBLOCK", "UNBLOCKTHREAD" + }; + +# if defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ +I_ noOfEvents = 0; +I_ event_counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + +I_ fetch_misses = 0; +I_ tot_low_pri_sparks = 0; + +I_ rs_sp_count=0, rs_t_count=0, ntimes_total=0, fl_total=0, no_of_steals=0; + +/* Variables for gathering packet and queue statistics */ +I_ tot_packets = 0, tot_packet_size = 0, tot_cuts = 0, tot_thunks = 0; +I_ tot_sq_len = 0, tot_sq_probes = 0, tot_sparks = 0, withered_sparks = 0; +I_ tot_add_threads = 0, tot_tq_len = 0, non_end_add_threads = 0; +# endif + +# if defined(GRAN_COUNT) +/* Count the number of updates that are done. Mainly for testing, but + could be useful for other purposes, too. */ +I_ nUPDs = 0, nUPDs_old = 0, nUPDs_new = 0, nUPDs_BQ = 0, nPAPs = 0, + BQ_lens = 0; +# endif + +/* Prototypes */ +I_ HandleFetchRequest(P_, PROC, P_); +/* void HandleFetchRequest(P_, PROC, P_); changed for GUMMFeching */ +static I_ blockFetch(P_ tso, PROC proc, P_ bh); + +#endif /* GRAN */ +\end{code} +%**************************************************************** +%* * +\subsection[global-address-op]{Global Address Operations} +%* * +%**************************************************************** + +These functions perform operations on the global-address (ga) part +of a closure. The ga is the only new field (1 word) in a closure introduced +by GrAnSim. It serves as a bitmask, indicating on which processor +the closure is residing. Since threads are described by Thread State +Object (TSO), which is nothing but another kind of closure, this +scheme allows gives placement information about threads. + +A ga is just a bitmask, so the operations on them are mainly bitmask +manipulating functions. Note, that there are important macros like PROCS, +IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@. + +NOTE: In GrAnSim-light we don't maintain placement information. This +allows to simulate an arbitrary number of processors. The price we have +to be is the lack of costing any communication properly. In short, +GrAnSim-light is meant to reveal the maximal parallelism in a program. +From an implementation point of view the important thing is: +{\em GrAnSim-light does not maintain global-addresses}. + +\begin{code} #if defined(GRAN) -/* Pointer to the event queue; events are currently malloc'ed */ -static eventq EventHd = NULL; +/* ga_to_proc returns the first processor marked in the bitmask ga. + Normally only one bit in ga should be set. But for PLCs all bits + are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */ + PROC ga_to_proc(W_ ga) { PROC i; - for (i = 0; i < MAX_PROC && !IS_LOCAL_TO(ga, i); i++); - return (i); } @@ -66,61 +261,155 @@ PROC where_is(P_ node) { return (ga_to_proc(PROCS(node))); } /* Access the GA field of the node */ -#if 0 -PROC -no_of_copies(W_ ga) /* DaH lo'lu'Qo'; currently unused */ -{ - PROC i, n; - - for (i = 0, n = 0; i < MAX_PROC; i++) - if (IS_LOCAL_TO(ga, i)) - n++;; +rtsBool +any_idle() { + I_ i; + rtsBool any_idle; + for(i=0, any_idle=rtsFalse; + !any_idle && i=CONTINUETHREAD1) ? - CONTINUETHREAD : - EVENT_TYPE(EventHd)]++; + event_counts[EVENT_TYPE(EventHd)]++; } -#endif +# endif entry = EventHd; EventHd = EVENT_NEXT(EventHd); return(entry); } -/* ToDo: replace malloc/free with a free list */ +/* When getting the time of the next event we ignore CONTINUETHREAD events: + we don't want to be interrupted before the end of the current time slice + unless there is something important to handle. +*/ +TIME +get_time_of_next_event() +{ + eventq event = EventHd; + + while (event != NULL && EVENT_TYPE(event)==CONTINUETHREAD) { + event = EVENT_NEXT(event); + } + if(event == NULL) + return ((TIME) 0); + else + return (EVENT_TIME(event)); +} -/* NB: newevent unused (WDP 95/07) */ +/* ToDo: replace malloc/free with a free list */ static -newevent(proc,creator,time,evttype,tso,node,spark) - PROC proc, creator; - TIME time; - EVTTYPE evttype; - P_ tso, node; - sparkq spark; +insert_event(newentry) +eventq newentry; +{ + EVTTYPE evttype = EVENT_TYPE(newentry); + eventq event, *prev; + + /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */ + + /* Search the queue and insert at the right point: + FINDWORK before everything, CONTINUETHREAD after everything. + + This ensures that we find any available work after all threads have + executed the current cycle. This level of detail would normally be + irrelevant, but matters for ridiculously low latencies... + */ + + /* Changed the ordering: Now FINDWORK comes after everything but + CONTINUETHREAD. This makes sure that a MOVESPARK comes before a + FINDWORK. This is important when a GranSimSparkAt happens and + DoAlwaysCreateThreads is turned on. Also important if a GC occurs + when trying to build a new thread (see much_spark) -- HWL 02/96 */ + + if(EventHd == NULL) + EventHd = newentry; + else { + for (event = EventHd, prev=&EventHd; + event != NULL; + prev = &(EVENT_NEXT(event)), event = EVENT_NEXT(event)) { + switch (evttype) { + case FINDWORK: if ( EVENT_TIME(event) < EVENT_TIME(newentry) || + ( (EVENT_TIME(event) == EVENT_TIME(newentry)) && + (EVENT_TYPE(event) != CONTINUETHREAD) ) ) + continue; + else + break; + case CONTINUETHREAD: if ( EVENT_TIME(event) <= EVENT_TIME(newentry) ) + continue; + else + break; + default: if ( EVENT_TIME(event) < EVENT_TIME(newentry) || + ((EVENT_TIME(event) == EVENT_TIME(newentry)) && + (EVENT_TYPE(event) == EVENT_TYPE(newentry))) ) + continue; + else + break; + } + /* Insert newentry here (i.e. before event) */ + *prev = newentry; + EVENT_NEXT(newentry) = event; + break; + } + if (event == NULL) + *prev = newentry; + } +} + +void +new_event(proc,creator,time,evttype,tso,node,spark) +PROC proc, creator; +TIME time; +EVTTYPE evttype; +P_ tso, node; +sparkq spark; { - eventq newentry = (eventq) xmalloc(sizeof(struct event)); + eventq newentry = (eventq) stgMallocBytes(sizeof(struct event), "new_event"); EVENT_PROC(newentry) = proc; EVENT_CREATOR(newentry) = creator; @@ -129,36 +418,501 @@ newevent(proc,creator,time,evttype,tso,node,spark) EVENT_TSO(newentry) = tso; EVENT_NODE(newentry) = node; EVENT_SPARK(newentry) = spark; + EVENT_GC_INFO(newentry) = 0; EVENT_NEXT(newentry) = NULL; insert_event(newentry); } -#endif /* GRAN ; HWL */ +void +prepend_event(eventq event) /* put event at beginning of EventQueue */ +{ /* only used for GC! */ + EVENT_NEXT(event) = EventHd; + EventHd = event; +} + +eventq +grab_event() /* undo prepend_event i.e. get the event */ +{ /* at the head of EventQ but don't free anything */ + eventq event = EventHd; + + if(EventHd == NULL) { + fprintf(stderr,"No next event (in grab_event). This may be caused by a circular data dependency in the program.\n"); + EXIT(EXIT_FAILURE); + } + + EventHd = EVENT_NEXT(EventHd); + return (event); +} + +void +traverse_eventq_for_gc() +{ + eventq event = EventHd; + W_ bufsize; + P_ closure, tso, buffer, bufptr; + PROC proc, creator; + + /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the + orig closure (root of packed graph). This means that a graph, which is + between processors at the time of GC is fetched again at the time when + it would have arrived, had there been no GC. Slightly inaccurate but + safe for GC. + This is only needed for GUM style fetchng. */ + if (!RTSflags.GranFlags.DoGUMMFetching) + return; + + for(event = EventHd; event!=NULL; event=EVENT_NEXT(event)) { + if (EVENT_TYPE(event)==FETCHREPLY) { + buffer = EVENT_NODE(event); + ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */ + bufsize = buffer[PACK_SIZE_LOCN]; + closure= (P_)buffer[PACK_HDR_SIZE]; + tso = (P_)buffer[PACK_TSO_LOCN]; + proc = EVENT_PROC(event); + creator = EVENT_CREATOR(event); /* similar to unpacking */ + for (bufptr=buffer+PACK_HDR_SIZE; bufptr<(buffer+bufsize); + bufptr++) { + if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) || + (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) { + convertFromRBH((P_)*bufptr); + } + } + free(buffer); + EVENT_TYPE(event) = FETCHNODE; + EVENT_PROC(event) = creator; + EVENT_CREATOR(event) = proc; + EVENT_NODE(event) = closure; + EVENT_TSO(event) = tso; + EVENT_GC_INFO(event) = 0; + } + } +} + +void +print_event(event) +eventq event; +{ + + char str_tso[16], str_node[16]; + + sprintf(str_tso,((EVENT_TSO(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"), + EVENT_TSO(event)); + sprintf(str_node,((EVENT_NODE(event)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"), + EVENT_NODE(event)); + + if (event==NULL) + fprintf(stderr,"Evt: NIL\n"); + else + fprintf(stderr,"Evt: %s (%u), PE %u [%u], Time %lu, TSO %s (%x), node %s\n", + event_names[EVENT_TYPE(event)],EVENT_TYPE(event), + EVENT_PROC(event), EVENT_CREATOR(event), EVENT_TIME(event), + str_tso, TSO_ID(EVENT_TSO(event)), str_node + /*, EVENT_SPARK(event), EVENT_NEXT(event)*/ ); + +} + +void +print_eventq(hd) +eventq hd; +{ + eventq x; + + fprintf(stderr,"Event Queue with root at %x:\n",hd); + for (x=hd; x!=NULL; x=EVENT_NEXT(x)) { + print_event(x); + } +} + +void +print_spark(spark) + sparkq spark; +{ + char str[16]; + + sprintf(str,((SPARK_NODE(spark)==PrelBase_Z91Z93_closure) ? "______" : "%#6lx"), + (W_) SPARK_NODE(spark)); + + if (spark==NULL) + fprintf(stderr,"Spark: NIL\n"); + else + fprintf(stderr,"Spark: Node %8s, Name %#6lx, Exported %5s, Prev %#6x, Next %#6x\n", + str, SPARK_NAME(spark), + ((SPARK_EXPORTED(spark))?"True":"False"), + SPARK_PREV(spark), SPARK_NEXT(spark) ); +} + +void +print_sparkq(hd) +sparkq hd; +{ + sparkq x; + + fprintf(stderr,"Spark Queue with root at %x:\n",hd); + for (x=hd; x!=NULL; x=SPARK_NEXT(x)) { + print_spark(x); + } +} + + +#endif /* GRAN */ \end{code} %**************************************************************************** % -\subsection[GrAnSim-profile]{Writing profiling info for GrAnSim} +\subsection[entry-points]{Routines directly called from Haskell world} % %**************************************************************************** -Event dumping routines. +The @GranSim...@ routines in here are directly called via macros from the +threaded world. + +First some auxiliary routines. \begin{code} +#if defined(GRAN) +/* Take the current thread off the thread queue and thereby activate the */ +/* next thread. It's assumed that the next ReSchedule after this uses */ +/* NEW_THREAD as param. */ +/* This fct is called from GranSimBlock and GranSimFetch */ -FILE *gr_file = NULL; +void +ActivateNextThread (PROC proc) +{ + ASSERT(RunnableThreadsHd[proc]!=PrelBase_Z91Z93_closure); + + RunnableThreadsHd[proc] = TSO_LINK(RunnableThreadsHd[proc]); + if(RunnableThreadsHd[proc]==PrelBase_Z91Z93_closure) { + MAKE_IDLE(proc); + RunnableThreadsTl[proc] = PrelBase_Z91Z93_closure; + } else { + CurrentTime[proc] += RTSflags.GranFlags.gran_threadcontextswitchtime; + if (RTSflags.GranFlags.granSimStats && + (!RTSflags.GranFlags.Light || (RTSflags.GranFlags.debug & 0x20000))) + DumpRawGranEvent(proc,0,GR_SCHEDULE,RunnableThreadsHd[proc], + PrelBase_Z91Z93_closure,0); + } +} +\end{code} -char *gran_event_names[] = { - "START", "START(Q)", - "STEALING", "STOLEN", "STOLEN(Q)", - "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)", - "SCHEDULE", "DESCHEDULE", - "END", - "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED", - "TERMINATE", - "??" -}; +Now the main stg-called routines: + +\begin{code} +/* ------------------------------------------------------------------------ */ +/* The following GranSim... fcts are stg-called from the threaded world. */ +/* ------------------------------------------------------------------------ */ + +/* Called from HEAP_CHK -- NB: node and liveness are junk here now. + They are left temporarily to avoid complete recompilation. + KH +*/ +void +GranSimAllocate(n,node,liveness) +I_ n; +P_ node; +W_ liveness; +{ + TSO_ALLOCS(CurrentTSO) += n; + ++TSO_BASICBLOCKS(CurrentTSO); + + if (RTSflags.GranFlags.granSimStats_Heap) { + DumpRawGranEvent(CurrentProc,0,GR_ALLOC,CurrentTSO, + PrelBase_Z91Z93_closure,n); + } + + TSO_EXECTIME(CurrentTSO) += RTSflags.GranFlags.gran_heapalloc_cost; + CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_heapalloc_cost; +} + +/* + Subtract the values added above, if a heap check fails and + so has to be redone. +*/ +void +GranSimUnallocate(n,node,liveness) +W_ n; +P_ node; +W_ liveness; +{ + TSO_ALLOCS(CurrentTSO) -= n; + --TSO_BASICBLOCKS(CurrentTSO); + + TSO_EXECTIME(CurrentTSO) -= RTSflags.GranFlags.gran_heapalloc_cost; + CurrentTime[CurrentProc] -= RTSflags.GranFlags.gran_heapalloc_cost; +} + +/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */ +void +GranSimExec(ariths,branches,loads,stores,floats) +W_ ariths,branches,loads,stores,floats; +{ + W_ cost = RTSflags.GranFlags.gran_arith_cost*ariths + + RTSflags.GranFlags.gran_branch_cost*branches + + RTSflags.GranFlags.gran_load_cost * loads + + RTSflags.GranFlags.gran_store_cost*stores + + RTSflags.GranFlags.gran_float_cost*floats; + + TSO_EXECTIME(CurrentTSO) += cost; + CurrentTime[CurrentProc] += cost; +} + + +/* + Fetch the node if it isn't local + -- result indicates whether fetch has been done. + + This is GRIP-style single item fetching. +*/ + +/* This function in Threads.lc is only needed for SimplifiedFetch */ +FetchNode PROTO((P_ node,PROC CurrentProc)); + +I_ +GranSimFetch(node /* , liveness_mask */ ) +P_ node; +/* I_ liveness_mask; */ +{ + if (RTSflags.GranFlags.Light) { + /* Always reschedule in GrAnSim-Light to prevent one TSO from + running off too far + new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc], + CONTINUETHREAD,CurrentTSO,node,NULL); + */ + NeedToReSchedule = rtsFalse; + return(0); + } + + /* Note: once a node has been fetched, this test will be passed */ + if(!IS_LOCAL_TO(PROCS(node),CurrentProc)) + { + /* Add mpacktime to the remote PE for the reply */ + { + PROC p = where_is(node); + TIME fetchtime; + +# ifdef GRAN_CHECK + if ( ( RTSflags.GranFlags.debug & 0x40 ) && + p == CurrentProc ) + fprintf(stderr,"GranSimFetch: Trying to fetch from own processor%u\n", p); +# endif /* GRAN_CHECK */ + + CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mpacktime; + /* NB: Fetch is counted on arrival (FETCHREPLY) */ + + if (RTSflags.GranFlags.SimplifiedFetch) + { + FetchNode(node,CurrentProc); + CurrentTime[CurrentProc] += RTSflags.GranFlags.gran_mtidytime+ + RTSflags.GranFlags.gran_fetchtime+ + RTSflags.GranFlags.gran_munpacktime; + return(1); + } + + fetchtime = STG_MAX(CurrentTime[CurrentProc],CurrentTime[p]) + + RTSflags.GranFlags.gran_latency; + + new_event(p,CurrentProc,fetchtime,FETCHNODE,CurrentTSO,node,NULL); + if (!RTSflags.GranFlags.DoReScheduleOnFetch) + MAKE_FETCHING(CurrentProc); + ++OutstandingFetches[CurrentProc]; + + if (fetchtime CurrentTime[CurrentProc]? + CurrentTime[proc]: CurrentTime[CurrentProc]) + + RTSflags.GranFlags.gran_latency; + } else { + exporttime = CurrentTime[CurrentProc]; + } + + if ( RTSflags.GranFlags.Light ) + /* Need CurrentTSO in event field to associate costs with creating + spark even in a GrAnSim Light setup */ + new_event(proc,CurrentProc,exporttime, + MOVESPARK,CurrentTSO,PrelBase_Z91Z93_closure,spark); + else + new_event(proc,CurrentProc,exporttime, + MOVESPARK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,spark); + /* Bit of a hack to treat placed sparks the same as stolen sparks */ + ++OutstandingFishes[proc]; + + /* Force the PE to take notice of the spark (FINDWORK is put after a + MOVESPARK into the sparkq!) */ + if(RTSflags.GranFlags.DoAlwaysCreateThreads) { + new_event(CurrentProc,CurrentProc,exporttime+1, + FINDWORK,PrelBase_Z91Z93_closure,PrelBase_Z91Z93_closure,NULL); + } + + if (exporttime GR_EVENT_MAX) + if (name > GR_EVENT_MAX) name = GR_EVENT_MAX; - if (do_gr_binary) { - grputw(name); - grputw(pe); - abort(); /* die please: a single word doesn't represent long long times */ - grputw(CURRENT_TIME); /* this line is bound to do the wrong thing */ - grputw(id); - } else - fprintf(gr_file, "PE %2u [%s]: %s %lx \t0x%lx\t(from %2u)\n", - pe, time_string, gran_event_names[name], id, (W_) node, proc); + if(GRANSIMSTATS_BINARY) + /* ToDo: fix code for writing binary GrAnSim statistics */ + switch (name) { + case GR_START: + case GR_STARTQ: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); + grputw((W_)node); + break; + case GR_FETCH: + case GR_REPLY: + case GR_BLOCK: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); /* this line is bound to */ + grputw(id); /* do the wrong thing */ + break; + default: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); + grputw((W_)node); + } + else + switch (name) { + case GR_START: + case GR_STARTQ: + /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */ + /* using spark queue length as optional argument ^^^^^^^^^ */ + fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n", + /* using spark name as optional argument ^^^^^^ */ + proc,time_string,gran_event_names[name], + id,node_str,len); + break; + case GR_FETCH: + case GR_REPLY: + case GR_BLOCK: + case GR_STOLEN: + case GR_STOLENQ: + fprintf(gr_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n", + proc, time_string, gran_event_names[name], + id,node_str,p); + break; + case GR_RESUME: + case GR_RESUMEQ: + case GR_SCHEDULE: + case GR_DESCHEDULE: + fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx \n", + proc,time_string,gran_event_names[name],id); + break; + case GR_STEALING: + fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n", + proc,time_string,gran_event_names[name],id,p); + break; + case GR_ALLOC: + fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n", + proc,time_string,gran_event_names[name],id,len); + break; + default: + fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", + proc,time_string,gran_event_names[name],id,node_str,len); + } } + +#if defined(GRAN) +/* Only needed for special dynamic spark labelling support */ void -DumpRawGranEvent(pe, name, id) -PROC pe; +DumpStartEventAt(time, proc, p, name, tso, node, len) +TIME time; +PROC proc, p; /* proc ... where it happens; p ... where node lives */ enum gran_event_types name; -W_ id; +P_ tso, node; +I_ len; { - char time_string[500]; /* ToDo: kill magic constant */ - - if (name > GR_EVENT_MAX) + W_ id; + char time_string[500], node_str[16]; /*ToDo: kill magic constants */ + ullong_format_string(time, time_string, rtsFalse/*no commas!*/); + /* ^^^^ only important change to DumpRawGranEvent */ + if (RTSflags.GranFlags.granSimStats_suppressed) + return; + + id = tso == NULL ? -1 : TSO_ID(tso); + if (node==PrelBase_Z91Z93_closure) + strcpy(node_str,"________"); /* "Z91Z93_closure"); */ + else + sprintf(node_str,"0x%-6lx",node); + + if (name > GR_EVENT_MAX) name = GR_EVENT_MAX; - ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); - - if (do_gr_binary) { - grputw(name); - grputw(pe); - abort(); /* die please: a single word doesn't represent long long times */ - grputw(CURRENT_TIME); /* this line is bound to fail */ - grputw(id); - } else - fprintf(gr_file, "PE %2u [%s]: %s %lx\n", - pe, time_string, gran_event_names[name], id); + if(GRANSIMSTATS_BINARY) + /* ToDo: fix code for writing binary GrAnSim statistics */ + switch (name) { + case GR_START: + case GR_STARTQ: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); + grputw((W_)node); + break; + default: + fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n", + gran_event_names[name]); + } + else + switch (name) { + case GR_START: + case GR_STARTQ: + /* fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", */ + /* using spark queue length as optional argument ^^^^^^^^^ */ + fprintf(gr_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\n", + /* using spark name as optional argument ^^^^^^ */ + proc,time_string,gran_event_names[name], + id,node_str,len); + break; + default: + fprintf(stderr,"Error in DumpStartEventAt: event %s is not a START event\n", + gran_event_names[name]); + } } +#endif /* GRAN */ void -DumpGranInfo(pe, tso, mandatory_thread) -PROC pe; +DumpGranInfo(proc, tso, mandatory_thread) +PROC proc; P_ tso; rtsBool mandatory_thread; { char time_string[500]; /* ToDo: kill magic constant */ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); - if (do_gr_binary) { +#if defined(GRAN) + if (RTSflags.GranFlags.granSimStats_suppressed) + return; +#endif + + if (GRANSIMSTATS_BINARY) { grputw(GR_END); - grputw(pe); + grputw(proc); abort(); /* die please: a single word doesn't represent long long times */ grputw(CURRENT_TIME); /* this line is bound to fail */ grputw(TSO_ID(tso)); @@ -324,11 +1177,11 @@ rtsBool mandatory_thread; } else { /* - * NB: DumpGranEvent cannot be used because PE may be wrong (as well as the - * extra info) + * NB: DumpGranEvent cannot be used because PE may be wrong + * (as well as the extra info) */ fprintf(gr_file, "PE %2u [%s]: END %lx, SN %lu, ST %lu, EXP %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu), LS %lu, GS %lu, MY %c\n" - ,pe + ,proc ,time_string ,TSO_ID(tso) ,TSO_SPARKNAME(tso) @@ -348,6 +1201,47 @@ rtsBool mandatory_thread; } } +void +DumpTSO(tso) +P_ tso; +{ + fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %lu, LINK 0x%lx, TYPE %s\n" + ,tso + ,TSO_NAME(tso) + ,TSO_ID(tso) + ,TSO_LINK(tso) + ,TSO_TYPE(tso)==T_MAIN?"MAIN": + TSO_TYPE(tso)==T_FAIL?"FAIL": + TSO_TYPE(tso)==T_REQUIRED?"REQUIRED": + TSO_TYPE(tso)==T_ADVISORY?"ADVISORY": + "???" + ); + + fprintf(stderr,"PC (0x%lx,0x%lx), ARG (0x%lx), SWITCH %lx0x\n" + ,TSO_PC1(tso) + ,TSO_PC2(tso) + ,TSO_ARG1(tso) + /* ,TSO_ARG2(tso) */ + ,TSO_SWITCH(tso) + ); + + fprintf(gr_file,"TSO %lx: SN %lu, ST %lu, GBL %c, BB %lu, HA %lu, RT %lu, BT %lu (%lu), FT %lu (%lu) LS %lu, GS %lu\n" + ,TSO_ID(tso) + ,TSO_SPARKNAME(tso) + ,TSO_STARTEDAT(tso) + ,TSO_EXPORTED(tso)?'T':'F' + ,TSO_BASICBLOCKS(tso) + ,TSO_ALLOCS(tso) + ,TSO_EXECTIME(tso) + ,TSO_BLOCKTIME(tso) + ,TSO_BLOCKCOUNT(tso) + ,TSO_FETCHTIME(tso) + ,TSO_FETCHCOUNT(tso) + ,TSO_LOCALSPARKS(tso) + ,TSO_GLOBALSPARKS(tso) + ); +} + /* Output a terminate event and an 8-byte time. */ @@ -356,7 +1250,12 @@ void grterminate(v) TIME v; { - DumpGranEvent(GR_TERMINATE, 0); +#if defined(GRAN) + if (RTSflags.GranFlags.granSimStats_suppressed) + return; +#endif + + DumpGranEvent(GR_TERMINATE, PrelBase_Z91Z93_closure); if (sizeof(TIME) == 4) { putc('\0', gr_file); @@ -389,12 +1288,17 @@ void grputw(v) TIME v; { - if (v <= 0x3fl) { +#if defined(GRAN) + if (RTSflags.GranFlags.granSimStats_suppressed) + return; +#endif + + if (v <= 0x3fl) { /* length v = 1 byte */ fputc(v & 0x3f, gr_file); - } else if (v <= 0x3fffl) { + } else if (v <= 0x3fffl) { /* length v = 2 byte */ fputc((v >> 8l) | 0x40l, gr_file); fputc(v & 0xffl, gr_file); - } else if (v <= 0x3fffffffl) { + } else if (v <= 0x3fffffffl) { /* length v = 4 byte */ fputc((v >> 24l) | 0x80l, gr_file); fputc((v >> 16l) & 0xffl, gr_file); fputc((v >> 8l) & 0xffl, gr_file); @@ -423,6 +1327,7 @@ TIME v; } } +#endif /* GRAN || PAR */ \end{code} %**************************************************************************** @@ -431,10 +1336,14 @@ TIME v; % %**************************************************************************** +General routines for GranSim. Mainly, startup and shutdown routines, called +from @main.lc@. + \begin{code} -#ifdef GRAN -char gr_filename[32]; /*ToDo: magic short filename constant????? WDP 95/07 */ -I_ do_gr_sim = 0; +#if defined(GRAN) +FILE *gr_file = NULL; +char gr_filename[STATS_FILENAME_MAXLEN]; +/* I_ do_gr_sim = 0; */ /* In GrAnSim setup always do simulation */ int init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) @@ -443,19 +1352,21 @@ int prog_argc, rts_argc; { I_ i; - if (do_gr_sim) { - char *extension = do_gr_binary ? "gb" : "gr"; + char *extension = RTSflags.GranFlags.granSimStats_Binary ? "gb" : "gr"; - sprintf(gr_filename, "%0.28s.%0.2s", prog_argv[0], extension); + if (RTSflags.GranFlags.granSimStats_suppressed) + return; - if ((gr_file = fopen(gr_filename, "w")) == NULL) { - fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename); - exit(EXIT_FAILURE); /* why not EXIT??? WDP 95/07 */ - } -#if defined(GRAN_CHECK) && defined(GRAN) - if (DoReScheduleOnFetch) + sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension); + + if ((gr_file = fopen(gr_filename, "w")) == NULL) { + fprintf(stderr, "Can't open granularity simulation report file %s\n", gr_filename); + EXIT(EXIT_FAILURE); + } +# if 0 /* that's obsolete now, I think -- HWL */ + if (RTSflags.GranFlags.DoReScheduleOnFetch) setbuf(gr_file, NULL); -#endif +# endif fputs("Granularity Simulation for ", gr_file); for (i = 0; i < prog_argc; ++i) { @@ -471,87 +1382,195 @@ int prog_argc, rts_argc; fputc(' ', gr_file); } } + + fputs("\nStart time: ", gr_file); + fputs(time_str(), gr_file); /* defined in main.lc */ + fputc('\n', gr_file); + fputs("\n\n--------------------\n\n", gr_file); fputs("General Parameters:\n\n", gr_file); - fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads%s ????? %s\n", - max_proc, DoFairSchedule ? "Fair" : "Unfair", - DoThreadMigration ? "" : "Don't ", - DoThreadMigration && DoStealThreadsFirst ? " Before Sparks" : "", - DoReScheduleOnFetch ? "" : "Don't "); - - fprintf(gr_file, "%s, Fetch %s in Each Packet\n", - SimplifiedFetch ? "Simplified Fetch" : (DoReScheduleOnFetch ? "Reschedule on Fetch" : "Block on Fetch"), - DoGUMMFetching ? "Many Closures" : "Exactly One Closure"); - fprintf(gr_file, "Fetch Strategy(%lu): If outstanding fetches %s\n", - FetchStrategy, - FetchStrategy == 1 ? "only run runnable threads (don't create new ones" : - FetchStrategy == 2 ? "create threads only from local sparks" : - FetchStrategy == 3 ? "create threads from local or global sparks" : - FetchStrategy == 4 ? "create sparks and steal threads if necessary" : - "unknown"); + if (RTSflags.GranFlags.Light) + fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n", + RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair", + RTSflags.GranFlags.DoThreadMigration?"":"Don't ", + RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"", + RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" : + RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" : + "Block on Fetch"); + else + fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n", + RTSflags.GranFlags.proc,RTSflags.GranFlags.DoFairSchedule?"Fair":"Unfair", + RTSflags.GranFlags.DoThreadMigration?"":"Don't ", + RTSflags.GranFlags.DoThreadMigration && RTSflags.GranFlags.DoStealThreadsFirst?" Before Sparks":"", + RTSflags.GranFlags.SimplifiedFetch ? "Simplified Fetch" : + RTSflags.GranFlags.DoReScheduleOnFetch ? "Reschedule on Fetch" : + "Block on Fetch"); + + if (RTSflags.GranFlags.DoGUMMFetching) + if (RTSflags.GranFlags.ThunksToPack) + fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n", + RTSflags.GranFlags.ThunksToPack, + RTSflags.GranFlags.packBufferSize); + else + fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n", + RTSflags.GranFlags.packBufferSize); + else + fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n"); + + fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n", + RTSflags.GranFlags.FetchStrategy, + RTSflags.GranFlags.FetchStrategy==0 ? + " block (block-on-fetch)": + RTSflags.GranFlags.FetchStrategy==1 ? + "only run runnable threads": + RTSflags.GranFlags.FetchStrategy==2 ? + "create threads only from local sparks": + RTSflags.GranFlags.FetchStrategy==3 ? + "create threads from local or global sparks": + RTSflags.GranFlags.FetchStrategy==4 ? + "create sparks and steal threads if necessary": + "unknown"); + + if (RTSflags.GranFlags.DoPrioritySparking) + fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n"); + + if (RTSflags.GranFlags.DoPriorityScheduling) + fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n"); fprintf(gr_file, "Thread Creation Time %lu, Thread Queue Time %lu\n", - gran_threadcreatetime, gran_threadqueuetime); + RTSflags.GranFlags.gran_threadcreatetime, + RTSflags.GranFlags.gran_threadqueuetime); fprintf(gr_file, "Thread DeSchedule Time %lu, Thread Schedule Time %lu\n", - gran_threaddescheduletime, gran_threadscheduletime); + RTSflags.GranFlags.gran_threaddescheduletime, + RTSflags.GranFlags.gran_threadscheduletime); fprintf(gr_file, "Thread Context-Switch Time %lu\n", - gran_threadcontextswitchtime); + RTSflags.GranFlags.gran_threadcontextswitchtime); fputs("\n\n--------------------\n\n", gr_file); fputs("Communication Metrics:\n\n", gr_file); fprintf(gr_file, "Latency %lu (1st) %lu (rest), Fetch %lu, Notify %lu (Global) %lu (Local)\n", - gran_latency, gran_additional_latency, gran_fetchtime, - gran_gunblocktime, gran_lunblocktime); + RTSflags.GranFlags.gran_latency, + RTSflags.GranFlags.gran_additional_latency, + RTSflags.GranFlags.gran_fetchtime, + RTSflags.GranFlags.gran_gunblocktime, + RTSflags.GranFlags.gran_lunblocktime); fprintf(gr_file, "Message Creation %lu (+ %lu after send), Message Read %lu\n", - gran_mpacktime, gran_mtidytime, gran_munpacktime); + RTSflags.GranFlags.gran_mpacktime, + RTSflags.GranFlags.gran_mtidytime, + RTSflags.GranFlags.gran_munpacktime); fputs("\n\n--------------------\n\n", gr_file); fputs("Instruction Metrics:\n\n", gr_file); fprintf(gr_file, "Arith %lu, Branch %lu, Load %lu, Store %lu, Float %lu, Alloc %lu\n", - gran_arith_cost, gran_branch_cost, - gran_load_cost, gran_store_cost, gran_float_cost, gran_heapalloc_cost); + RTSflags.GranFlags.gran_arith_cost, + RTSflags.GranFlags.gran_branch_cost, + RTSflags.GranFlags.gran_load_cost, + RTSflags.GranFlags.gran_store_cost, + RTSflags.GranFlags.gran_float_cost, + RTSflags.GranFlags.gran_heapalloc_cost); fputs("\n\n++++++++++++++++++++\n\n", gr_file); - } - if (do_gr_binary) + + if (RTSflags.GranFlags.granSimStats_Binary) grputw(sizeof(TIME)); - Idlers = max_proc; return (0); } void end_gr_simulation(STG_NO_ARGS) { - if (do_gr_sim) { - fprintf(stderr, "The simulation is finished. Look at %s for details.\n", - gr_filename); - fclose(gr_file); - } -} + char time_string[500]; /* ToDo: kill magic constant */ + ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); -#endif /* GRAN */ + if (RTSflags.GranFlags.granSimStats_suppressed) + return; -#ifdef PAR -char gr_filename[50]; /*ToDo: (small) magic constant alert!!!! WDP 95/07 */ +#if defined(GRAN_CHECK) && defined(GRAN) + /* Print event stats */ + if (RTSflags.GranFlags.debug & 0x20) { + int i; + + fprintf(stderr,"Event statistics (number of events: %d):\n", + noOfEvents); + for (i=0; i<=MAX_EVENT; i++) { + fprintf(stderr," %s (%d): \t%ld \t%f%%\t%f%%\n", + event_names[i],i,event_counts[i], + (float)(100*event_counts[i])/(float)(noOfEvents), + (i==CONTINUETHREAD ? 0.0 : + (float)(100*(event_counts[i])/(float)(noOfEvents-event_counts[CONTINUETHREAD])) )); + } + fprintf(stderr,"Randomized steals: %u sparks, %u threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f) \n", + rs_sp_count, rs_t_count, no_of_steals, + (float)ntimes_total/(float)STG_MAX(no_of_steals,1), + (float)fl_total/(float)STG_MAX(no_of_steals,1)); + fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n", + tot_sparks,withered_sparks, + ( tot_sparks == 0 ? 0 : + (float)(100*withered_sparks)/(float)(tot_sparks)) ); + /* Print statistics about priority sparking */ + if (RTSflags.GranFlags.DoPrioritySparking) { + fprintf(stderr,"About Priority Sparking:\n"); + fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", tot_sq_probes, (float)tot_sq_len/(float)tot_sq_probes); + } + /* Print statistics about priority sparking */ + if (RTSflags.GranFlags.DoPriorityScheduling) { + fprintf(stderr,"About Priority Scheduling:\n"); + fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n", + tot_add_threads, non_end_add_threads, + (float)tot_tq_len/(float)tot_add_threads); + } + /* Print packet statistics if GUMM fetching is turned on */ + if (RTSflags.GranFlags.DoGUMMFetching) { + fprintf(stderr,"Packet statistcs:\n"); + fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", tot_packets, (float)tot_packet_size/(float)tot_packets); + fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", tot_thunks, (float)tot_thunks/(float)tot_packets); + fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", tot_cuts, (float)tot_cuts/(float)tot_packets); + /* + if (closure_queue_overflows>0) + fprintf(stderr," Number of closure queue overflows: %u\n", + closure_queue_overflows); + */ + } + } + + if (RTSflags.GranFlags.PrintFetchMisses) + fprintf(stderr,"Number of fetch misses: %d\n",fetch_misses); + +# if defined(GRAN_COUNT) + fprintf(stderr,"Update count statistics:\n"); + fprintf(stderr," Total number of updates: %u\n",nUPDs); + fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n", + nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ); + fprintf(stderr," Number of PAPs: %u\n",nPAPs); +# endif -I_ do_gr_profile = 0; -I_ do_sp_profile = 0; -I_ do_gr_binary = 0; +#endif /* GRAN_CHECK */ + + fprintf(stderr, "Simulation finished after @ %s @ cycles. Look at %s for details.\n", + time_string,gr_filename); + if (RTSflags.GranFlags.granSimStats) + fclose(gr_file); +} +#elif defined(PAR) +FILE *gr_file = NULL; +char gr_filename[STATS_FILENAME_MAXLEN]; + +/* I_ do_sp_profile = 0; */ void init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv) -char *prog_argv[], *rts_argv[]; -int prog_argc, rts_argc; + char *prog_argv[], *rts_argv[]; + int prog_argc, rts_argc; { int i; - char *extension = do_gr_binary ? "gb" : "gr"; + char *extension = RTSflags.ParFlags.granSimStats_Binary ? "gb" : "gr"; - sprintf(gr_filename, "%0.28s.%03d.%0.2s", prog_argv[0], thisPE, extension); + sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension); if ((gr_file = fopen(gr_filename, "w")) == NULL) { fprintf(stderr, "Can't open activity report file %s\n", gr_filename); @@ -573,6 +1592,10 @@ int prog_argc, rts_argc; } fputc('\n', gr_file); + fputs("Start-Time: ", gr_file); + fputs(time_str(), gr_file); /* defined in main.lc */ + fputc('\n', gr_file); + startTime = CURRENT_TIME; if (startTime > LL(1000000000)) { @@ -584,12 +1607,12 @@ int prog_argc, rts_argc; fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime); } - if (do_gr_binary) + if (RTSflags.ParFlags.granSimStats_Binary) grputw(sizeof(TIME)); } #endif /* PAR */ -#endif /* GRAN || PAR */ +#endif /* GRAN || PAR */ \end{code}