%
-% (c) The GRASP/AQUA Project, Glasgow University, 1994-1998
-% Hans Wolfgang Loidl
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
+% $Id: Costs.lhs,v 1.20 2000/01/13 14:33:57 hwloidl Exp $
+%
+% Only needed in a GranSim setup -- HWL
% ---------------------------------------------------------------------------
\section[Costs]{Evaluating the costs of computing some abstract C code}
instructions.
\end{itemize}
-This function is needed in GrAnSim for parallelism.
+This function is needed in GranSim for costing pieces of abstract C.
-These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!):
+These are first suggestions for scaling the costs. But, this scaling should
+be done in the RTS rather than the compiler (this really should be
+tunable!):
\begin{pseudocode}
negate = mapOp negate
abs = mapOp abs
signum = mapOp signum
+ fromInteger _ = error "fromInteger not defined"
mapOp :: (Int -> Int) -> CostRes -> CostRes
mapOp g ( Cost (i, b, l, s, f) ) = Cost (g i, g b, g l, g s, g f)
CSimultaneous absC -> costs absC
- CCheck _ amodes code -> Cost (2, 1, 0, 0, 0)
+ CCheck _ amodes code -> Cost (2, 1, 0, 0, 0) -- ToDo: refine this by
+ -- looking at the first arg
+
+ CRetDirect _ _ _ _ -> nullCosts
CMacroStmt macro modes -> stmtMacroCosts macro modes
-- *** the next three [or so...] are DATA (those above are CODE) ***
-- as they are data rather than code they all have nullCosts -- HWL
+ CCallTypedef _ _ _ _ -> nullCosts
+
CStaticClosure _ _ _ _ -> nullCosts
- CClosureInfoAndCode _ _ _ _ -> nullCosts
+ CSRT _ _ -> nullCosts
- CRetDirect _ _ _ _ -> nullCosts
+ CBitmap _ _ -> nullCosts
+
+ CClosureInfoAndCode _ _ _ _ -> nullCosts
CRetVector _ _ _ _ -> nullCosts
+ CClosureTbl _ -> nullCosts
+
CCostCentreDecl _ _ -> nullCosts
+
CCostCentreStackDecl _ -> nullCosts
CSplitMarker -> nullCosts
+ _ -> trace ("Costs.costs") nullCosts
+
-- ---------------------------------------------------------------------------
addrModeCosts :: CAddrMode -> Side -> CostRes
CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)
else Cost (0, 0, 1, 0, 0)
- CReg _ -> nullCosts {- loading from, storing to reg is free ! -}
+ CAddr (CIndex _ n _ ) -> Cost (1, 0, 1, 0, 0) -- does pointer arithmetic
+
+ CAddr _ -> nullCosts
+
+ CReg _ -> nullCosts {- loading from, storing to reg is free ! -}
{- for costing CReg->Creg ops see special -}
{- case in costs fct -}
CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list
+ _ -> trace ("Costs.addrModeCosts") nullCosts
+
-- ---------------------------------------------------------------------------
exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes
in
arg_costs +
case macro of
- ENTRY_CODE -> nullCosts
- ARG_TAG -> nullCosts -- XXX
- GET_TAG -> nullCosts -- XXX
-
+ ENTRY_CODE -> nullCosts -- nothing
+ ARG_TAG -> nullCosts -- nothing
+ GET_TAG -> Cost (0, 0, 1, 0, 0) -- indirect load
+ UPD_FRAME_UPDATEE -> Cost (0, 0, 1, 0, 0) -- indirect load
+ _ -> trace ("Costs.exprMacroCosts") nullCosts
-- ---------------------------------------------------------------------------
UPD_CAF -> Cost (7, 0, 1, 3, 0) {- SMupdate.lh -}
UPD_BH_UPDATABLE -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -}
UPD_BH_SINGLE_ENTRY -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -}
- PUSH_UPD_FRAME -> Cost (3, 0, 0, 4, 0) {- SMupdate.lh -}
+ PUSH_UPD_FRAME -> Cost (3, 0, 0, 4, 0) {- Updates.h -}
+ PUSH_SEQ_FRAME -> Cost (2, 0, 0, 3, 0) {- StgMacros.h !-}
+ UPDATE_SU_FROM_UPD_FRAME -> Cost (1, 0, 1, 0, 0) {- StgMacros.h !-}
SET_TAG -> nullCosts {- COptRegs.lh -}
GRAN_FETCH -> nullCosts {- GrAnSim bookkeeping -}
GRAN_RESCHEDULE -> nullCosts {- GrAnSim bookkeeping -}
-> (ModuleHiMap, ModuleHiMap)
-> (FilePath, String)
-> IO (ModuleHiMap, ModuleHiMap)
-getAllFilesMatching dirs hims (dir_path, suffix) = ( do
+getAllFilesMatching dirs hims (dir_path, suffix) =
+ do
-- fpaths entries do not have dir_path prepended
fpaths <- getDirectoryContents dir_path
is_dll <- catch
)
(\ _ {-don't care-} -> return NotDll)
return (foldl (addModules is_dll) hims fpaths)
- ) -- soft failure
+ -- soft failure
`catch`
(\ err -> do
hPutStrLn stderr
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
-% $Id: CgCase.lhs,v 1.36 1999/11/01 17:10:06 simonpj Exp $
+% $Id: CgCase.lhs,v 1.37 2000/01/13 14:33:57 hwloidl Exp $
%
%********************************************************
%* *
= -- We have arranged that Node points to the thing
restoreCurrentCostCentre cc_slot `thenFC` \restore_cc ->
getAbsC (absC restore_cc `thenC`
- (if opt_GranMacros && emit_yield
- then yield [node] False
- else absC AbsCNop) `thenC`
+ -- HWL: maybe need yield here
+ --(if emit_yield
+ -- then yield [node] True
+ -- else absC AbsCNop) `thenC`
possibleHeapCheck gc_flag is_fun [node] [] Nothing (cgExpr rhs)
-- Node is live, but doesn't need to point at the thing itself;
-- it's ok for Node to point to an indirection or FETCH_ME
=
restoreCurrentCostCentre cc_slot `thenFC` \restore_cc ->
getAbsC (absC restore_cc `thenC`
- (if opt_GranMacros && emit_yield
- then yield [node] True -- XXX live regs wrong
- else absC AbsCNop) `thenC`
+ -- HWL: maybe need yield here
+ -- (if emit_yield
+ -- then yield [node] True -- XXX live regs wrong
+ -- else absC AbsCNop) `thenC`
(case gc_flag of
NoGC -> mapFCs bindNewToTemp args `thenFC` \_ -> nopC
GCMayHappen -> bindConArgs con args
restoreCurrentCostCentre cc_slot `thenFC` \restore_cc ->
absC restore_cc `thenC`
- (if opt_GranMacros && emit_yield
- then yield live_regs True -- XXX live regs wrong?
- else absC AbsCNop) `thenC`
+ -- HWL: maybe need yield here
+ -- (if emit_yield
+ -- then yield live_regs True -- XXX live regs wrong?
+ -- else absC AbsCNop) `thenC`
let
-- ToDo: could maybe use Nothing here if stack_res is False
-- since the heap-check can just return to the top of the
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
-% $Id: CgClosure.lhs,v 1.38 1999/11/11 17:50:49 simonpj Exp $
+% $Id: CgClosure.lhs,v 1.39 2000/01/13 14:33:58 hwloidl Exp $
%
\section[CgClosure]{Code generation for closures}
getSpRelOffset, getHpRelOffset
)
import CLabel ( CLabel, mkClosureLabel, mkFastEntryLabel,
- mkRednCountsLabel, mkInfoTableLabel
+ mkRednCountsLabel, mkInfoTableLabel,
+ pprCLabel
)
import ClosureInfo -- lots and lots of stuff
import CmdLineOpts ( opt_GranMacros, opt_SccProfilingOn, opt_DoTickyProfiling )
--
arg_regs = case entry_conv of
DirectEntry lbl arity regs -> regs
- other -> panic "closureCodeBody:arg_regs"
+ other -> trace ("*** closureCodeBody:arg_regs " ++ (pprHWL entry_conv) ++ "(HWL ignored; no args passed in regs)") []
+
+ pprHWL :: EntryConvention -> String
+ pprHWL (ViaNode) = "ViaNode"
+ pprHWL (StdEntry cl) = "StdEntry"
+ pprHWL (DirectEntry cl i l) = "DirectEntry"
num_arg_regs = length arg_regs
mapCs bindNewToStack arg_offsets `thenC`
setRealAndVirtualSp sp_all_args `thenC`
- argSatisfactionCheck closure_info `thenC`
+ argSatisfactionCheck closure_info arg_regs `thenC`
-- OK, so there are enough args. Now we need to stuff as
-- many of them in registers as the fast-entry code
are expected.
\begin{code}
-argSatisfactionCheck :: ClosureInfo -> Code
+argSatisfactionCheck :: ClosureInfo -> [MagicId] {-GRAN-} -> Code
-argSatisfactionCheck closure_info
+argSatisfactionCheck closure_info arg_regs
= nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points ->
- let
- emit_gran_macros = opt_GranMacros
- in
+-- let
+-- emit_gran_macros = opt_GranMacros
+-- in
-- HWL ngo' ngoq:
-- absC (CMacroStmt GRAN_FETCH []) `thenC`
-- forceHeapCheck [] node_points (absC AbsCNop) `thenC`
- (if emit_gran_macros
- then if node_points
- then fetchAndReschedule [] node_points
- else yield [] node_points
- else absC AbsCNop) `thenC`
+ --(if opt_GranMacros
+ -- then if node_points
+ -- then fetchAndReschedule arg_regs node_points
+ -- else yield arg_regs node_points
+ -- else absC AbsCNop) `thenC`
getSpRelOffset 0 `thenFC` \ (SpRel sp) ->
let
= -- Stack and heap overflow checks
nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points ->
- let
- emit_gran_macros = opt_GranMacros
- in
- -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node
- -- (we prefer fetchAndReschedule-style context switches to yield ones)
- (if emit_gran_macros
- then if node_points
- then fetchAndReschedule [] node_points
- else yield [] node_points
- else absC AbsCNop) `thenC`
+ -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node
+ -- (we prefer fetchAndReschedule-style context switches to yield ones)
+ (if opt_GranMacros
+ then if node_points
+ then fetchAndReschedule [] node_points
+ else yield [] node_points
+ else absC AbsCNop) `thenC`
-- stack and/or heap checks
thunkChecks lbl node_points (
funWrapper closure_info arg_regs stk_tags info_label fun_body
= -- Stack overflow check
nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points ->
- let
- emit_gran_macros = opt_GranMacros
- in
-- HWL chu' ngoq:
- (if emit_gran_macros
- then yield arg_regs node_points
- else absC AbsCNop) `thenC`
+ (if opt_GranMacros
+ then yield arg_regs node_points
+ else absC AbsCNop) `thenC`
-- heap and/or stack checks
fastEntryChecks arg_regs stk_tags info_label node_points (
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
-% $Id: CgHeapery.lhs,v 1.19 1999/10/13 16:39:15 simonmar Exp $
+% $Id: CgHeapery.lhs,v 1.20 2000/01/13 14:33:58 hwloidl Exp $
%
\section[CgHeapery]{Heap management functions}
)
import PrimRep ( PrimRep(..), isFollowableRep )
import Unique ( Unique )
-import CmdLineOpts ( opt_SccProfilingOn )
+import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros )
import GlaExts
import Outputable
getTickyCtrLabel `thenFC` \ ticky_ctr ->
( if all_pointers then -- heap checks are quite easy
+ -- HWL: gran-yield immediately before heap check proper
+ --(if node `elem` regs
+ -- then yield regs True
+ -- else absC AbsCNop ) `thenC`
absC (checking_code stk_words hp_words tag_assts
free_reg (length regs) ticky_ctr)
= ((int2Word# 1#) `shiftL#` (i -# 1#)) `or#` mkRegLiveness regs
mkRegLiveness (_ : regs) = mkRegLiveness regs
+-- The two functions below are only used in a GranSim setup
-- Emit macro for simulating a fetch and then reschedule
fetchAndReschedule :: [MagicId] -- Live registers
-> Bool -- Node reqd?
-> Code
-fetchAndReschedule regs node_reqd =
+fetchAndReschedule regs node_reqd =
if (node `elem` regs || node_reqd)
then fetch_code `thenC` reschedule_code
else absC AbsCNop
where
all_regs = if node_reqd then node:regs else regs
- liveness_mask = 0 {-XXX: mkLiveRegsMask all_regs-}
-
+ liveness_mask = mkRegLiveness regs
reschedule_code = absC (CMacroStmt GRAN_RESCHEDULE [
- mkIntCLit liveness_mask,
+ mkIntCLit (IBOX(word2Int# liveness_mask)),
mkIntCLit (if node_reqd then 1 else 0)])
--HWL: generate GRAN_FETCH macro for GrAnSim
-> Bool -- Node reqd?
-> Code
-yield regs node_reqd =
- -- NB: node is not alive; that's why we use DO_YIELD rather than
- -- GRAN_RESCHEDULE
- yield_code
- where
- all_regs = if node_reqd then node:regs else regs
- liveness_mask = 0 {-XXX: mkLiveRegsMask all_regs-}
-
- yield_code = absC (CMacroStmt GRAN_YIELD [mkIntCLit liveness_mask])
+yield regs node_reqd =
+ if opt_GranMacros && node_reqd
+ then yield_code
+ else absC AbsCNop
+ where
+ -- all_regs = if node_reqd then node:regs else regs
+ liveness_mask = mkRegLiveness regs
+ yield_code =
+ absC (CMacroStmt GRAN_YIELD
+ [mkIntCLit (IBOX(word2Int# liveness_mask))])
\end{code}
%************************************************************************
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
-% $Id: CgStackery.lhs,v 1.12 1999/06/24 13:04:20 simonmar Exp $
+% $Id: CgStackery.lhs,v 1.13 2000/01/13 14:33:58 hwloidl Exp $
%
\section[CgStackery]{Stack management functions}
import CgUsages ( getRealSp )
import AbsCUtils ( mkAbstractCs, mkAbsCStmts, getAmodeRep )
import PrimRep ( getPrimRepSize, PrimRep(..), isFollowableRep )
-import CmdLineOpts ( opt_SccProfilingOn )
+import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros )
import Panic ( panic )
-import Constants ( uF_SIZE, sCC_UF_SIZE, sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE )
+import Constants ( uF_SIZE, sCC_UF_SIZE, gRAN_UF_SIZE,
+ sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE, gRAN_SEQ_FRAME_SIZE )
import IOExts ( trace )
\end{code}
\end{code}
\begin{code}
-updateFrameSize | opt_SccProfilingOn = sCC_UF_SIZE
- | otherwise = uF_SIZE
+updateFrameSize | opt_SccProfilingOn = trace ("updateFrameSize = " ++ (show sCC_UF_SIZE)) sCC_UF_SIZE
+ | opt_GranMacros = trace ("updateFrameSize = " ++ (show gRAN_UF_SIZE))gRAN_UF_SIZE
+ | otherwise = trace ("updateFrameSize = " ++ (show uF_SIZE)) uF_SIZE
seqFrameSize | opt_SccProfilingOn = sCC_SEQ_FRAME_SIZE
- | otherwise = sEQ_FRAME_SIZE
+ | opt_GranMacros = gRAN_SEQ_FRAME_SIZE
+ | otherwise = sEQ_FRAME_SIZE
\end{code}
%************************************************************************
uF_SIZE,
sCC_UF_SIZE,
+ gRAN_UF_SIZE, -- HWL
uF_RET,
uF_SU,
uF_UPDATEE,
sEQ_FRAME_SIZE,
sCC_SEQ_FRAME_SIZE,
+ gRAN_SEQ_FRAME_SIZE, -- HWL
mAX_Vanilla_REG,
mAX_Float_REG,
-- Same again, with profiling
sCC_UF_SIZE = (SCC_UF_SIZE::Int)
+-- Same again, with gransim
+gRAN_UF_SIZE = (GRAN_UF_SIZE::Int)
+
-- Offsets in an update frame. They don't change with profiling!
uF_RET = (UF_RET::Int)
uF_SU = (UF_SU::Int)
\begin{code}
sEQ_FRAME_SIZE = (NOSCC_SEQ_FRAME_SIZE::Int)
sCC_SEQ_FRAME_SIZE = (SCC_SEQ_FRAME_SIZE::Int)
+gRAN_SEQ_FRAME_SIZE = (GRAN_SEQ_FRAME_SIZE::Int)
\end{code}
\begin{code}
frame n = StInd PtrRep
(StIndex PtrRep stgSp (StInt (toInteger (n-uF_SIZE))))
+ -- HWL: these values are *wrong* in a GranSim setup; ToDo: fix
a1 = StAssign PtrRep (frame uF_RET) upd_frame_info
a3 = StAssign PtrRep (frame uF_SU) stgSu
a4 = StAssign PtrRep (frame uF_UPDATEE) bhptr
print STDERR "$Pgm: unknown prologue mangling? $TargetPlatform\n";
}
+    # HWL HACK: don't die, just print a warning
+ #print stderr "HWL: this should die! Prologue junk?: $p\n" if $p =~ /^\t[^\.]/
+ # && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test
die "Prologue junk?: $p\n" if $p =~ /^\t[^\.]/
&& $TargetPlatform !~ /^powerpc-/; #ToDo: remove test
} else {
print STDERR "$Pgm: unknown epilogue mangling? $TargetPlatform\n";
}
+    # HWL HACK: don't die, just print a warning
+ #print stderr "HWL: this should die! Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/
+ # && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test
die "Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/
&& $TargetPlatform !~ /^powerpc-/; #ToDo: remove test
# =!=!=!=!=!=!=!=!=!=!=!
# This script is automatically generated: DO NOT EDIT!!!
# Generated by Glasgow Haskell, version ${ProjectVersion}
+# ngoqvam choHbogh vaj' vIHoHnISbej !!!!
#
\$pvm_executable = '$pvm_executable';
\$pvm_executable_base = '$pvm_executable_base';
}
if ( $a eq '-d' && $in_RTS_args ) {
$debug = '-';
- } elsif ( $a =~ /^-N(\d+)/ && $in_RTS_args ) {
+ } elsif ( $a =~ /^-qN(\d+)/ && $in_RTS_args ) {
+ $nprocessors = $1;
+ } elsif ( $a =~ /^-qp(\d+)/ && $in_RTS_args ) {
$nprocessors = $1;
} else {
push(@nonPVM_args, $a);
local ($final,$suffix,$tmp)= @_ ;
local ($to_do);
+ local ($new_suffix);
+
# $final -- root of where to park ${final}.${suffix}
# $tmp -- temporary file where hsc put the intermediate file.
+ # HWL: use -odir for .hc and .s files, too
+ if ( $Specific_output_dir ne '' ) {
+ $final = "${Specific_output_dir}/${final}";
+ }
+ # HWL: use the same suffix as for $Osuffix in generating intermediate file,
+ # replacing o with hc or s, respectively.
+ if ( $Osuffix ne '' ) {
+ ($new_suffix = $Osuffix) =~ s/o$/hc/ if $suffix eq "hc";
+ ($new_suffix = $Osuffix) =~ s/o$/s/ if $suffix eq "s";
+ $suffix = $new_suffix;
+ print stderr "HWL says: suffix for intermediate file is $suffix; ${final}.${suffix} overall\n" if $Verbose;
+ }
+
# Delete the old file
$to_do = "$Rm ${final}.${suffix}"; &run_something($to_do, "Removing old .${suffix} file");
-#! /usr/local/bin/perl
+#! /usr/bin/perl
# a simple wrapper to test a .s-file mangler
# reads stdin, writes stdout
+push(@INC,"/net/dazdak/BUILDS/gransim-4.04/i386-unknown-linux/ghc/driver");
+
$TargetPlatform = $ARGV[0]; shift; # nice error checking, Will
require("ghc-asm.prl") || die "require mangler failed!\n";
/* -----------------------------------------------------------------------------
- * $Id: CCall.h,v 1.3 1999/02/05 16:02:19 simonm Exp $
+ * $Id: CCall.h,v 1.4 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#define STGCALL5(f,a,b,c,d,e) \
CALLER_SAVE_ALL (void) f(a,b,c,d,e); CALLER_RESTORE_ALL
+#define STGCALL6(f,a,b,c,d,e,z) \
+ CALLER_SAVE_ALL (void) f(a,b,c,d,e,z); CALLER_RESTORE_ALL
+
#define RET_STGCALL0(t,f) \
({ t _r; CALLER_SAVE_ALL _r = f(); CALLER_RESTORE_ALL; _r; })
#define RET_STGCALL5(t,f,a,b,c,d,e) \
({ t _r; CALLER_SAVE_ALL _r = f(a,b,c,d,e); CALLER_RESTORE_ALL; _r; })
+#define RET_STGCALL6(t,f,a,b,c,d,e,z) \
+ ({ t _r; CALLER_SAVE_ALL _r = f(a,b,c,d,e,z); CALLER_RESTORE_ALL; _r; })
+
/*
* A PRIM_STGCALL is used when we have arranged to save the R<n>,
#define PRIM_STGCALL5(f,a,b,c,d,e) \
CALLER_SAVE_SYSTEM (void) f(a,b,c,d,e); CALLER_RESTORE_SYSTEM
+#define PRIM_STGCALL6(f,a,b,c,d,e,z) \
+ CALLER_SAVE_SYSTEM (void) f(a,b,c,d,e,z); CALLER_RESTORE_SYSTEM
+
#define RET_PRIM_STGCALL0(t,f) \
({ t _r; CALLER_SAVE_SYSTEM _r = f(); CALLER_RESTORE_SYSTEM; _r; })
#define RET_PRIM_STGCALL5(t,f,a,b,c,d,e) \
({ t _r; CALLER_SAVE_SYSTEM _r = f(a,b,c,d,e); CALLER_RESTORE_SYSTEM; _r; })
+#define RET_PRIM_STGCALL6(t,f,a,b,c,d,e,z) \
+ ({ t _r; CALLER_SAVE_SYSTEM _r = f(a,b,c,d,e,z); CALLER_RESTORE_SYSTEM; _r; })
+
/* ToDo: ccalls that might garbage collect - do we need to return to
* the scheduler to perform these? Similarly, ccalls that might want
* to call Haskell right back, or start a new thread or something.
/* ----------------------------------------------------------------------------
- * $Id: ClosureTypes.h,v 1.11 1999/05/11 16:47:40 keithw Exp $
+ * $Id: ClosureTypes.h,v 1.12 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#define WEAK 56
#define FOREIGN 57
#define STABLE_NAME 58
+
#define TSO 59
#define BLOCKED_FETCH 60
#define FETCH_ME 61
-#define EVACUATED 62
-#define N_CLOSURE_TYPES 63
+#define FETCH_ME_BQ 62
+#define RBH 63
+
+#define EVACUATED 64
+
+#define N_CLOSURE_TYPES 65
#endif CLOSURETYPES_H
/* ----------------------------------------------------------------------------
- * $Id: Closures.h,v 1.14 1999/12/01 14:34:48 simonmar Exp $
+ * $Id: Closures.h,v 1.15 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
The parallel header
-------------------------------------------------------------------------- */
-#ifdef GRAN
+#ifdef PAR
typedef struct {
- W_ procs;
-} StgGranHeader;
+ /* StgWord ga; */ /* nope! global addresses are managed via a hash table */
+} StgParHeader;
#else /* !PAR */
typedef struct {
/* empty */
-} StgGranHeader;
+} StgParHeader;
#endif /* PAR */
/* -----------------------------------------------------------------------------
+ The GranSim header
+ -------------------------------------------------------------------------- */
+
+#if defined(GRAN)
+
+typedef struct {
+ StgWord procs; /* bitmask indicating on which PEs this closure resides */
+} StgGranHeader;
+
+#else /* !GRAN */
+
+typedef struct {
+ /* empty */
+} StgGranHeader;
+
+#endif /* GRAN */
+
+/* -----------------------------------------------------------------------------
The ticky-ticky header
Comment from old Ticky.h:
#ifdef PROFILING
StgProfHeader prof;
#endif
-#ifdef GRAN
- StgGranHeader par;
+#ifdef PAR
+ StgParHeader par;
+#endif
+#if defined(GRAN)
+ StgGranHeader gran;
#endif
#ifdef TICKY_TICKY
StgTickyHeader ticky;
typedef struct {
StgHeader header;
- struct StgTSO_ *blocking_queue;
- StgMutClosure *mut_link;
-} StgBlockingQueue;
-
-typedef struct {
- StgHeader header;
StgWord words;
StgWord payload[0];
} StgArrWords;
StgClosure* value;
} StgMVar;
-/* Parallel FETCH_ME closures */
-#ifdef PAR
-typedef struct {
+#if defined(PAR) || defined(GRAN)
+/*
+ StgBlockingQueueElement represents the types of closures that can be
+ found on a blocking queue: StgTSO, StgRBHSave, StgBlockedFetch.
+ (StgRBHSave can only appear at the end of a blocking queue).
+ Logically, this is a union type, but defining another struct with a common
+ layout is easier to handle in the code (same as for StgMutClosures).
+*/
+typedef struct StgBlockingQueueElement_ {
+ StgHeader header;
+ struct StgBlockingQueueElement_ *link;
+ StgMutClosure *mut_link;
+ struct StgClosure_ *payload[0];
+} StgBlockingQueueElement;
+
+typedef struct StgBlockingQueue_ {
+ StgHeader header;
+ struct StgBlockingQueueElement_ *blocking_queue;
+ StgMutClosure *mut_link;
+} StgBlockingQueue;
+
+/* this closure hangs at the end of a blocking queue (par setup only) */
+typedef struct StgRBHSave_ {
StgHeader header;
- void *ga; /* type globalAddr is abstract here */
+ StgPtr payload[0];
+} StgRBHSave;
+
+typedef struct StgRBH_ {
+ StgHeader header;
+ struct StgBlockingQueueElement_ *blocking_queue;
+ StgMutClosure *mut_link;
+} StgRBH;
+
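+/* Minimal sketch (hypothetical helper, not part of the RTS proper): block a
+   TSO on an RBH by consing it onto the RBH's blocking queue.  It relies only
+   on the common layout described above (header, link, mut_link as the
+   leading fields of every queue element), which is what allows a TSO to be
+   treated as an StgBlockingQueueElement. */
+static void blockOnRBH(StgRBH *rbh, struct StgTSO_ *tso)
+{
+  StgBlockingQueueElement *bqe = (StgBlockingQueueElement *)tso;
+  bqe->link = rbh->blocking_queue;   /* old queue becomes the tail */
+  rbh->blocking_queue = bqe;         /* the TSO is now at the head */
+}
+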
+#else
+/* old sequential version of a blocking queue, which can only hold TSOs */
+typedef struct StgBlockingQueue_ {
+ StgHeader header;
+ struct StgTSO_ *blocking_queue;
+ StgMutClosure *mut_link;
+} StgBlockingQueue;
+#endif
+
+#if defined(PAR)
+/* global indirections aka FETCH_ME closures */
+typedef struct StgFetchMe_ {
+ StgHeader header;
+ globalAddr *ga; /* type globalAddr is abstract here */
+ StgMutClosure *mut_link;
} StgFetchMe;
+
+/* same contents as an ordinary StgBlockingQueue */
+typedef struct StgFetchMeBlockingQueue_ {
+ StgHeader header;
+ struct StgBlockingQueueElement_ *blocking_queue;
+ StgMutClosure *mut_link;
+} StgFetchMeBlockingQueue;
+
+/* entry in a blocking queue, indicating a request from a TSO on another PE */
+typedef struct StgBlockedFetch_ {
+ StgHeader header;
+ struct StgBlockingQueueElement_ *link;
+ StgMutClosure *mut_link;
+ StgClosure *node;
+ globalAddr ga;
+} StgBlockedFetch;
#endif
#endif /* CLOSURES_H */
/* ----------------------------------------------------------------------------
- * $Id: Constants.h,v 1.7 1999/10/27 09:58:36 simonmar Exp $
+ * $Id: Constants.h,v 1.8 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
Header Sizes
NOTE: keep these in line with the real definitions in Closures.h
+ HWL: checked GRAN_HDR_SIZE; ok
-------------------------------------------------------------------------- */
#define STD_HDR_SIZE 1
#define PROF_HDR_SIZE 1
#define GRAN_HDR_SIZE 1
+#define PAR_HDR_SIZE 0
#define TICKY_HDR_SIZE 0
#define ARR_HDR_SIZE 1
NOTE: keep these in line with the real definitions in InfoTables.h
NOTE: the PROF, and GRAN values are *wrong* (ToDo)
+ HWL: checked GRAN_ITBL_SIZE; ok
-------------------------------------------------------------------------- */
#define STD_ITBL_SIZE 3
#define PROF_ITBL_SIZE 1
#define GRAN_ITBL_SIZE 1
+#define PAR_ITBL_SIZE 0
#define TICKY_ITBL_SIZE 0
/* -----------------------------------------------------------------------------
/* -----------------------------------------------------------------------------
Update Frame Layout
   GranSim uses an additional word as a bitmask in the update frame; this is
   not strictly necessary, but it keeps the standard closure layout.
+ NB: UF_RET etc are *wrong* in a GranSim setup; should be increased by 1
+ if compiling for GranSim (currently not used in compiler) -- HWL
-------------------------------------------------------------------------- */
-
#define NOSCC_UF_SIZE 3
+#define GRAN_UF_SIZE 4
#define SCC_UF_SIZE 4
#define UF_RET 0
SEQ frame size
I don't think seq frames really need sccs --SDM
+ They don't need a GranSim bitmask either, but who cares anyway -- HWL
-------------------------------------------------------------------------- */
#define NOSCC_SEQ_FRAME_SIZE 2
+#define GRAN_SEQ_FRAME_SIZE 3
#define SCC_SEQ_FRAME_SIZE 3
/* -----------------------------------------------------------------------------
--- /dev/null
+/*
+ Time-stamp: <Tue Jan 11 2000 11:29:41 Stardate: [-30]4188.43 hwloidl>
+ $Id: GranSim.h,v 1.2 2000/01/13 14:34:00 hwloidl Exp $
+
+ Headers for GranSim specific objects.
+
+ Note that in GranSim we have one run-queue and blocking-queue for each
+ processor. Therefore, this header file redefines variables like
+ run_queue_hd to be relative to CurrentProc. The main arrays of runnable
+ and blocking queues are defined in Schedule.c. The important STG-called
+ GranSim macros (e.g. for fetching nodes) are at the end of this
+ file. Usually they are just wrappers to proper C functions in GranSim.c. */
+
+#ifndef GRANSIM_H
+#define GRANSIM_H
+
+#if !defined(GRAN)
+
+//Dummy definitions for basic GranSim macros (see GranSim.h)
+#define DO_GRAN_ALLOCATE(n) /* nothing */
+#define DO_GRAN_UNALLOCATE(n) /* nothing */
+#define DO_GRAN_FETCH(node) /* nothing */
+#define DO_GRAN_EXEC(arith,branch,load,store,floats) /* nothing */
+#define GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter) /* nothing */
+#define GRAN_RESCHEDULE(liveness_mask,reenter) /* nothing */
+
+#endif
+
+#if defined(GRAN) /* whole file */
+
+extern StgTSO *CurrentTSOs[];
+
+//@node Headers for GranSim specific objects, , ,
+//@section Headers for GranSim specific objects
+
+//@menu
+//* Includes::
+//* Externs and prototypes::
+//* Run and blocking queues::
+//* Spark queues::
+//* Processor related stuff::
+//* GranSim costs::
+//* STG called GranSim functions::
+//* STG-called routines::
+//@end menu
+
+//@node Includes, Externs and prototypes, Headers for GranSim specific objects, Headers for GranSim specific objects
+//@subsection Includes
+
+/*
+#include "Closures.h"
+#include "TSO.h"
+#include "Rts.h"
+*/
+
+//@node Externs and prototypes, Run and blocking queues, Includes, Headers for GranSim specific objects
+//@subsection Externs and prototypes
+
+/* Global constants */
+extern char *gran_event_names[];
+extern char *proc_status_names[];
+extern char *event_names[];
+
+/* Vars checked from within STG land */
+extern rtsBool NeedToReSchedule, IgnoreEvents, IgnoreYields;
+extern rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice;
+
+/* costs for basic operations (copied from RTS flags) */
+extern nat gran_arith_cost, gran_branch_cost, gran_load_cost, gran_store_cost, gran_float_cost;
+
+extern nat SparksAvail; /* How many sparks are available */
+extern nat SurplusThreads; /* How many excess threads are there */
+extern nat sparksIgnored, sparksCreated;
+
+//@node Run and blocking queues, Spark queues, Externs and prototypes, Headers for GranSim specific objects
+//@subsection Run and blocking queues
+
+/* declared in Schedule.c */
+extern StgTSO *run_queue_hds[], *run_queue_tls[];
+extern StgTSO *blocked_queue_hds[], *blocked_queue_tls[];
+extern StgTSO *ccalling_threadss[];
+
+#define run_queue_hd run_queue_hds[CurrentProc]
+#define run_queue_tl run_queue_tls[CurrentProc]
+#define blocked_queue_hd blocked_queue_hds[CurrentProc]
+#define blocked_queue_tl blocked_queue_tls[CurrentProc]
+#define pending_sparks_hd pending_sparks_hds[CurrentProc]
+#define pending_sparks_tl pending_sparks_tls[CurrentProc]
+#define ccalling_threads ccalling_threadss[CurrentProc]
+
+//@node Spark queues, Processor related stuff, Run and blocking queues, Headers for GranSim specific objects
+//@subsection Spark queues
+
+/*
+In GranSim we use a double linked list to represent spark queues.
+
+This is more flexible, but slower, than the array of pointers
+representation used in GUM. We use the flexibility to define new fields in
+the rtsSpark structure, representing e.g. granularity info (see HWL's PhD
+thesis), or info about the parent of a spark.
+*/
+
+/* Sparks and spark queues */
+typedef struct rtsSpark_
+{
+ StgClosure *node;
+ StgInt name, global;
+ StgInt gran_info; /* for granularity improvement mechanisms */
+ PEs creator; /* PE that created this spark (unused) */
+ struct rtsSpark_ *prev, *next;
+} rtsSpark;
+typedef rtsSpark *rtsSparkQ;
+
+/* The spark queues, proper */
+/* In GranSim this is a globally visible array of spark queues */
+extern rtsSparkQ pending_sparks_hds[];
+extern rtsSparkQ pending_sparks_tls[];
+
+/* Prototypes of those spark routines visible to compiler generated .hc */
+/* Routines only used inside the RTS are defined in rts/parallel GranSimRts.h */
+rtsSpark *newSpark(StgClosure *node,
+ StgInt name, StgInt gran_info, StgInt size_info,
+ StgInt par_info, StgInt local);
+void add_to_spark_queue(rtsSpark *spark);
+
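+/* Minimal usage sketch (hypothetical helper, not part of the RTS proper):
+   create a local spark for a closure and put it on the current PE's spark
+   queue, using the newSpark/add_to_spark_queue prototypes above.  The
+   attribute values are illustrative only; the parzh macro in PrimOps.h
+   additionally records the spark event via GranSimSpark. */
+static void sparkClosure(StgClosure *node)
+{
+  rtsSpark *spark = newSpark(node,
+                             0 /* name */, 0 /* gran_info */,
+                             0 /* size_info */, 0 /* par_info */,
+                             1 /* local */);
+  if (spark != NULL)
+    add_to_spark_queue(spark);
+}
+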
+//@node Processor related stuff, GranSim costs, Spark queues, Headers for GranSim specific objects
+//@subsection Processor related stuff
+
+extern PEs CurrentProc;
+extern rtsTime CurrentTime[];
+
+/* Maximum number of PEs that can be simulated */
+#define MAX_PROC 32 /* (BITS_IN(StgWord)) */ // ToDo: fix this!!
+//#if MAX_PROC==16
+//#else
+//#error MAX_PROC should be 32 on this architecture
+//#endif
+
+#define CurrentTSO CurrentTSOs[CurrentProc]
+
+/* Processor numbers to bitmasks and vice-versa */
+#define MainProc 0 /* Id of main processor */
+#define NO_PRI 0 /* dummy priority */
+#define MAX_PRI 10000 /* max possible priority */
+#define MAIN_PRI MAX_PRI /* priority of main thread */
+
+/* GrAnSim uses IdleProcs as bitmask to indicate which procs are idle */
+#define PE_NUMBER(n) (1l << (long)n)
+#define ThisPE PE_NUMBER(CurrentProc)
+#define MainPE PE_NUMBER(MainProc)
+#define Everywhere (~0l)
+#define Nowhere (0l)
+#define Now CurrentTime[CurrentProc]
+
+#define IS_LOCAL_TO(ga,proc) ((1l << (PEs) proc) & ga)
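+
+/* Minimal sketch (hypothetical helper, not part of the RTS proper): does a
+   copy of this closure reside on the current PE?  Uses the IS_LOCAL_TO
+   bitmask test above together with the per-closure procs bitmask added to
+   the closure header (gran.procs, see Closures.h); assumes Closures.h is
+   already in scope, as in the usual RTS include order. */
+static rtsBool isLocalClosure(StgClosure *node)
+{
+  return IS_LOCAL_TO(node->header.gran.procs, CurrentProc) ? rtsTrue : rtsFalse;
+}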
+
+#define GRAN_TIME_SLICE 1000 /* max time between 2 ReSchedules */
+
+//@node GranSim costs, STG called GranSim functions, Processor related stuff, Headers for GranSim specific objects
+//@subsection GranSim costs
+
+/* Default constants for communication (see RtsFlags on how to change them) */
+
+#define LATENCY 1000 /* Latency for single packet */
+#define ADDITIONAL_LATENCY 100 /* Latency for additional packets */
+#define BASICBLOCKTIME 10
+#define FETCHTIME (LATENCY*2+MSGUNPACKTIME)
+#define LOCALUNBLOCKTIME 10
+#define GLOBALUNBLOCKTIME (LATENCY+MSGUNPACKTIME)
+
+#define MSGPACKTIME 0 /* Cost of creating a packet */
+#define MSGUNPACKTIME 0 /* Cost of receiving a packet */
+#define MSGTIDYTIME 0 /* Cost of cleaning up after send */
+
+/* How much to increase GrAnSim's internal packet size if an overflow
+ occurs.
+ NB: This is a GrAnSim internal variable and is independent of the
+ simulated packet buffer size.
+*/
+
+#define GRANSIM_DEFAULT_PACK_BUFFER_SIZE 400
+#define REALLOC_SZ 200
+
+/* extern W_ gran_mpacktime, gran_mtidytime, gran_munpacktime; */
+
+/* Thread cost model */
+#define THREADCREATETIME (25+THREADSCHEDULETIME)
+#define THREADQUEUETIME 12 /* Cost of adding a thread to the running/runnable queue */
+#define THREADDESCHEDULETIME 75 /* Cost of descheduling a thread */
+#define THREADSCHEDULETIME 75 /* Cost of scheduling a thread */
+#define THREADCONTEXTSWITCHTIME (THREADDESCHEDULETIME+THREADSCHEDULETIME)
+
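+/* Worked example of the defaults above: fetching a remote closure costs
+   FETCHTIME = LATENCY*2 + MSGUNPACKTIME = 2*1000 + 0 = 2000 simulated cycles;
+   creating a thread costs THREADCREATETIME = 25 + THREADSCHEDULETIME = 100
+   cycles; a full context switch costs THREADCONTEXTSWITCHTIME = 75 + 75 =
+   150 cycles.  All of these can be changed via RtsFlags. */
+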
+/* Instruction Cost model (SPARC, including cache misses) */
+#define ARITH_COST 1
+#define BRANCH_COST 2
+#define LOAD_COST 4
+#define STORE_COST 4
+#define FLOAT_COST 1 /* ? */
+
+#define HEAPALLOC_COST 11
+
+#define PRI_SPARK_OVERHEAD 5
+#define PRI_SCHED_OVERHEAD 5
+
+//@node STG called GranSim functions, STG-called routines, GranSim costs, Headers for GranSim specific objects
+//@subsection STG called GranSim functions
+
+/* STG called GranSim functions */
+void GranSimAllocate(StgInt n);
+void GranSimUnallocate(StgInt n);
+void GranSimExec(StgWord ariths, StgWord branches, StgWord loads, StgWord stores, StgWord floats);
+StgInt GranSimFetch(StgClosure *node);
+void GranSimSpark(StgInt local, StgClosure *node);
+void GranSimSparkAt(rtsSpark *spark, StgClosure *where,StgInt identifier);
+void GranSimSparkAtAbs(rtsSpark *spark, PEs proc, StgInt identifier);
+void GranSimBlock(StgTSO *tso, PEs proc, StgClosure *node);
+
+
+//@node STG-called routines, , STG called GranSim functions, Headers for GranSim specific objects
+//@subsection STG-called routines
+
+/* Wrapped version of calls to GranSim-specific STG routines */
+
+/*
+#define DO_PERFORM_RESCHEDULE(liveness, always_reenter_node) PerformReschedule_wrapper(liveness, always_reenter_node)
+*/
+#define DO_GRAN_ALLOCATE(n) STGCALL1(GranSimAllocate, n)
+#define DO_GRAN_UNALLOCATE(n) STGCALL1(GranSimUnallocate, n)
+#define DO_GRAN_FETCH(node) STGCALL1(GranSimFetch, node)
+#define DO_GRAN_EXEC(arith,branch,load,store,floats) GranSimExec(arith,branch,load,store,floats)
+
+/*
+ ToDo: Clean up this mess of GRAN macros!!! -- HWL
+*/
+// DO_GRAN_FETCH((StgClosure*)R1.p);
+#define GRAN_FETCH() /* nothing */
+
+#define GRAN_FETCH_AND_RESCHEDULE(liveness,reenter) \
+ DO_GRAN_FETCH((StgClosure*)R1.p); \
+ DO_GRAN_YIELD(liveness,ENTRY_CODE((D_)(*R1.p)));
+// RESTORE_EVERYTHING is done implicitly before entering the threaded world again
+
+/*
+ This is the only macro currently enabled;
+ It should check whether it is time for the current thread to yield
+ (e.g. if there is a more recent event in the queue) and it should check
+ whether node is local, via a call to GranSimFetch.
+ ToDo: split this in 2 routines:
+ - GRAN_YIELD (as it is below)
+ - GRAN_FETCH (the rest of this macro)
+ emit only these 2 macros based on node's liveness
+ node alive: emit both macros
+ node not alive: do only a GRAN_YIELD
+
+ replace gran_yield_? with gran_block_? (they really block the current
+ thread)
+*/
+#define GRAN_RESCHEDULE(liveness,ptrs) \
+ if (RET_STGCALL1(StgInt, GranSimFetch, (StgClosure*)R1.p)) {\
+ EXTFUN_RTS(gran_block_##ptrs); \
+ JMP_(gran_block_##ptrs); \
+ } else { \
+ if (TimeOfLastEvent < CurrentTime[CurrentProc] && \
+ HEAP_ALLOCED((StgClosure *)R1.p) && \
+ LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \
+ EXTFUN_RTS(gran_yield_##ptrs); \
+ JMP_(gran_yield_##ptrs); \
+ } \
+ /* GRAN_YIELD(ptrs) */ \
+ }
+
+
+// YIELD(liveness,reenter)
+
+// GRAN_YIELD(liveness_mask);
+
+// GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter)
+
+#define THREAD_CONTEXT_SWITCH(liveness_mask,reenter) \
+ do { \
+ if (context_switch /* OR_INTERVAL_EXPIRED */) { \
+ GRAN_RESCHEDULE(liveness_mask,reenter); \
+ } }while(0)
+
+#define GRAN_EXEC(arith,branch,load,store,floats) \
+ { \
+ W_ cost = gran_arith_cost*arith + \
+ gran_branch_cost*branch + \
+ gran_load_cost*load + \
+ gran_store_cost*store + \
+ gran_float_cost*floats; \
+ CurrentTSO->gran.exectime += cost; \
+ CurrentTime[CurrentProc] += cost; \
+ }
+
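+/* Worked example: assuming the gran_*_cost variables hold the default
+   instruction costs above (ARITH_COST=1, BRANCH_COST=2, LOAD_COST=4,
+   STORE_COST=4, FLOAT_COST=1), a basic block with 3 arithmetic ops, 1 branch,
+   2 loads and 1 store is charged 3*1 + 1*2 + 2*4 + 1*4 = 17 cycles, i.e.
+   GRAN_EXEC(3,1,2,1,0) advances both CurrentTSO->gran.exectime and
+   CurrentTime[CurrentProc] by 17. */
+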
+/* In GranSim we first check whether there is an event to handle; only in
+   that case (or if the time slice is over, under fair scheduling) do we
+   yield.  This is very similar to yielding in the concurrent world.
+   ToDo: check whether gran_yield_? can be merged with other yielding code
+*/
+
+#define DO_GRAN_YIELD(ptrs) if (!IgnoreYields && \
+ TimeOfLastEvent < CurrentTime[CurrentProc] && \
+ HEAP_ALLOCED((StgClosure *)R1.p) && \
+ LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \
+ EXTFUN_RTS(gran_yield_##ptrs); \
+ JMP_(gran_yield_##ptrs); \
+ }
+
+#define GRAN_YIELD(ptrs) \
+ { \
+ extern nat context_switch; \
+ if ( (CurrentTime[CurrentProc]>=EndOfTimeSlice) || \
+ ((CurrentTime[CurrentProc]>=TimeOfNextEvent) && \
+ (TimeOfNextEvent!=0) && !IgnoreEvents )) { \
+ /* context_switch = 1; */ \
+ DO_GRAN_YIELD(ptrs); \
+ } \
+ }
+
+#define ADD_TO_SPARK_QUEUE(spark) \
+ STGCALL1(add_to_spark_queue,spark) \
+
+#endif /* GRAN */
+
+#endif /* GRANSIM_H */
/* ----------------------------------------------------------------------------
- * $Id: InfoMacros.h,v 1.8 1999/11/30 11:44:32 simonmar Exp $
+ * $Id: InfoMacros.h,v 1.9 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#define INIT_VECTOR
#endif
+/*
+ On the GRAN/PAR specific parts of the InfoTables:
+
+ In both GranSim and GUM we use revertible black holes (RBH) when putting
+ an updatable closure into a packet for communication. The entry code for
+ an RBH performs standard blocking (as with any kind of BH). The info
+ table for the RBH resides just before the one for the std info
+ table. (NB: there is one RBH ITBL for every ITBL of an updatable
+ closure.) The @rbh_infoptr@ field in the ITBL points from the std ITBL to
+ the RBH ITBL and vice versa. This is used by the RBH_INFOPTR and
+ REVERT_INFOPTR macros to turn an updatable node into an RBH and vice
+ versa. Note, that the only case where we have to revert the RBH in its
+ original form is when a packet is sent back because of garbage collection
+ on another PE. In the RTS for GdH we will use this reversion mechanism in
+ order to deal with faults in the system.
+ ToDo: Check that RBHs are needed for all the info tables below. From a quick
+ check of the macros generated in the libs it seems that all of them are used
+ for generating THUNKs.
+ Possible optimisation: Note that any RBH ITBL is a fixed distance away from
+ the actual ITBL. We could inline this offset as a constant into the RTS and
+ avoid the rbh_infoptr fields altogether (Jim did that in the old RTS).
+ -- HWL
+*/
+
+
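+/* Minimal sketch (hypothetical helper, not part of the RTS proper),
+   illustrating the RBH_INFOPTR/REVERT_INFOPTR round trip described above.
+   Assumes InfoTables.h (which defines StgInfoTable, its rbh_infoptr field
+   and the two macros) has already been included, as in the usual Stg.h
+   include order. */
+#if defined(GRAN) || defined(PAR)
+static int rbh_round_trip_ok(StgInfoTable *std_info)
+{
+  StgInfoTable *rbh_info = RBH_INFOPTR(std_info);  /* std ITBL -> RBH ITBL */
+  return REVERT_INFOPTR(rbh_info) == std_info;     /* RBH ITBL -> std ITBL */
+}
+#endif
+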
/* function/thunk info tables --------------------------------------------- */
+#if defined(GRAN) || defined(PAR)
+
#define \
INFO_TABLE_SRT(info, /* info-table label */ \
entry, /* entry code label */ \
type, /* closure type */ \
info_class, entry_class, /* C storage classes */ \
prof_descr, prof_type) /* profiling info */ \
+ entry_class(RBH_##entry); \
entry_class(entry); \
+ info_class INFO_TBL_CONST StgInfoTable info; \
+ info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \
+ layout : { payload : {ptrs,nptrs} }, \
+ SRT_INFO(RBH,srt_,srt_off_,srt_len_), \
+ INCLUDE_RBH_INFO(info), \
+ INIT_ENTRY(RBH_##entry), \
+ INIT_VECTOR \
+ } ; \
+ StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \
info_class INFO_TBL_CONST StgInfoTable info = { \
layout : { payload : {ptrs,nptrs} }, \
SRT_INFO(type,srt_,srt_off_,srt_len_), \
+ INCLUDE_RBH_INFO(RBH_##info), \
INIT_ENTRY(entry), \
INIT_VECTOR \
}
+#else
+
+#define \
+INFO_TABLE_SRT(info, /* info-table label */ \
+ entry, /* entry code label */ \
+ ptrs, nptrs, /* closure layout info */\
+ srt_, srt_off_, srt_len_, /* SRT info */ \
+ type, /* closure type */ \
+ info_class, entry_class, /* C storage classes */ \
+ prof_descr, prof_type) /* profiling info */ \
+ entry_class(entry); \
+ info_class INFO_TBL_CONST StgInfoTable info = { \
+ layout : { payload : {ptrs,nptrs} }, \
+ SRT_INFO(type,srt_,srt_off_,srt_len_), \
+ INIT_ENTRY(entry), \
+ INIT_VECTOR \
+ }
+
+#endif
/* direct-return address info tables --------------------------------------*/
+#if defined(GRAN) || defined(PAR)
+
+#define \
+INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_, \
+ type, info_class, entry_class, \
+ prof_descr, prof_type) \
+ entry_class(RBH_##entry); \
+ entry_class(entry); \
+ info_class INFO_TBL_CONST StgInfoTable info; \
+ info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \
+ layout : { bitmap : (StgWord32)bitmap_ }, \
+ SRT_INFO(RBH,srt_,srt_off_,srt_len_), \
+ INCLUDE_RBH_INFO(info), \
+ INIT_ENTRY(RBH_##entry), \
+ INIT_VECTOR \
+ }; \
+ StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \
+ info_class INFO_TBL_CONST StgInfoTable info = { \
+ layout : { bitmap : (StgWord32)bitmap_ }, \
+ SRT_INFO(type,srt_,srt_off_,srt_len_), \
+ INCLUDE_RBH_INFO(RBH_##info), \
+ INIT_ENTRY(entry), \
+ INIT_VECTOR \
+ }
+#else
+
#define \
INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_, \
type, info_class, entry_class, \
INIT_ENTRY(entry), \
INIT_VECTOR \
}
+#endif
/* info-table without an SRT -----------------------------------------------*/
+#if defined(GRAN) || defined(PAR)
+
+#define \
+INFO_TABLE(info, entry, ptrs, nptrs, type, info_class, \
+ entry_class, prof_descr, prof_type) \
+ entry_class(RBH_##entry); \
+ entry_class(entry); \
+ info_class INFO_TBL_CONST StgInfoTable info; \
+ info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \
+ layout : { payload : {ptrs,nptrs} }, \
+ STD_INFO(RBH), \
+ INCLUDE_RBH_INFO(info), \
+ INIT_ENTRY(RBH_##entry), \
+ INIT_VECTOR \
+ }; \
+ StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \
+ info_class INFO_TBL_CONST StgInfoTable info = { \
+ layout : { payload : {ptrs,nptrs} }, \
+ STD_INFO(type), \
+ INCLUDE_RBH_INFO(RBH_##info), \
+ INIT_ENTRY(entry), \
+ INIT_VECTOR \
+ }
+
+#else
+
#define \
INFO_TABLE(info, entry, ptrs, nptrs, type, info_class, \
entry_class, prof_descr, prof_type) \
INIT_VECTOR \
}
+#endif
+
/* special selector-thunk info table ---------------------------------------*/
+#if defined(GRAN) || defined(PAR)
+
+#define \
+INFO_TABLE_SELECTOR(info, entry, offset, info_class, \
+ entry_class, prof_descr, prof_type) \
+ entry_class(RBH_##entry); \
+ entry_class(entry); \
+ info_class INFO_TBL_CONST StgInfoTable info; \
+ info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \
+ layout : { selector_offset : offset }, \
+ STD_INFO(RBH), \
+ INCLUDE_RBH_INFO(info), \
+ INIT_ENTRY(RBH_##entry), \
+ INIT_VECTOR \
+ }; \
+ StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \
+ info_class INFO_TBL_CONST StgInfoTable info = { \
+ layout : { selector_offset : offset }, \
+ STD_INFO(THUNK_SELECTOR), \
+ INCLUDE_RBH_INFO(RBH_##info), \
+ INIT_ENTRY(entry), \
+ INIT_VECTOR \
+ }
+
+#else
+
#define \
INFO_TABLE_SELECTOR(info, entry, offset, info_class, \
entry_class, prof_descr, prof_type) \
INIT_VECTOR \
}
+#endif
+
/* constructor info table --------------------------------------------------*/
#define \
/* ----------------------------------------------------------------------------
- * $Id: InfoTables.h,v 1.17 1999/07/16 09:41:12 panne Exp $
+ * $Id: InfoTables.h,v 1.18 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
Parallelism info
-------------------------------------------------------------------------- */
-#ifdef PAR
+#if 0 && (defined(PAR) || defined(GRAN))
-#define PAR_INFO_WORDS 0
+// CURRENTLY UNUSED
+// ToDo: use this in StgInfoTable (mutually recursive) -- HWL
+
+#define PAR_INFO_WORDS 1
typedef struct {
- /* empty */
+ StgInfoTable *rbh_infoptr; /* infoptr to the RBH */
} StgParInfo;
#else /* !PAR */
#endif /* PAR */
+/*
+ Copied from ghc-0.29; ToDo: check this code -- HWL
+
+ In the parallel system, all updatable closures have corresponding
+ revertible black holes. When we are assembly-mangling, we guarantee
+ that the revertible black hole code precedes the normal entry code, so
+ that the RBH info table resides at a fixed offset from the normal info
+ table. Otherwise, we add the RBH info table pointer to the end of the
+ normal info table and vice versa.
+
+ Currently has to use a !RBH_MAGIC_OFFSET setting.
+ Still todo: init of par.infoptr field in all infotables!!
+*/
+
+#if defined(PAR) || defined(GRAN)
+# define RBH_INFO_OFFSET (GEN_INFO_OFFSET+GEN_INFO_WORDS)
+
+# ifdef RBH_MAGIC_OFFSET
+
+# error magic offset not yet implemented
+
+# define RBH_INFO_WORDS 0
+# define INCLUDE_RBH_INFO(infoptr)
+
+# define RBH_INFOPTR(infoptr) (((P_)infoptr) - RBH_MAGIC_OFFSET)
+# define REVERT_INFOPTR(infoptr) (((P_)infoptr) + RBH_MAGIC_OFFSET)
+
+# else
+
+# define RBH_INFO_WORDS 1
+# define INCLUDE_RBH_INFO(info) rbh_infoptr : &(info)
+
+# define RBH_INFOPTR(infoptr) (((StgInfoTable *)(infoptr))->rbh_infoptr)
+# define REVERT_INFOPTR(infoptr) (((StgInfoTable *)(infoptr))->rbh_infoptr)
+
+# endif
+
+/* see ParallelRts.h */
+// EXTFUN(RBH_entry);
+//StgClosure *convertToRBH(StgClosure *closure);
+//#if defined(GRAN)
+//void convertFromRBH(StgClosure *closure);
+//#elif defined(PAR)
+//void convertToFetchMe(StgPtr closure, globalAddr *ga);
+//#endif
+
+#endif
+
/* -----------------------------------------------------------------------------
Debugging info
-------------------------------------------------------------------------- */
#define closureFlags(c) (closure_flags[get_itbl(c)->type])
-#define closure_STATIC(c) ( closureFlags(c) & _STA)
+#define closure_HNF(c) ( closureFlags(c) & _HNF)
+#define closure_BITMAP(c) ( closureFlags(c) & _BTM)
+#define closure_NON_SPARK(c) ( (closureFlags(c) & _NS))
#define closure_SHOULD_SPARK(c) (!(closureFlags(c) & _NS))
+#define closure_STATIC(c) ( closureFlags(c) & _STA)
+#define closure_THUNK(c) ( closureFlags(c) & _THU)
#define closure_MUTABLE(c) ( closureFlags(c) & _MUT)
#define closure_UNPOINTED(c) ( closureFlags(c) & _UPT)
+#define closure_SRT(c) ( closureFlags(c) & _SRT)
+
+/* same as above but for info-ptr rather than closure */
+#define ipFlags(ip) (closure_flags[ip->type])
+#define ip_HNF(ip) ( ipFlags(ip) & _HNF)
+#define ip_BITMAP(ip) ( ipFlags(ip) & _BTM)
+#define ip_SHOULD_SPARK(ip) (!(ipFlags(ip) & _NS))
+#define ip_STATIC(ip) ( ipFlags(ip) & _STA)
+#define ip_THUNK(ip) ( ipFlags(ip) & _THU)
+#define ip_MUTABLE(ip) ( ipFlags(ip) & _MUT)
+#define ip_UNPOINTED(ip) ( ipFlags(ip) & _UPT)
+#define ip_SRT(ip) ( ipFlags(ip) & _SRT)
/* -----------------------------------------------------------------------------
Info Tables
typedef struct _StgInfoTable {
StgSRT *srt; /* pointer to the SRT table */
-#ifdef PAR
- StgParInfo par;
+#if defined(PAR) || defined(GRAN)
+ // StgParInfo par;
+ struct _StgInfoTable *rbh_infoptr;
#endif
#ifdef PROFILING
/* StgProfInfo prof; */
--- /dev/null
+/*
+ Time-stamp: <Fri Dec 10 1999 17:15:01 Stardate: [-30]4028.38 software>
+
+ Definitions for parallel machines.
+
+ This section contains definitions applicable only to programs compiled
+ to run on a parallel machine, i.e. on GUM. Some of these definitions
+ are also used when simulating parallel execution, i.e. on GranSim.
+*/
+
+/*
+  ToDo: Check the PAR specific part of this file
+ Move stuff into Closures.h and ClosureMacros.h
+ Clean-up GRAN specific code
+ -- HWL
+*/
+
+#ifndef PARALLEL_H
+#define PARALLEL_H
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+//@node Parallel definitions, End of File
+//@section Parallel definitions
+
+//@menu
+//* Basic definitions::
+//* GUM::
+//* GranSim::
+//@end menu
+
+//@node Basic definitions, GUM, Parallel definitions, Parallel definitions
+//@subsection Basic definitions
+
+/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */
+
+/* Needed for dumping routines */
+#if defined(PAR)
+# define NODE_STR_LEN 20
+# define TIME_STR_LEN 120
+# define TIME rtsTime
+# define CURRENT_TIME msTime()
+# define TIME_ON_PROC(p) msTime()
+# define CURRENT_PROC thisPE
+# define BINARY_STATS RtsFlags.ParFlags.ParStats.Binary
+#elif defined(GRAN)
+# define NODE_STR_LEN 20
+# define TIME_STR_LEN 120
+# define TIME rtsTime
+# define CURRENT_TIME CurrentTime[CurrentProc]
+# define TIME_ON_PROC(p) CurrentTime[p]
+# define CURRENT_PROC CurrentProc
+# define BINARY_STATS RtsFlags.GranFlags.GranSimStats.Binary
+#endif
+
+#if defined(PAR)
+# define MAX_PES 256 /* Maximum number of processors */
+ /* MAX_PES is enforced by SysMan, which does not
+ allow more than this many "processors".
+ This is important because PackGA [GlobAddr.lc]
+ **assumes** that a PE# can fit in 8+ bits.
+ */
+
+# define SPARK_POOLS 2 /* no. of spark pools */
+# define REQUIRED_POOL 0 /* idx of pool of mandatory sparks (concurrency) */
+# define ADVISORY_POOL 1 /* idx of pool of advisory sparks (parallelism) */
+#endif
+
+//@menu
+//* GUM::
+//* GranSim::
+//@end menu
+//*/
+
+//@node GUM, GranSim, Basic definitions, Parallel definitions
+//@subsection GUM
+
+#if defined(PAR)
+/*
+Symbolic constants for the packing code.
+
+This constant defines how many words of data we can pack into a single
+packet in the parallel (GUM) system.
+*/
+
+//@menu
+//* Types::
+//* Externs::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Types, Externs, GUM, GUM
+//@subsubsection Types
+
+/* Sparks and spark queues */
+typedef StgClosure *rtsSpark;
+typedef rtsSpark *rtsSparkQ;
+
+typedef struct rtsPackBuffer_ {
+ StgInt /* nat */ id;
+ StgInt /* nat */ size;
+ StgInt /* nat */ unpacked_size;
+ StgTSO *tso;
+ StgWord *buffer[0];
+} rtsPackBuffer;
+
+#define PACK_BUFFER_HDR_SIZE 4
+
+//@node Externs, Prototypes, Types, GUM
+//@subsubsection Externs
+
+// extern rtsBool do_sp_profile;
+
+extern globalAddr theGlobalFromGA, theGlobalToGA;
+extern StgBlockedFetch *PendingFetches;
+extern GlobalTaskId *allPEs;
+
+extern rtsBool IAmMainThread, GlobalStopPending;
+//extern rtsBool fishing;
+extern rtsTime last_fish_arrived_at;
+extern nat outstandingFishes;
+extern GlobalTaskId SysManTask;
+extern int seed; /* pseudo-random-number generator seed: */
+ /* Initialised in ParInit */
+extern StgInt threadId; /* Number of Threads that have existed on a PE */
+extern GlobalTaskId mytid;
+
+extern GlobalTaskId *allPEs;
+extern nat nPEs;
+extern nat sparksIgnored, sparksCreated, threadsIgnored, threadsCreated;
+extern nat advisory_thread_count;
+
+extern rtsBool InGlobalGC; /* Are we in the midst of performing global GC */
+
+static ullong startTime; /* start of comp; in RtsStartup.c */
+
+/* the spark pools proper */
+extern rtsSpark *pending_sparks_hd[]; /* ptr to start of a spark pool */
+extern rtsSpark *pending_sparks_tl[]; /* ptr to end of a spark pool */
+extern rtsSpark *pending_sparks_lim[];
+extern rtsSpark *pending_sparks_base[];
+extern nat spark_limit[];
+
+extern rtsPackBuffer *PackBuffer; /* size: can be set via option */
+extern rtsPackBuffer *buffer; /* HWL_ */
+extern rtsPackBuffer *freeBuffer; /* HWL_ */
+extern rtsPackBuffer *packBuffer; /* HWL_ */
+extern rtsPackBuffer *gumPackBuffer;
+
+extern int thisPE;
+
+/* From Global.c */
+extern GALA *freeGALAList;
+extern GALA *freeIndirections;
+extern GALA *liveIndirections;
+extern GALA *liveRemoteGAs;
+
+/*
+extern HashTable *taskIDtoPEtable;
+extern HashTable *LAtoGALAtable;
+extern HashTable *pGAtoGALAtable;
+*/
+
+//@node Prototypes, Macros, Externs, GUM
+//@subsubsection Prototypes
+
+/* From ParInit.c */
+void initParallelSystem(void);
+void SynchroniseSystem(void);
+void par_exit(StgInt n);
+
+PEs taskIDtoPE (GlobalTaskId gtid);
+void registerTask (GlobalTaskId gtid);
+globalAddr *LAGAlookup (StgClosure *addr);
+StgClosure *GALAlookup (globalAddr *ga);
+//static GALA *allocIndirection (StgPtr addr);
+globalAddr *makeGlobal (StgClosure *addr, rtsBool preferred);
+globalAddr *setRemoteGA (StgClosure *addr, globalAddr *ga, rtsBool preferred);
+void splitWeight (globalAddr *to, globalAddr *from);
+globalAddr *addWeight (globalAddr *ga);
+void initGAtables (void);
+void RebuildLAGAtable (void);
+StgWord PackGA (StgWord pe, int slot);
+
+//@node Macros, , Prototypes, GUM
+//@subsubsection Macros
+
+/* delay (in us) between dying fish returning and sending out a new fish */
+#define FISH_DELAY 1000
+/* max no. of outstanding spark steals */
+#define MAX_FISHES 1
+
+// ToDo: check which of these is actually needed!
+
+# define PACK_HEAP_REQUIRED ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (MIN_UPD_SIZE + 2))
+
+# define MAX_GAS (RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE)
+
+
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+
+# define PACK_HDR_SIZE 1 /* Words of header in a packet */
+
+# define PACK_PLC_SIZE 2 /* Size of a packed PLC in words */
+
+/*
+ Definitions relating to the entire parallel-only fixed-header field.
+
+ On GUM, the global addresses for each local closure are stored in a
+  separate hash table, rather than with the closure in the heap. We call
+ @getGA@ to look up the global address associated with a local closure (0
+ is returned for local closures that have no global address), and @setGA@
+ to store a new global address for a local closure which did not
+ previously have one. */
+
+# define GA_HDR_SIZE 0
+
+# define GA(closure) getGA(closure)
+
+# define SET_GA(closure, ga) setGA(closure,ga)
+# define SET_STATIC_GA(closure)
+# define SET_GRAN_HDR(closure,pe)
+# define SET_STATIC_PROCS(closure)
+
+# define MAX_GA_WEIGHT 0 /* Treat as 2^n */
+
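+/* Minimal sketch (hypothetical helper, not part of the RTS proper): make
+   sure a local closure has a global address, using the LAGAlookup and
+   makeGlobal prototypes above.  It is assumed, as for getGA, that the
+   lookup returns NULL (0) for a closure that has no GA yet. */
+static globalAddr *ensureGlobal(StgClosure *closure)
+{
+  globalAddr *ga = LAGAlookup(closure);   /* NULL: no GA assigned yet */
+  if (ga == NULL)
+    ga = makeGlobal(closure, rtsTrue);    /* allocate a preferred GA  */
+  return ga;
+}
+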
+/* At the moment, there is no activity profiling for GUM. This may change. */
+# define SET_TASK_ACTIVITY(act) /* nothing */
+
+#endif /* PAR */
+
+//@node GranSim, , GUM, Parallel definitions
+//@subsection GranSim
+
+#if defined(GRAN)
+/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */
+
+//@menu
+//* Types::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Types, Prototypes, GranSim, GranSim
+//@subsubsection Types
+
+typedef struct rtsPackBuffer_ {
+ StgInt /* nat */ id;
+ StgInt /* nat */ size;
+ StgInt /* nat */ unpacked_size;
+ StgTSO *tso;
+ StgClosure **buffer;
+} rtsPackBuffer;
+
+//@node Prototypes, Macros, Types, GranSim
+//@subsubsection Prototypes
+
+
+/* main packing functions */
+/*
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+void PrintPacket(rtsPackBuffer *buffer);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+*/
+/* important auxiliary functions */
+
+/*
+OLD CODE -- HWL
+void InitPackBuffer(void);
+P_ AllocateHeap (W_ size);
+P_ PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize);
+P_ PackOneNode (P_ closure, P_ tso, W_ *packbuffersize);
+P_ UnpackGraph (P_ buffer);
+
+void InitClosureQueue (void);
+P_ DeQueueClosure(void);
+void QueueClosure (P_ closure);
+// rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+*/
+
+// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty);
+// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node) ;
+
+//@node Macros, , Prototypes, GranSim
+//@subsubsection Macros
+
+/* max no. of outstanding spark steals */
+#define MAX_FISHES 1
+
+/* These are needed in the packing code to get the size of the packet
+   right. The closures themselves are never built in GrAnSim. */
+# define FETCHME_VHS IND_VHS
+# define FETCHME_HS IND_HS
+
+# define FETCHME_GA_LOCN FETCHME_HS
+
+# define FETCHME_CLOSURE_SIZE(closure) IND_CLOSURE_SIZE(closure)
+# define FETCHME_CLOSURE_NoPTRS(closure) 0L
+# define FETCHME_CLOSURE_NoNONPTRS(closure) (IND_CLOSURE_SIZE(closure)-IND_VHS)
+
+# define MAX_GAS (RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE)
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+# define PACK_HDR_SIZE 4 /* Words of header in a packet */
+
+# define PACK_HEAP_REQUIRED \
+ (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \
+ 2 * sizeofW(StgInt) + sizeofW(StgTSO*))
+
+# define PACK_FLAG_LOCN 0
+# define PACK_TSO_LOCN 1
+# define PACK_UNPACKED_SIZE_LOCN 2
+# define PACK_SIZE_LOCN 3
+# define MAGIC_PACK_FLAG 0xfabc
+
+# define GA_HDR_SIZE 1
+
+# define PROCS_HDR_POSN PAR_HDR_POSN
+# define PROCS_HDR_SIZE 1
+
+/* Accessing components of the field */
+# define PROCS(closure) ((closure)->header.gran.procs)
+/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */
+
+#endif /* GRAN */
+
+//@node End of File, , Parallel definitions
+//@section End of File
+
+#endif /* defined(PAR) || defined(GRAN) whole file */
+
+#endif /* Parallel_H */
+
+
/* -----------------------------------------------------------------------------
- * $Id: PrimOps.h,v 1.43 2000/01/12 15:15:17 simonmar Exp $
+ * $Id: PrimOps.h,v 1.44 2000/01/13 14:34:00 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
extern int cmp_thread(const StgTSO *tso1, const StgTSO *tso2);
+/* ------------------------------------------------------------------------
+ Parallel PrimOps
+
+ A par in the Haskell code is ultimately translated to a parzh macro
+ (with a case wrapped around it to guarantee that the macro is actually
+ executed; see compiler/prelude/PrimOps.lhs)
+ ---------------------------------------------------------------------- */
+
+#if defined(GRAN)
+// hash coding changed from 2.10 to 4.00
+#define parzh(r,node) parZh(r,node)
+
+#define parZh(r,node) \
+ PARZh(r,node,1,0,0,0,0,0)
+
+#define parAtzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,1)
+
+#define parAtAbszh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,proc,identifier,gran_info,size_info,par_info,rest,2)
+
+#define parAtRelzh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,proc,identifier,gran_info,size_info,par_info,rest,3)
+
+#define parAtForNowzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,0)
+
+#define parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,local) \
+{ \
+  if (closure_SHOULD_SPARK((StgClosure*)node)) { \
+    rtsSparkQ result; \
+    result = RET_STGCALL6(rtsSpark*, newSpark, \
+                          node,identifier,gran_info,size_info,par_info,local); \
+    if (local==2) { /* special case for parAtAbs */ \
+      STGCALL3(GranSimSparkAtAbs, result,(I_)where,identifier); \
+    } else if (local==3) { /* special case for parAtRel */ \
+      STGCALL3(GranSimSparkAtAbs, result,(I_)(CurrentProc+where),identifier); \
+    } else { \
+      STGCALL3(GranSimSparkAt, result,where,identifier); \
+    } \
+  } \
+}
+
+#define parLocalzh(r,node,identifier,gran_info,size_info,par_info,rest) \
+ PARZh(r,node,rest,identifier,gran_info,size_info,par_info,1)
+
+#define parGlobalzh(r,node,identifier,gran_info,size_info,par_info,rest) \
+ PARZh(r,node,rest,identifier,gran_info,size_info,par_info,0)
+
+#define PARZh(r,node,rest,identifier,gran_info,size_info,par_info,local) \
+{ \
+ if (closure_SHOULD_SPARK((StgClosure*)node)) { \
+ rtsSpark *result; \
+ result = RET_STGCALL6(rtsSpark*, newSpark, \
+ node,identifier,gran_info,size_info,par_info,local);\
+ STGCALL1(add_to_spark_queue,result); \
+ STGCALL2(GranSimSpark, local,(P_)node); \
+ } \
+}
+
+#define copyablezh(r,node) \
+ /* copyable not yet implemented!! */
+
+#define noFollowzh(r,node) \
+ /* noFollow not yet implemented!! */
+
+#endif /* GRAN */
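
/* Orientation only -- a rough sketch of what the GranSim parzh macro above
   amounts to, with the STGCALL wrappers elided.  A plain parzh(r,node) goes
   through parZh and PARZh with all annotation arguments zero:

     if (closure_SHOULD_SPARK((StgClosure *)node)) {
       rtsSpark *s = newSpark(node, 0, 0, 0, 0, 0);  // id, gran/size/par info, local=0
       add_to_spark_queue(s);
       GranSimSpark(0, (P_)node);                    // record the spark event
     }

   The annotated variants (parGlobalzh, parLocalzh, parAt*zh) only differ in
   the annotation values they pass down. */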
+
+#if 0
+
+# if defined(GRAN)
+/* ToDo: Use a parallel ticky macro for this */
+# define COUNT_SPARK(node) { (CurrentTSO->gran.globalsparks)++; sparksCreated++; }
+# elif defined(PAR)
+# define COUNT_SPARK(node) { (CurrentTSO->par.globalsparks)++; sparksCreated++; }
+# endif
+
+/*
+ Note that we must bump the required thread count NOW, rather
+ than when the thread is actually created.
+
+ forkzh not needed any more; see ghc/rts/PrimOps.hc
+*/
+#define forkzh(r,liveness,node) \
+{ \
+ extern nat context_switch; \
+ while (pending_sparks_tl[REQUIRED_POOL] == pending_sparks_lim[REQUIRED_POOL]) \
+ DO_YIELD((liveness << 1) | 1); \
+ if (closure_SHOULD_SPARK((StgClosure *)node)) { \
+ *pending_sparks_tl[REQUIRED_POOL]++ = (P_)(node); \
+ } else { \
+ sparksIgnored++; \
+ } \
+ context_switch = 1; \
+}
+
+// old version of par (previously used in GUM)
+
+#define parzh(r,node) \
+{ \
+ extern nat context_switch; \
+ COUNT_SPARK(node); \
+ if (closure_SHOULD_SPARK((StgClosure *)node) && \
+ pending_sparks_tl[ADVISORY_POOL] < pending_sparks_lim[ADVISORY_POOL]) {\
+ *pending_sparks_tl[ADVISORY_POOL]++ = (StgClosure *)(node); \
+ } else { \
+ sparksIgnored++; \
+ } \
+ r = context_switch = 1; \
+}
+#endif /* 0 */
+
#if defined(SMP) || defined(PAR)
#define parzh(r,node) \
{ \
/* -----------------------------------------------------------------------------
- * $Id: Rts.h,v 1.11 2000/01/13 12:40:15 simonmar Exp $
+ * $Id: Rts.h,v 1.12 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#define MAX_RTS_ARGS 32
/* -----------------------------------------------------------------------------
- Useful typedefs
+ Assertions and Debuggery
-------------------------------------------------------------------------- */
-typedef unsigned int nat; /* at least 32 bits (like int) */
-typedef unsigned long lnat; /* at least 32 bits */
-typedef unsigned long long ullong; /* at least 32 bits */
-
-typedef enum {
- rtsFalse = 0,
- rtsTrue
-} rtsBool;
+#define IF_RTSFLAGS(c,s) if (RtsFlags.c) { s; }
/* -----------------------------------------------------------------------------
Assertions and Debuggery
--- /dev/null
+/*
+ Time-stamp: <Mon Nov 22 1999 21:29:44 Stardate: [-30]3939.47 hwloidl>
+
+ RTS specific types.
+*/
+
+/* -------------------------------------------------------------------------
+ Generally useful typedefs
+ ------------------------------------------------------------------------- */
+
+#ifndef RTS_TYPES_H
+#define RTS_TYPES_H
+
+typedef unsigned int nat; /* at least 32 bits (like int) */
+typedef unsigned long lnat; /* at least 32 bits */
+typedef unsigned long long ullong; /* at least 32 bits */
+
+/* ullong (64|128-bit) type: only include if needed (not ANSI) */
+#if defined(__GNUC__)
+#define LL(x) (x##LL)
+#else
+#define LL(x) (x##L)
+#endif
+
+typedef enum {
+ rtsFalse = 0,
+ rtsTrue
+} rtsBool;
+
+/*
+ Types specific to the parallel runtime system.
+*/
+
+#if defined(PAR)
+/* types only needed in the parallel system */
+typedef struct hashtable ParHashTable;
+typedef struct hashlist ParHashList;
+
+// typedef double REAL_TIME;
+// typedef W_ TIME;
+// typedef GlobalTaskId Proc;
+typedef int GlobalTaskId;
+typedef ullong rtsTime;
+typedef GlobalTaskId PEs;
+typedef unsigned int rtsWeight;
+typedef int rtsPacket;
+typedef int OpCode;
+
+/* Global addresses i.e. unique ids in a parallel setup; needed in Closures.h*/
+typedef struct {
+ union {
+ StgPtr plc;
+ struct {
+ GlobalTaskId gtid;
+ int slot;
+ } gc;
+ } payload;
+ rtsWeight weight;
+} globalAddr;
+
+/* (GA, LA) pairs */
+typedef struct gala {
+ globalAddr ga;
+ StgPtr la;
+ struct gala *next;
+ rtsBool preferred;
+} GALA;
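
/* Illustrative sketch only: how a (GA,LA) pair might be filled in for a
   locally owned closure.  The names thisPE, next_slot and initial_weight are
   hypothetical; the real allocation of slots and weights lives in the GUM
   global-address code.

     GALA gala;
     gala.la                 = (StgPtr)closure;   // local address
     gala.ga.payload.gc.gtid = thisPE;            // owning task
     gala.ga.payload.gc.slot = next_slot++;       // slot in the GA table
     gala.ga.weight          = initial_weight;    // weight for weighted reference counting
     gala.preferred          = rtsTrue;
     gala.next               = NULL;
*/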
+
+#elif defined(GRAN)
+
+typedef lnat rtsTime;
+typedef StgWord PEs;
+
+#endif
+
+#endif /* RTS_TYPES_H */
/* -----------------------------------------------------------------------------
- * $Id: SchedAPI.h,v 1.8 1999/11/18 12:10:17 sewardj Exp $
+ * $Id: SchedAPI.h,v 1.9 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team 1998
*
#ifndef SCHEDAPI_H
#define SCHEDAPI_H
+#if defined(GRAN)
+// dummy priority, used when a thread is created without an explicit priority
+#define NO_PRI 0
+#endif
+
/*
* schedule() plus the thread creation functions are not part
 * of the external RTS API, so leave them out if we're
/*
* Creating threads
*/
-
+#if defined(GRAN)
+StgTSO *createThread(nat stack_size, StgInt pri);
+#else
StgTSO *createThread(nat stack_size);
+#endif
void scheduleThread(StgTSO *tso);
static inline void pushClosure (StgTSO *tso, StgClosure *c) {
static inline StgTSO *
createGenThread(nat stack_size, StgClosure *closure) {
StgTSO *t;
+#if defined(GRAN)
+ t = createThread(stack_size, NO_PRI);
+#else
t = createThread(stack_size);
+#endif
pushClosure(t,closure);
return t;
}
static inline StgTSO *
createIOThread(nat stack_size, StgClosure *closure) {
StgTSO *t;
+#if defined(GRAN)
+ t = createThread(stack_size, NO_PRI);
+#else
t = createThread(stack_size);
+#endif
pushRealWorld(t);
pushClosure(t,closure);
return t;
static inline StgTSO *
createStrictIOThread(nat stack_size, StgClosure *closure) {
StgTSO *t;
+#if defined(GRAN)
+ t = createThread(stack_size, NO_PRI);
+#else
t = createThread(stack_size);
+#endif
pushClosure(t,closure);
pushClosure(t,(StgClosure*)&forceIO_closure);
return t;
/* -----------------------------------------------------------------------------
- * $Id: Stg.h,v 1.21 1999/11/09 15:57:40 simonmar Exp $
+ * $Id: Stg.h,v 1.22 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
/* Global type definitions*/
#include "StgTypes.h"
+#include "RtsTypes.h"
/* Global constants */
#include "Constants.h"
#include "InfoTables.h"
#include "TSO.h"
+/* Simulated-parallel information */
+#include "GranSim.h"
+
+/* Parallel information */
+#include "Parallel.h"
+
/* STG/Optimised-C related stuff */
#include "SMP.h"
#include "MachRegs.h"
/* -----------------------------------------------------------------------------
- * $Id: StgMacros.h,v 1.19 1999/11/22 16:44:30 sewardj Exp $
+ * $Id: StgMacros.h,v 1.20 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
}
#define HP_CHK(headroom,ret,r,layout,tag_assts) \
+ DO_GRAN_ALLOCATE(headroom) \
if ((Hp += headroom) > HpLim) { \
EXTFUN_RTS(stg_chk_##layout); \
tag_assts \
}
#define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \
+ DO_GRAN_ALLOCATE(hp_headroom) \
if (Sp - stk_headroom < SpLim || (Hp += hp_headroom) > HpLim) { \
EXTFUN_RTS(stg_chk_##layout); \
tag_assts \
functions. In all these cases, node points to a closure that we
can just enter to restart the heap check (the NP stands for 'node points').
+ In the NP case GranSim absolutely has to check whether the current node
+ resides on the current processor. Otherwise a FETCH event has to be
+ scheduled. All that is done in GranSimFetch. -- HWL
+
HpLim points to the LAST WORD of valid allocation space.
-------------------------------------------------------------------------- */
}
#define HP_CHK_NP(headroom,ptrs,tag_assts) \
+ DO_GRAN_ALLOCATE(headroom) \
if ((Hp += (headroom)) > HpLim) { \
EXTFUN_RTS(stg_gc_enter_##ptrs); \
tag_assts \
}
#define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts) \
+ DO_GRAN_ALLOCATE(headroom) \
if ((Hp += (headroom)) > HpLim) { \
EXTFUN_RTS(stg_gc_seq_##ptrs); \
tag_assts \
}
#define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \
+ DO_GRAN_ALLOCATE(hp_headroom) \
if ((Sp - (stk_headroom)) < SpLim || (Hp += (hp_headroom)) > HpLim) { \
EXTFUN_RTS(stg_gc_enter_##ptrs); \
tag_assts \
/* Heap checks for branches of a primitive case / unboxed tuple return */
#define GEN_HP_CHK_ALT(headroom,lbl,tag_assts) \
+ DO_GRAN_ALLOCATE(headroom) \
if ((Hp += (headroom)) > HpLim) { \
EXTFUN_RTS(lbl); \
tag_assts \
JMP_(stg_block_##ptrs); \
}
+#if defined(PAR)
+/*
+ Similar to BLOCK_NP but separates the saving of the thread state from the
+ actual jump via an StgReturn
+*/
+
+#define SAVE_THREAD_STATE(ptrs) \
+ ASSERT(ptrs==1); \
+ Sp -= 1; \
+ Sp[0] = R1.w; \
+ SaveThreadState();
+
+#define THREAD_RETURN(ptrs) \
+ ASSERT(ptrs==1); \
+ CurrentTSO->whatNext = ThreadEnterGHC; \
+ R1.i = ThreadBlocked; \
+ JMP_(StgReturn);
+#endif
+
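/* Intended use, as a sketch (inferred from the comment above and from the
   par_block_1_no_jump/par_jump pair later in this patch): a blocking entry
   code saves the thread state, may then call back into the RTS, and only
   afterwards returns to the scheduler --

     SAVE_THREAD_STATE(1);          // stash R1 and save the TSO state
     ... STGCALL into the RTS, e.g. to send a FETCH message ...
     THREAD_RETURN(1);              // ThreadBlocked, then JMP_(StgReturn)
*/
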
/* -----------------------------------------------------------------------------
CCall_GC needs to push a dummy stack frame containing the contents
of volatile registers and variables.
/* -----------------------------------------------------------------------------
- * $Id: StgMiscClosures.h,v 1.15 1999/11/02 15:05:53 simonmar Exp $
+ * $Id: StgMiscClosures.h,v 1.16 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
STGFUN(SE_BLACKHOLE_entry);
STGFUN(SE_CAF_BLACKHOLE_entry);
#endif
+#if defined(PAR) || defined(GRAN)
+STGFUN(RBH_entry);
+#endif
STGFUN(BCO_entry);
STGFUN(EVACUATED_entry);
STGFUN(FOREIGN_entry);
STGFUN(END_MUT_LIST_entry);
STGFUN(dummy_ret_entry);
+/* this is the NIL ptr for a TSO queue (e.g. runnable queue) */
+#define END_TSO_QUEUE ((StgTSO *)(void*)&END_TSO_QUEUE_closure)
+#if defined(PAR) || defined(GRAN)
+/* this is the NIL ptr for a blocking queue */
+# define END_BQ_QUEUE ((StgBlockingQueueElement *)(void*)&END_TSO_QUEUE_closure)
+/* this is the NIL ptr for a blocked fetch queue (as in PendingFetches in GUM) */
+# define END_BF_QUEUE ((StgBlockedFetch *)(void*)&END_TSO_QUEUE_closure)
+#endif
+
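/* Sketch (assuming the usual 'link' field on StgBlockingQueueElement): code
   that walks a blocking queue stops at this NIL marker, e.g.

     StgBlockingQueueElement *bqe;
     for (bqe = bq->blocking_queue; bqe != END_BQ_QUEUE; bqe = bqe->link) {
       ...   // bqe is a TSO or, in GUM, possibly a BLOCKED_FETCH
     }
*/
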
/* info tables */
extern DLL_IMPORT_RTS const StgInfoTable IND_info;
extern DLL_IMPORT_RTS const StgInfoTable SE_BLACKHOLE_info;
extern DLL_IMPORT_RTS const StgInfoTable SE_CAF_BLACKHOLE_info;
#endif
+#if defined(PAR) || defined(GRAN)
+extern DLL_IMPORT_RTS const StgInfoTable RBH_info;
+#endif
extern DLL_IMPORT_RTS const StgInfoTable BCO_info;
extern DLL_IMPORT_RTS const StgInfoTable EVACUATED_info;
extern DLL_IMPORT_RTS const StgInfoTable FOREIGN_info;
/* -----------------------------------------------------------------------------
- * $Id: TSO.h,v 1.9 1999/12/01 14:34:49 simonmar Exp $
+ * $Id: TSO.h,v 1.10 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#ifndef TSO_H
#define TSO_H
+#if defined(GRAN) || defined(PAR)
+// magic marker for TSOs; debugging only
+#define TSO_MAGIC 4321
+
+typedef struct {
+ StgInt pri;
+ StgInt magic;
+ StgInt sparkname;
+ rtsTime startedat;
+ rtsBool exported;
+ StgInt basicblocks;
+ StgInt allocs;
+ rtsTime exectime;
+ rtsTime fetchtime;
+ rtsTime fetchcount;
+ rtsTime blocktime;
+ StgInt blockcount;
+ rtsTime blockedat;
+ StgInt globalsparks;
+ StgInt localsparks;
+ rtsTime clock;
+} StgTSOStatBuf;
+#endif
+
#if defined(PROFILING)
typedef struct {
CostCentreStack *CCCS; /* thread's current CCS */
#endif /* PROFILING */
#if defined(PAR)
-typedef struct {
-} StgTSOParInfo;
+typedef StgTSOStatBuf StgTSOParInfo;
#else /* !PAR */
typedef struct {
} StgTSOParInfo;
#endif /* PAR */
-#if defined(TICKY_TICKY)
+#if defined(GRAN)
+typedef StgTSOStatBuf StgTSOGranInfo;
+#else /* !GRAN */
+typedef struct {
+} StgTSOGranInfo;
+#endif /* GRAN */
+
+
+#if defined(TICKY_TICKY)
typedef struct {
} StgTSOTickyInfo;
#else /* !TICKY_TICKY */
BlockedOnRead,
BlockedOnWrite,
BlockedOnDelay
+#if defined(PAR)
+ , BlockedOnGA // blocked on a remote closure represented by a Global Address
+#endif
} StgTSOBlockReason;
typedef union {
struct StgTSO_ *tso;
int fd;
unsigned int delay;
+#if defined(PAR)
+ globalAddr ga;
+#endif
} StgTSOBlockInfo;
/*
typedef struct StgTSO_ {
StgHeader header;
struct StgTSO_* link;
+ /* SDM and HWL agree that it would be cool to have a list of all TSOs */
StgMutClosure * mut_link; /* TSO's are mutable of course! */
StgTSOWhatNext whatNext;
StgTSOBlockReason why_blocked;
StgTSOTickyInfo ticky;
StgTSOProfInfo prof;
StgTSOParInfo par;
- /* GranSim Info? */
+ StgTSOGranInfo gran;
/* The thread stack... */
StgWord stack_size; /* stack size in *words* */
/* -----------------------------------------------------------------------------
- * $Id: Updates.h,v 1.15 1999/11/09 15:47:09 simonmar Exp $
+ * $Id: Updates.h,v 1.16 2000/01/13 14:34:01 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
Awaken any threads waiting on this computation
-------------------------------------------------------------------------- */
+#if defined(PAR)
+
+/*
+  In a parallel setup several types of closures might have a blocking queue:
+ BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
+ reawakened via calling UPD_IND on that closure after
+ having finished the computation of the graph
+ FETCH_ME_BQ ... a global indirection (FETCH_ME) may be entered by a
+ local TSO, turning it into a FETCH_ME_BQ; it will be
+ reawakened via calling processResume
+ RBH ... a revertible black hole may be entered by another
+ local TSO, putting it onto its blocking queue; since
+ RBHs only exist while the corresponding closure is in
+ transit, they will be reawakened via calling
+ convertToFetchMe (upon processing an ACK message)
+
+ In a parallel setup a blocking queue may contain 3 types of closures:
+ TSO ... as in the default concurrent setup
+ BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
+ the result of the current computation
+ CONSTR ... a RBHSave closure (which contains data ripped out of
+ the closure to make room for a blocking queue; since
+ it only contains data we use the existing type of
+ a CONSTR closure); this closure is the end of a
+ blocking queue for an RBH closure; it only exists in
+ this kind of blocking queue and must be at the end
+ of the queue
+*/
+extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#define DO_AWAKEN_BQ(bqe, node) STGCALL2(awakenBlockedQueue, bqe, node);
+
+#define AWAKEN_BQ(info,closure) \
+ if (info == &BLACKHOLE_BQ_info || \
+ info == &FETCH_ME_BQ_info || \
+ get_itbl(closure)->type == RBH) { \
+ StgBlockingQueueElement *bqe = ((StgBlockingQueue *)closure)->blocking_queue;\
+ ASSERT(bqe!=END_BQ_QUEUE); \
+ DO_AWAKEN_BQ(bqe, closure); \
+ }
+
+#elif defined(GRAN)
+
+extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#define DO_AWAKEN_BQ(bq, node) STGCALL2(awakenBlockedQueue, bq, node);
+
+/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
+ not checked. The rest of the code is the same as for GUM.
+*/
+#define AWAKEN_BQ(info,closure) \
+ if (info == &BLACKHOLE_BQ_info || \
+ get_itbl(closure)->type == RBH) { \
+ StgBlockingQueueElement *bqe = ((StgBlockingQueue *)closure)->blocking_queue;\
+ ASSERT(bqe!=END_BQ_QUEUE); \
+ DO_AWAKEN_BQ(bqe, closure); \
+ }
+
+
+#else /* !GRAN && !PAR */
+
extern void awakenBlockedQueue(StgTSO *q);
+#define DO_AWAKEN_BQ(closure) \
+ STGCALL1(awakenBlockedQueue, \
+ ((StgBlockingQueue *)closure)->blocking_queue);
#define AWAKEN_BQ(info,closure) \
if (info == &BLACKHOLE_BQ_info) { \
- STGCALL1(awakenBlockedQueue, \
- ((StgBlockingQueue *)closure)->blocking_queue); \
+ DO_AWAKEN_BQ(closure); \
}
+#endif /* GRAN || PAR */
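
/* Rough usage sketch (mirrors the sequential update code): when updating a
   closure, read its info pointer, let AWAKEN_BQ wake anything blocked on it,
   and then overwrite the closure with an indirection --

     const StgInfoTable *info = updatee->header.info;
     AWAKEN_BQ(info, updatee);    // no-op unless updatee carries a blocking queue
     ... overwrite updatee with an indirection to the result ...
*/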
-/* -----------------------------------------------------------------------------
+/* -------------------------------------------------------------------------
Push an update frame on the stack.
- -------------------------------------------------------------------------- */
+ ------------------------------------------------------------------------- */
#if defined(PROFILING)
#define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS
SRC_HC_OPTS += -hisuf $(way_)hi
endif
+# HWL: for debugging GranSim generate .hc and .s files
+SRC_HC_OPTS += -keep-hc-files-too -keep-s-files-too
+# # HWL: why isn't that on by default?
+# ifeq "$(way)" "mg"
+# SRC_HC_OPTS += -gransim
+# endif
+
# per-module flags
PrelArrExtra_HC_OPTS += -monly-2-regs
PrelNumExtra_HC_OPTS += -H24m -K2m
PrelPack_HC_OPTS += -K4m
-PrelBase_HC_OPTS += -H12m
+PrelBase_HC_OPTS += -H32m -K32m
PrelRead_HC_OPTS += -H20m
PrelTup_HC_OPTS += -H12m -K2m
PrelNum_HC_OPTS += -H12m -K4m
import PrelAddr ( Addr, nullAddr )
import PrelReal ( toInteger )
import PrelPack ( packString )
+#ifndef __PARALLEL_HASKELL__
import PrelWeak ( addForeignFinalizer )
+#endif
import Ix
#ifdef __CONCURRENT_HASKELL__
\begin{code}
{-# OPTIONS -fno-implicit-prelude #-}
+#ifndef __PARALLEL_HASKELL__
+
module PrelWeak where
import PrelGHC
(Weak w1) == (Weak w2) = w1 `sameWeak#` w2
-}
+#endif
+
\end{code}
# -----------------------------------------------------------------------------
-# $Id: paths.mk,v 1.17 2000/01/10 11:59:55 simonmar Exp $
+# $Id: paths.mk,v 1.18 2000/01/13 14:34:02 hwloidl Exp $
#
# ghc project specific make variables
#
GHC_UTILS_DIR := $(TOP)/utils
GHC_INTERPRETER_DIR := $(TOP)/interpreter
-GHC_SYSMAN_DIR := $(GHC_RUNTIME_DIR)/gum
GHC_HSP_DIR := $(GHC_HSC_DIR)
GHC_MKDEPENDHS_DIR := $(GHC_UTILS_DIR)/mkdependHS
GHC_HSCPP_DIR := $(GHC_UTILS_DIR)/hscpp
GHC_MKDEPENDHS = $(GHC_MKDEPENDHS_DIR)/mkdependHS-inplace
GHC_HSP = $(GHC_HSP_DIR)/hsp
GHC_HSC = $(GHC_HSC_DIR)/hsc
-GHC_SYSMAN = $(GHC_RUNTIME_DIR)/gum/SysMan
-
UNLIT = $(GHC_UNLIT_DIR)/unlit
GHC_UNLIT = $(GHC_UNLIT_DIR)/unlit
+GHC_SYSMAN = $(GHC_RUNTIME_DIR)/parallel/SysMan
+GHC_SYSMAN_DIR = $(GHC_RUNTIME_DIR)/parallel
+
#-----------------------------------------------------------------------------
# Stuff for the C-compiling phase in particular...
/* -----------------------------------------------------------------------------
- * $Id: ClosureFlags.c,v 1.5 2000/01/12 12:39:20 simonmar Exp $
+ * $Id: ClosureFlags.c,v 1.6 2000/01/13 14:34:02 hwloidl Exp $
*
* (c) The GHC Team 1998-1999
*
* to thunks.)
*/
+/* 0 1 2 3 4 5 6 7 */
/* HNF BTM NS STA THU MUT UPT SRT */
/* INVALID_OBJECT */ ( 0 ),
/* WEAK */ (_HNF| _NS| _UPT ),
/* FOREIGN */ (_HNF| _NS| _UPT ),
/* STABLE_NAME */ (_HNF| _NS| _UPT ),
+
/* TSO */ (_HNF| _NS| _MUT|_UPT ),
-/* BLOCKED_FETCH */ (_HNF| _NS ),
-/* FETCH_ME */ (_HNF| _NS ),
-/* EVACUATED */ ( 0 )
+/* BLOCKED_FETCH */ (_HNF| _NS| _MUT|_UPT ),
+/* FETCH_ME */ (_HNF| _NS| _MUT|_UPT ),
+/* FETCH_ME_BQ */ ( _NS| _MUT|_UPT ),
+/* RBH */ ( _NS| _MUT|_UPT ),
+
+/* EVACUATED */ ( 0 ),
+
+/* N_CLOSURE_TYPES */ ( 0 )
};
/* -----------------------------------------------------------------------------
- * $Id: Exception.hc,v 1.2 1999/12/02 09:52:41 simonmar Exp $
+ * $Id: Exception.hc,v 1.3 2000/01/13 14:34:02 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#include "Storage.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
+#if defined(PAR)
+# include "FetchMe.h"
+#endif
/* -----------------------------------------------------------------------------
Exception Primitives
{
FB_
ASSERT(CurrentTSO->blocked_exceptions != NULL);
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+ // is CurrentTSO->block_info.closure always set to the node
+ // holding the blocking queue !? -- HWL
+ awakenBlockedQueue(CurrentTSO->blocked_exceptions,
+ CurrentTSO->block_info.closure);
+#else
awakenBlockedQueue(CurrentTSO->blocked_exceptions);
+#endif
CurrentTSO->blocked_exceptions = NULL;
Sp++;
JMP_(ENTRY_CODE(Sp[0]));
STK_CHK_GEN(2, R1_PTR, unblockAsyncExceptionszh_fast, );
if (CurrentTSO->blocked_exceptions != NULL) {
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+ // is CurrentTSO->block_info.closure always set to the node
+ // holding the blocking queue !? -- HWL
+ awakenBlockedQueue(CurrentTSO->blocked_exceptions,
+ CurrentTSO->block_info.closure);
+#else
awakenBlockedQueue(CurrentTSO->blocked_exceptions);
+#endif
CurrentTSO->blocked_exceptions = NULL;
Sp--;
Sp[0] = (W_)&blockAsyncExceptionszh_ret_info;
/* -----------------------------------------------------------------------------
- * $Id: GC.c,v 1.68 1999/12/01 15:07:00 simonmar Exp $
+ * $Id: GC.c,v 1.69 2000/01/13 14:34:02 hwloidl Exp $
*
* (c) The GHC Team 1998-1999
*
*
* ---------------------------------------------------------------------------*/
+//@menu
+//* Includes::
+//* STATIC OBJECT LIST::
+//* Static function declarations::
+//* Garbage Collect::
+//* Weak Pointers::
+//* Evacuation::
+//* Scavenging::
+//* Reverting CAFs::
+//* Sanity code for CAF garbage collection::
+//* Lazy black holing::
+//* Stack squeezing::
+//* Pausing a thread::
+//* Index::
+//@end menu
+
+//@node Includes, STATIC OBJECT LIST
+//@subsection Includes
+
#include "Rts.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
#include "SchedAPI.h"
#include "Weak.h"
#include "StablePriv.h"
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h"
+# include "ParallelRts.h"
+# include "FetchMe.h"
+# if defined(DEBUG)
+# include "Printer.h"
+# include "ParallelDebug.h"
+# endif
+#endif
StgCAF* enteredCAFs;
+//@node STATIC OBJECT LIST, Static function declarations, Includes
+//@subsection STATIC OBJECT LIST
+
/* STATIC OBJECT LIST.
*
* During GC:
lnat new_blocks; /* blocks allocated during this GC */
lnat g0s0_pcnt_kept = 30; /* percentage of g0s0 live at last minor GC */
+//@node Static function declarations, Garbage Collect, STATIC OBJECT LIST
+//@subsection Static function declarations
+
/* -----------------------------------------------------------------------------
Static function declarations
-------------------------------------------------------------------------- */
static void gcCAFs ( void );
#endif
+//@node Garbage Collect, Weak Pointers, Static function declarations
+//@subsection Garbage Collect
+
/* -----------------------------------------------------------------------------
GarbageCollect
- free from-space in each step, and set from-space = to-space.
-------------------------------------------------------------------------- */
+//@cindex GarbageCollect
void GarbageCollect(void (*get_roots)(void))
{
CostCentreStack *prev_CCS;
#endif
+#if defined(DEBUG) && defined(GRAN)
+ IF_DEBUG(gc, belch("@@ Starting garbage collection at %ld (%lx)\n",
+ Now, Now))
+#endif
+
/* tell the stats department that we've started a GC */
stat_startGC();
major_gc = (N == RtsFlags.GcFlags.generations-1);
/* check stack sanity *before* GC (ToDo: check all threads) */
- /*IF_DEBUG(sanity, checkTSO(MainTSO,0)); */
- IF_DEBUG(sanity, checkFreeListSanity());
+#if defined(GRAN)
+ // ToDo!: check sanity IF_DEBUG(sanity, checkTSOsSanity());
+#endif
+ IF_DEBUG(sanity, checkFreeListSanity());
/* Initialise the static object lists
*/
/* Do the mut-once lists first */
for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+ IF_PAR_DEBUG(verbose,
+ printMutOnceList(&generations[g]));
scavenge_mut_once_list(&generations[g]);
evac_gen = g;
for (st = generations[g].n_steps-1; st >= 0; st--) {
}
for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+ IF_PAR_DEBUG(verbose,
+ printMutableList(&generations[g]));
scavenge_mutable_list(&generations[g]);
evac_gen = g;
for (st = generations[g].n_steps-1; st >= 0; st--) {
evac_gen = 0;
get_roots();
+#if defined(PAR)
+ /* And don't forget to mark the TSO if we got here direct from
+ * Haskell! */
+ /* Not needed in a seq version?
+ if (CurrentTSO) {
+ CurrentTSO = (StgTSO *)MarkRoot((StgClosure *)CurrentTSO);
+ }
+ */
+
+ /* Mark the entries in the GALA table of the parallel system */
+ markLocalGAs(major_gc);
+#endif
+
/* Mark the weak pointer list, and prepare to detect dead weak
* pointers.
*/
int pc_free;
adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
- IF_DEBUG(gc, fprintf(stderr, "Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks));
+ IF_DEBUG(gc, fprintf(stderr, "@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks));
pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ {
heapOverflow();
*/
resetNurseries();
+#if defined(PAR)
+ /* Reconstruct the Global Address tables used in GUM */
+ RebuildGAtables(major_gc);
+#endif
+
/* start any pending finalizers */
scheduleFinalizers(old_weak_ptr_list);
stat_endGC(allocated, collected, live, copied, N);
}
+//@node Weak Pointers, Evacuation, Garbage Collect
+//@subsection Weak Pointers
+
/* -----------------------------------------------------------------------------
Weak Pointers
probably be optimised by keeping per-generation lists of weak
pointers, but for a few weak pointers this scheme will work.
-------------------------------------------------------------------------- */
+//@cindex traverse_weak_ptr_list
static rtsBool
traverse_weak_ptr_list(void)
evacuated need to be evacuated now.
-------------------------------------------------------------------------- */
+//@cindex cleanup_weak_ptr_list
+
static void
cleanup_weak_ptr_list ( StgWeak **list )
{
closure if it is alive, or NULL otherwise.
-------------------------------------------------------------------------- */
+//@cindex isAlive
+
StgClosure *
isAlive(StgClosure *p)
{
* for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
*/
+#if 1 || !defined(PAR)
/* ignore closures in generations that we're not collecting. */
+ /* In GUM we use this routine when rebuilding GA tables; for some
+ reason it has problems with the LOOKS_LIKE_STATIC macro -- HWL */
if (LOOKS_LIKE_STATIC(p) || Bdescr((P_)p)->gen->no > N) {
return p;
}
+#endif
switch (info->type) {
}
}
+//@cindex MarkRoot
StgClosure *
MarkRoot(StgClosure *root)
{
+ //if (root != END_TSO_QUEUE)
return evacuate(root);
}
+//@cindex MarkRootHWL
+StgClosure *
+MarkRootHWL(StgClosure *root)
+{
+ StgClosure *new = evacuate(root);
+ upd_evacuee(root, new);
+ return new;
+}
+
+//@cindex addBlock
static void addBlock(step *step)
{
bdescr *bd = allocBlock();
new_blocks++;
}
+//@cindex upd_evacuee
+
static __inline__ void
upd_evacuee(StgClosure *p, StgClosure *dest)
{
((StgEvacuated *)p)->evacuee = dest;
}
+//@cindex copy
+
static __inline__ StgClosure *
copy(StgClosure *src, nat size, step *step)
{
* used to optimise evacuation of BLACKHOLEs.
*/
+//@cindex copyPart
+
static __inline__ StgClosure *
copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step)
{
return (StgClosure *)dest;
}
+//@node Evacuation, Scavenging, Weak Pointers
+//@subsection Evacuation
+
/* -----------------------------------------------------------------------------
Evacuate a large object
evacuated, or 0 otherwise.
-------------------------------------------------------------------------- */
+//@cindex evacuate_large
+
static inline void
evacuate_large(StgPtr p, rtsBool mutable)
{
the promotion until the next GC.
-------------------------------------------------------------------------- */
+//@cindex mkMutCons
+
static StgClosure *
mkMutCons(StgClosure *ptr, generation *gen)
{
didn't manage to evacuate this object into evac_gen.
-------------------------------------------------------------------------- */
-
+//@cindex evacuate
static StgClosure *
evacuate(StgClosure *q)
step *step;
const StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
loop:
if (HEAP_ALLOCED(q)) {
bd = Bdescr((P_)q);
ASSERT(q && (LOOKS_LIKE_GHC_INFO(GET_INFO(q))
|| IS_HUGS_CONSTR_INFO(GET_INFO(q))));
info = get_itbl(q);
-
+ /*
+ if (info->type==RBH) {
+ info = REVERT_INFOPTR(info);
+ IF_DEBUG(gc,
+ belch("@_ Trying to evacuate an RBH %p (%s); reverting to IP %p (%s)",
+ q, info_type(q), info, info_type_by_ip(info)));
+ }
+ */
+
switch (info -> type) {
case BCO:
case CATCH_FRAME:
case SEQ_FRAME:
/* shouldn't see these */
- barf("evacuate: stack frame\n");
+ barf("evacuate: stack frame at %p\n", q);
case AP_UPD:
case PAP:
if (evac_gen > 0) { /* optimisation */
StgClosure *p = ((StgEvacuated*)q)->evacuee;
if (Bdescr((P_)p)->gen->no < evac_gen) {
- /* fprintf(stderr,"evac failed!\n");*/
+ IF_DEBUG(gc, belch("@@ evacuate: evac of EVACUATED node %p failed!", p));
failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
}
}
+#if defined(PAR)
+ case RBH: // cf. BLACKHOLE_BQ
+ {
+ //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str);
+ to = copy(q,BLACKHOLE_sizeW(),step);
+ //ToDo: derive size etc from reverted IP
+ //to = copy(q,size,step);
+ recordMutable((StgMutClosure *)to);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: RBH %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+ }
+
case BLOCKED_FETCH:
+ ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE);
+ to = copy(q,sizeofW(StgBlockedFetch),step);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+
case FETCH_ME:
- fprintf(stderr,"evacuate: unimplemented/strange closure type\n");
- return q;
+      ASSERT(sizeofW(StgFetchMe) >= MIN_UPD_SIZE);
+ to = copy(q,sizeofW(StgFetchMe),step);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+
+ case FETCH_ME_BQ:
+      ASSERT(sizeofW(StgFetchMeBlockingQueue) >= MIN_UPD_SIZE);
+ to = copy(q,sizeofW(StgFetchMeBlockingQueue),step);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+#endif
default:
barf("evacuate: strange closure type %d", (int)(info->type));
relocate_TSO is called just after a TSO has been copied from src to
dest. It adjusts the update frame list for the new location.
-------------------------------------------------------------------------- */
+//@cindex relocate_TSO
StgTSO *
relocate_TSO(StgTSO *src, StgTSO *dest)
return dest;
}
+//@node Scavenging, Reverting CAFs, Evacuation
+//@subsection Scavenging
+
+//@cindex scavenge_srt
+
static inline void
scavenge_srt(const StgInfoTable *info)
{
scavenging a mutable object where early promotion isn't such a good
idea.
-------------------------------------------------------------------------- */
-
+//@cindex scavenge
static void
scavenge(step *step)
|| IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p))));
info = get_itbl((StgClosure *)p);
+ /*
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info);
+ */
+
switch (info -> type) {
case BCO:
break;
}
+#if defined(PAR)
+ case RBH: // cf. BLACKHOLE_BQ
+ {
+ // nat size, ptrs, nonptrs, vhs;
+ // char str[80];
+ // StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str);
+ StgRBH *rbh = (StgRBH *)p;
+ (StgClosure *)rbh->blocking_queue =
+ evacuate((StgClosure *)rbh->blocking_queue);
+ if (failed_to_evac) {
+ failed_to_evac = rtsFalse;
+ recordMutable((StgMutClosure *)rbh);
+ }
+ IF_DEBUG(gc,
+ belch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)",
+ p, info_type(p), (StgClosure *)rbh->blocking_queue));
+ // ToDo: use size of reverted closure here!
+ p += BLACKHOLE_sizeW();
+ break;
+ }
+
case BLOCKED_FETCH:
+ {
+ StgBlockedFetch *bf = (StgBlockedFetch *)p;
+ /* follow the pointer to the node which is being demanded */
+ (StgClosure *)bf->node =
+ evacuate((StgClosure *)bf->node);
+ /* follow the link to the rest of the blocking queue */
+ (StgClosure *)bf->link =
+ evacuate((StgClosure *)bf->link);
+ if (failed_to_evac) {
+ failed_to_evac = rtsFalse;
+ recordMutable((StgMutClosure *)bf);
+ }
+ IF_DEBUG(gc,
+ belch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it",
+ bf, info_type((StgClosure *)bf),
+ bf->node, info_type(bf->node)));
+ p += sizeofW(StgBlockedFetch);
+ break;
+ }
+
case FETCH_ME:
+ IF_DEBUG(gc,
+ belch("@@ scavenge: HWL claims nothing to do for %p (%s)",
+ p, info_type((StgClosure *)p)));
+ p += sizeofW(StgFetchMe);
+ break; // nothing to do in this case
+
+ case FETCH_ME_BQ: // cf. BLACKHOLE_BQ
+ {
+ StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p;
+ (StgClosure *)fmbq->blocking_queue =
+ evacuate((StgClosure *)fmbq->blocking_queue);
+ if (failed_to_evac) {
+ failed_to_evac = rtsFalse;
+ recordMutable((StgMutClosure *)fmbq);
+ }
+ IF_DEBUG(gc,
+ belch("@@ scavenge: %p (%s) exciting, isn't it",
+ p, info_type((StgClosure *)p)));
+ p += sizeofW(StgFetchMeBlockingQueue);
+ break;
+ }
+#endif
+
case EVACUATED:
barf("scavenge: unimplemented/strange closure type\n");
because they contain old-to-new generation pointers. Only certain
objects can have this property.
-------------------------------------------------------------------------- */
+//@cindex scavenge_one
+
static rtsBool
scavenge_one(StgClosure *p)
{
info = get_itbl(p);
+ /* ngoq moHqu'!
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ */
+
switch (info -> type) {
case FUN:
generations older than the one being collected) as roots. We also
remove non-mutable objects from the mutable list at this point.
-------------------------------------------------------------------------- */
+//@cindex scavenge_mut_once_list
static void
scavenge_mut_once_list(generation *gen)
|| IS_HUGS_CONSTR_INFO(GET_INFO(p))));
info = get_itbl(p);
+ /*
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ */
switch(info->type) {
case IND_OLDGEN:
((StgIndOldGen *)p)->indirectee =
evacuate(((StgIndOldGen *)p)->indirectee);
-#if 0
+#ifdef DEBUG
+ if (RtsFlags.DebugFlags.gc)
/* Debugging code to print out the size of the thing we just
* promoted
*/
gen->mut_once_list = new_list;
}
+//@cindex scavenge_mutable_list
static void
scavenge_mutable_list(generation *gen)
|| IS_HUGS_CONSTR_INFO(GET_INFO(p))));
info = get_itbl(p);
+ /*
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ */
switch(info->type) {
case MUT_ARR_PTRS_FROZEN:
{
StgPtr end, q;
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MUT_ARR_PTRS_FROZEN %p; size: %#x ; next: %p",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p), p->mut_link));
+
end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
evac_gen = gen->no;
for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) {
{
StgPtr end, q;
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MUT_ARR_PTRS %p; size: %#x ; next: %p",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p), p->mut_link));
+
end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) {
(StgClosure *)*q = evacuate((StgClosure *)*q);
* it from the mutable list if possible by promoting whatever it
* points to.
*/
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MUT_VAR %p; var: %p ; next: %p",
+ p, ((StgMutVar *)p)->var, p->mut_link));
+
ASSERT(p->header.info != &MUT_CONS_info);
((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
p->mut_link = gen->mut_list;
case MVAR:
{
StgMVar *mvar = (StgMVar *)p;
+
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MAVR %p; head: %p; tail: %p; value: %p ; next: %p",
+ mvar, mvar->head, mvar->tail, mvar->value, p->mut_link));
+
(StgClosure *)mvar->head = evacuate((StgClosure *)mvar->head);
(StgClosure *)mvar->tail = evacuate((StgClosure *)mvar->tail);
(StgClosure *)mvar->value = evacuate((StgClosure *)mvar->value);
case BLACKHOLE_BQ:
{
StgBlockingQueue *bh = (StgBlockingQueue *)p;
+
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging BLACKHOLE_BQ (%p); next: %p",
+ p, p->mut_link));
+
(StgClosure *)bh->blocking_queue =
evacuate((StgClosure *)bh->blocking_queue);
p->mut_link = gen->mut_list;
}
continue;
+ // HWL: old PAR code deleted here
+
default:
/* shouldn't have anything else on the mutables list */
barf("scavenge_mutable_list: strange object? %d", (int)(info->type));
}
}
+//@cindex scavenge_static
+
static void
scavenge_static(void)
{
while (p != END_OF_STATIC_LIST) {
info = get_itbl(p);
-
+ /*
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ */
/* make sure the info pointer is into text space */
ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p))
|| IS_HUGS_CONSTR_INFO(GET_INFO(p))));
objects pointed to by it. We can use the same code for walking
PAPs, since these are just sections of copied stack.
-------------------------------------------------------------------------- */
+//@cindex scavenge_stack
static void
scavenge_stack(StgPtr p, StgPtr stack_end)
const StgInfoTable* info;
StgWord32 bitmap;
+ IF_DEBUG(sanity, belch(" scavenging stack between %p and %p", p, stack_end));
+
/*
* Each time around this loop, we are looking at a chunk of stack
* that starts with either a pending argument section or an
/* probably a slow-entry point return address: */
case FUN:
case FUN_STATIC:
- p++;
+ {
+#if 0
+ StgPtr old_p = p;
+ p++; p++;
+ IF_DEBUG(sanity,
+ belch("HWL: scavenge_stack: FUN(_STATIC) adjusting p from %p to %p (instead of %p)",
+ old_p, p, old_p+1));
+#else
+ p++; /* what if FHS!=1 !? -- HWL */
+#endif
goto follow_srt;
+ }
/* Specialised code for update frames, since they're so common.
* We *know* the updatee points to a BLACKHOLE, CAF_BLACKHOLE,
}
/* small bitmap (< 32 entries, or 64 on a 64-bit machine) */
- case RET_BCO:
- case RET_SMALL:
- case RET_VEC_SMALL:
case STOP_FRAME:
case CATCH_FRAME:
case SEQ_FRAME:
+ {
+ StgPtr old_p = p; // debugging only -- HWL
+ /* stack frames like these are ordinary closures and therefore may
+ contain setup-specific fixed-header words (as in GranSim!);
+         so these cases should not use p++ but &(p->payload) -- HWL */
+ IF_DEBUG(gran, IF_DEBUG(sanity, printObj(p)));
+ bitmap = info->layout.bitmap;
+
+ p = (StgPtr)&(((StgClosure *)p)->payload);
+ IF_DEBUG(sanity,
+ belch("HWL: scavenge_stack: (STOP|CATCH|SEQ)_FRAME adjusting p from %p to %p (instead of %p)",
+ old_p, p, old_p+1));
+ goto small_bitmap;
+ }
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
bitmap = info->layout.bitmap;
p++;
+ /* this assumes that the payload starts immediately after the info-ptr */
small_bitmap:
while (bitmap != 0) {
if ((bitmap & 1) == 0) {
objects are (repeatedly) mutable, so most of the time evac_gen will
be zero.
--------------------------------------------------------------------------- */
+//@cindex scavenge_large
static void
scavenge_large(step *step)
case TSO:
scavengeTSO((StgTSO *)p);
+ // HWL: old PAR code deleted here
continue;
default:
}
}
+//@cindex zero_static_object_list
+
static void
zero_static_object_list(StgClosure* first_static)
{
* It doesn't do any harm to zero all the mutable link fields on the
* mutable list.
*/
+//@cindex zero_mutable_list
+
static void
zero_mutable_list( StgMutClosure *first )
{
}
}
+//@node Reverting CAFs, Sanity code for CAF garbage collection, Scavenging
+//@subsection Reverting CAFs
+
/* -----------------------------------------------------------------------------
Reverting CAFs
-------------------------------------------------------------------------- */
+//@cindex RevertCAFs
void RevertCAFs(void)
{
enteredCAFs = END_CAF_LIST;
}
+//@cindex revert_dead_CAFs
+
void revert_dead_CAFs(void)
{
StgCAF* caf = enteredCAFs;
}
}
+//@node Sanity code for CAF garbage collection, Lazy black holing, Reverting CAFs
+//@subsection Sanity code for CAF garbage collection
+
/* -----------------------------------------------------------------------------
Sanity code for CAF garbage collection.
-------------------------------------------------------------------------- */
#ifdef DEBUG
+//@cindex gcCAFs
+
static void
gcCAFs(void)
{
}
#endif
+//@node Lazy black holing, Stack squeezing, Sanity code for CAF garbage collection
+//@subsection Lazy black holing
+
/* -----------------------------------------------------------------------------
Lazy black holing.
some work, we have to run down the stack and black-hole all the
closures referred to by update frames.
-------------------------------------------------------------------------- */
+//@cindex threadLazyBlackHole
static void
threadLazyBlackHole(StgTSO *tso)
}
}
+//@node Stack squeezing, Pausing a thread, Lazy black holing
+//@subsection Stack squeezing
+
/* -----------------------------------------------------------------------------
* Stack squeezing
*
* lazy black holing here.
*
* -------------------------------------------------------------------------- */
+//@cindex threadSqueezeStack
static void
threadSqueezeStack(StgTSO *tso)
StgUpdateFrame *prev_frame; /* Temporally previous */
StgPtr bottom;
rtsBool prev_was_update_frame;
+#if DEBUG
+ StgUpdateFrame *top_frame;
+ nat upd_frames=0, stop_frames=0, catch_frames=0, seq_frames=0,
+ bhs=0, squeezes=0;
+ void printObj( StgClosure *obj ); // from Printer.c
+
+ top_frame = tso->su;
+#endif
bottom = &(tso->stack[tso->stack_size]);
frame = tso->su;
frame->link = next_frame;
next_frame = frame;
frame = prev_frame;
+#if DEBUG
+ IF_DEBUG(sanity,
+ if (!(frame>=top_frame && frame<=bottom)) {
+ printObj((StgClosure *)prev_frame);
+ barf("threadSqueezeStack: current frame is rubbish %p; previous was %p\n",
+ frame, prev_frame);
+ })
+ switch (get_itbl(frame)->type) {
+ case UPDATE_FRAME: upd_frames++;
+ if (frame->updatee->header.info == &BLACKHOLE_info)
+ bhs++;
+ break;
+ case STOP_FRAME: stop_frames++;
+ break;
+ case CATCH_FRAME: catch_frames++;
+ break;
+ case SEQ_FRAME: seq_frames++;
+ break;
+    default:
+      printObj((StgClosure *)prev_frame); /* print the offender first; barf() does not return */
+      barf("Found non-frame during stack squeezing at %p (prev frame was %p)\n",
+           frame, prev_frame);
+    }
+#endif
if (get_itbl(frame)->type == UPDATE_FRAME
&& frame->updatee->header.info == &BLACKHOLE_info) {
break;
StgClosure *updatee_keep = prev_frame->updatee;
StgClosure *updatee_bypass = frame->updatee;
-#if 0 /* DEBUG */
- fprintf(stderr, "squeezing frame at %p\n", frame);
+#if DEBUG
+ IF_DEBUG(gc, fprintf(stderr, "@@ squeezing frame at %p\n", frame));
+ squeezes++;
#endif
/* Deal with blocking queues. If both updatees have blocked
else
next_frame_bottom = tso->sp - 1;
-#if 0 /* DEBUG */
- fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom,
- displacement);
+#if DEBUG
+ IF_DEBUG(gc,
+ fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom,
+ displacement))
#endif
while (sp >= next_frame_bottom) {
tso->sp += displacement;
tso->su = prev_frame;
+#if DEBUG
+ IF_DEBUG(gc,
+ fprintf(stderr, "@@ threadSqueezeStack: squeezed %d update-frames; found %d BHs; found %d update-, %d stop-, %d catch, %d seq-frames\n",
+ squeezes, bhs, upd_frames, stop_frames, catch_frames, seq_frames))
+#endif
}
+//@node Pausing a thread, Index, Stack squeezing
+//@subsection Pausing a thread
+
/* -----------------------------------------------------------------------------
* Pausing a thread
*
* here. We also take the opportunity to do stack squeezing if it's
* turned on.
* -------------------------------------------------------------------------- */
+//@cindex threadPaused
void
threadPaused(StgTSO *tso)
else
threadLazyBlackHole(tso);
}
+
+#if DEBUG
+//@cindex printMutOnceList
+void
+printMutOnceList(generation *gen)
+{
+  StgMutClosure *p, *next;
+
+  p = gen->mut_once_list;
+  next = p->mut_link;
+
+ fprintf(stderr, "@@ Mut once list %p: ", gen->mut_once_list);
+ for (; p != END_MUT_LIST; p = next, next = p->mut_link) {
+ fprintf(stderr, "%p (%s), ",
+ p, info_type((StgClosure *)p));
+ }
+ fputc('\n', stderr);
+}
+
+//@cindex printMutableList
+void
+printMutableList(generation *gen)
+{
+  StgMutClosure *p, *next;
+
+  p = gen->saved_mut_list;
+  next = p->mut_link;
+
+ fprintf(stderr, "@@ Mutable list %p: ", gen->saved_mut_list);
+ for (; p != END_MUT_LIST; p = next, next = p->mut_link) {
+ fprintf(stderr, "%p (%s), ",
+ p, info_type((StgClosure *)p));
+ }
+ fputc('\n', stderr);
+}
+#endif /* DEBUG */
+
+//@node Index, , Pausing a thread
+//@subsection Index
+
+//@index
+//* GarbageCollect:: @cindex\s-+GarbageCollect
+//* MarkRoot:: @cindex\s-+MarkRoot
+//* RevertCAFs:: @cindex\s-+RevertCAFs
+//* addBlock:: @cindex\s-+addBlock
+//* cleanup_weak_ptr_list:: @cindex\s-+cleanup_weak_ptr_list
+//* copy:: @cindex\s-+copy
+//* copyPart:: @cindex\s-+copyPart
+//* evacuate:: @cindex\s-+evacuate
+//* evacuate_large:: @cindex\s-+evacuate_large
+//* gcCAFs:: @cindex\s-+gcCAFs
+//* isAlive:: @cindex\s-+isAlive
+//* mkMutCons:: @cindex\s-+mkMutCons
+//* relocate_TSO:: @cindex\s-+relocate_TSO
+//* revert_dead_CAFs:: @cindex\s-+revert_dead_CAFs
+//* scavenge:: @cindex\s-+scavenge
+//* scavenge_large:: @cindex\s-+scavenge_large
+//* scavenge_mut_once_list:: @cindex\s-+scavenge_mut_once_list
+//* scavenge_mutable_list:: @cindex\s-+scavenge_mutable_list
+//* scavenge_one:: @cindex\s-+scavenge_one
+//* scavenge_srt:: @cindex\s-+scavenge_srt
+//* scavenge_stack:: @cindex\s-+scavenge_stack
+//* scavenge_static:: @cindex\s-+scavenge_static
+//* threadLazyBlackHole:: @cindex\s-+threadLazyBlackHole
+//* threadPaused:: @cindex\s-+threadPaused
+//* threadSqueezeStack:: @cindex\s-+threadSqueezeStack
+//* traverse_weak_ptr_list:: @cindex\s-+traverse_weak_ptr_list
+//* upd_evacuee:: @cindex\s-+upd_evacuee
+//* zero_mutable_list:: @cindex\s-+zero_mutable_list
+//* zero_static_object_list:: @cindex\s-+zero_static_object_list
+//@end index
/* -----------------------------------------------------------------------------
- * $Id: GC.h,v 1.4 1999/02/05 16:02:43 simonm Exp $
+ * $Id: GC.h,v 1.5 2000/01/13 14:34:03 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
void threadPaused(StgTSO *);
StgClosure *isAlive(StgClosure *p);
+void GarbageCollect(void (*get_roots)(void));
/*-----------------------------------------------------------------------------
- * $Id: Hash.h,v 1.1 1999/01/27 12:11:26 simonm Exp $
+ * $Id: Hash.h,v 1.2 2000/01/13 14:34:03 hwloidl Exp $
*
* (c) The GHC Team, 1999
*
void * removeHashTable ( HashTable *table, StgWord key, void *data );
void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) );
HashTable * allocHashTable ( void );
+
/* -----------------------------------------------------------------------------
- * $Id: HeapStackCheck.hc,v 1.10 1999/11/09 15:46:51 simonmar Exp $
+ * $Id: HeapStackCheck.hc,v 1.11 2000/01/13 14:34:03 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
FE_
}
+#if defined(GRAN)
+/*
+ ToDo: merge the block and yield macros, calling something like BLOCK(N)
+ at the end;
+*/
+
+/*
+ Should we actually ever do a yield in such a case?? -- HWL
+*/
+EXTFUN(gran_yield_0)
+{
+ FB_
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+EXTFUN(gran_yield_1)
+{
+ FB_
+ Sp -= 1;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 2 Regs--------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_2)
+{
+ FB_
+ Sp -= 2;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 3 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_3)
+{
+ FB_
+ Sp -= 3;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 4 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_4)
+{
+ FB_
+ Sp -= 4;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 5 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_5)
+{
+ FB_
+ Sp -= 5;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 6 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_6)
+{
+ FB_
+ Sp -= 6;
+ Sp[5] = R6.w;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 7 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_7)
+{
+ FB_
+ Sp -= 7;
+ Sp[6] = R7.w;
+ Sp[5] = R6.w;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 8 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_yield_8)
+{
+ FB_
+ Sp -= 8;
+ Sp[7] = R8.w;
+ Sp[6] = R7.w;
+ Sp[5] = R6.w;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadYielding;
+ JMP_(StgReturn);
+ FE_
+}
+
+// the same routines but with a block rather than a yield
+
+EXTFUN(gran_block_1)
+{
+ FB_
+ Sp -= 1;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 2 Regs--------------------------------------------------------------------*/
+
+EXTFUN(gran_block_2)
+{
+ FB_
+ Sp -= 2;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 3 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_block_3)
+{
+ FB_
+ Sp -= 3;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 4 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_block_4)
+{
+ FB_
+ Sp -= 4;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 5 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_block_5)
+{
+ FB_
+ Sp -= 5;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 6 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_block_6)
+{
+ FB_
+ Sp -= 6;
+ Sp[5] = R6.w;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 7 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_block_7)
+{
+ FB_
+ Sp -= 7;
+ Sp[6] = R7.w;
+ Sp[5] = R6.w;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+/*- 8 Regs -------------------------------------------------------------------*/
+
+EXTFUN(gran_block_8)
+{
+ FB_
+ Sp -= 8;
+ Sp[7] = R8.w;
+ Sp[6] = R7.w;
+ Sp[5] = R6.w;
+ Sp[4] = R5.w;
+ Sp[3] = R4.w;
+ Sp[2] = R3.w;
+ Sp[1] = R2.w;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+#endif
+
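/* A sketch of the ToDo above (untested): the yield and block entry points
   could share one parameterised body, e.g.

     #define GRAN_SAVE_REGS_1  Sp -= 1; Sp[0] = R1.w;
     #define GRAN_RETURN(why)                  \
       SaveThreadState();                      \
       CurrentTSO->whatNext = ThreadEnterGHC;  \
       R1.i = (why);                           \
       JMP_(StgReturn);

     EXTFUN(gran_yield_1) { FB_ GRAN_SAVE_REGS_1 GRAN_RETURN(ThreadYielding) FE_ }
     EXTFUN(gran_block_1) { FB_ GRAN_SAVE_REGS_1 GRAN_RETURN(ThreadBlocked)  FE_ }

   with one GRAN_SAVE_REGS_<n> per register count, exactly as in the code above. */
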
+#if 0 && defined(PAR)
+
+/*
+ Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
+ saving of the thread state from the actual jump via an StgReturn.
+ We need this separation because we call RTS routines in blocking entry codes
+ before jumping back into the RTS (see parallel/FetchMe.hc).
+*/
+
+EXTFUN(par_block_1_no_jump)
+{
+ FB_
+ Sp -= 1;
+ Sp[0] = R1.w;
+ SaveThreadState();
+ FE_
+}
+
+EXTFUN(par_jump)
+{
+ FB_
+ CurrentTSO->whatNext = ThreadEnterGHC;
+ R1.i = ThreadBlocked;
+ JMP_(StgReturn);
+ FE_
+}
+
+#endif
+
/* -----------------------------------------------------------------------------
For a case expression on a polymorphic or function-typed object, if
the default branch (there can only be one branch) of the case fails
/* -----------------------------------------------------------------------------
- * $Id: Main.c,v 1.13 2000/01/13 12:40:15 simonmar Exp $
+ * $Id: Main.c,v 1.14 2000/01/13 14:34:03 hwloidl Exp $
*
* (c) The GHC Team 1998-1999
*
#include "RtsUtils.h"
#ifdef DEBUG
-#include "Printer.h" /* for printing */
+# include "Printer.h" /* for printing */
#endif
#ifdef INTERPRETER
-#include "Assembler.h"
+# include "Assembler.h"
#endif
#ifdef PAR
-#include "ParInit.h"
-#include "Parallel.h"
-#include "LLC.h"
+# include "ParInit.h"
+# include "Parallel.h"
+# include "LLC.h"
+#endif
+
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h"
#endif
#ifdef HAVE_WINDOWS_H
-#include <windows.h>
+# include <windows.h>
#endif
int main(int argc, char *argv[])
{
int exit_status;
-
SchedulerStatus status;
+ /* all GranSim/GUM init is done in startupHaskell; sets IAmMainThread! */
+
startupHaskell(argc,argv);
-# ifndef PAR
- /* ToDo: want to start with a larger stack size */
- status = rts_evalIO((StgClosure *)&mainIO_closure, NULL);
-# else
+ /* kick off the computation by creating the main thread with a pointer
+ to mainIO_closure representing the computation of the overall program;
+ then enter the scheduler with this thread and off we go;
+
+ the same for GranSim (we have only one instance of this code)
+
+ in a parallel setup, where we have many instances of this code
+ running on different PEs, we should do this only for the main PE
+ (IAmMainThread is set in startupHaskell)
+ */
+
+# if defined(PAR)
+
+# if DEBUG
+ { /* a wait loop to allow attachment of gdb to UNIX threads */
+ nat i, j, s;
+
+ for (i=0, s=0; i<RtsFlags.ParFlags.wait; i++)
+ for (j=0; j<1000000; j++)
+ s += j % 65536;
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("Passed wait loop"));
+# endif
+
if (IAmMainThread == rtsTrue) {
- /*Just to show we're alive */
fprintf(stderr, "Main Thread Started ...\n");
-
+
+ /* ToDo: Dump event for the main thread */
status = rts_evalIO((StgClosure *)&mainIO_closure, NULL);
} else {
- WaitForPEOp(PP_FINISH,SysManTask);
- exit(EXIT_SUCCESS);
+ /* Just to show we're alive */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "== [%x] Non-Main PE enters scheduler without work ...\n",
+ mytid));
+
+ /* all non-main threads enter the scheduler without work */
+ status = schedule( /* nothing */ );
}
-# endif /* PAR */
+
+# elif defined(GRAN)
+
+ /* ToDo: Dump event for the main thread */
+ status = rts_evalIO((StgClosure *)&mainIO_closure, NULL);
+
+# else /* !PAR && !GRAN */
+
+ /* ToDo: want to start with a larger stack size */
+ status = rts_evalIO((StgClosure *)&mainIO_closure, NULL);
+
+# endif /* !PAR && !GRAN */
+
+ // ToDo: update for parallel execution
+ /* check the status of the entire Haskell computation */
switch (status) {
case Deadlock:
prog_belch("no threads to run: infinite loop or deadlock?");
#-----------------------------------------------------------------------------
-# $Id: Makefile,v 1.16 1999/12/07 15:52:40 simonmar Exp $
-
+# $Id: Makefile,v 1.17 2000/01/13 14:34:03 hwloidl Exp $
+#
# This is the Makefile for the runtime-system stuff.
# This stuff is written in C (and cannot be written in Haskell).
#
WAYS=$(GhcLibWays)
-SRCS_RTS_C = $(wildcard *.c) $(wildcard hooks/*.c) $(filter-out gum/SysMan.c,$(wildcard gum/*.c))
+SRCS_RTS_C = $(wildcard *.c) $(wildcard hooks/*.c) $(filter-out parallel/SysMan.c,$(wildcard parallel/*.c))
SRCS_RTS_S = $(wildcard *.S)
-SRCS_RTS_HC = $(wildcard *.hc)
+SRCS_RTS_HC = $(wildcard *.hc) $(wildcard parallel/*.hc)
ifneq "$(way)" "dll"
SRCS_RTS_C := $(filter-out RtsDllMain.c, $(SRCS_RTS_C))
#WARNING_OPTS += -optc-Wredundant-decls
#WARNING_OPTS += -optc-Wconversion
-SRC_HC_OPTS += -I../includes -I. -Igum $(WARNING_OPTS) $(GhcRtsHcOpts) -optc-DCOMPILING_RTS
+SRC_HC_OPTS += -I../includes -I. -Iparallel $(WARNING_OPTS) $(GhcRtsHcOpts) -optc-DCOMPILING_RTS
SRC_CC_OPTS = $(GhcRtsCcOpts)
ifneq "$(way)" "dll"
#
ifeq "$(way)" "mp"
-all :: gum/SysMan
+all :: parallel/SysMan
ifdef solaris2_TARGET_OS
__socket_libs = -lsocket -lnsl
__socket_libs =
endif
-gum/SysMan : gum/SysMan.mp_o gum/LLComms.mp_o
+parallel/SysMan : parallel/SysMan.mp_o parallel/LLComms.mp_o RtsUtils.mp_o RtsFlags.mp_o
$(RM) $@
- gcc -o $@ gum/SysMan.mp_o gum/LLComms.mp_o -L$$PVM_ROOT/lib/$$PVM_ARCH -lgpvm3 -lpvm3 $(__socket_libs)
+ gcc -o $@ parallel/SysMan.mp_o parallel/LLComms.mp_o -L$$PVM_ROOT/lib/$$PVM_ARCH -lgpvm3 -lpvm3 $(__socket_libs)
-CLEAN_FILES += gum/SysMan.mp_o gum/SysMan
-INSTALL_LIBEXECS += gum/SysMan
+CLEAN_FILES += parallel/SysMan.mp_o parallel/SysMan
+INSTALL_LIBEXECS += parallel/SysMan
endif
#-----------------------------------------------------------------------------
/* -----------------------------------------------------------------------------
- * $Id: PrimOps.hc,v 1.38 2000/01/13 12:40:15 simonmar Exp $
+ * $Id: PrimOps.hc,v 1.39 2000/01/13 14:34:03 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
*/
if (mvar->head != (StgTSO *)&END_TSO_QUEUE_closure) {
ASSERT(mvar->head->why_blocked == BlockedOnMVar);
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+ // ToDo: check 2nd arg (mvar) is right
+ mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar);
+#else
mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head);
+#endif
if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) {
mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure;
}
-
/* -----------------------------------------------------------------------------
- * $Id: Printer.c,v 1.18 1999/11/29 18:59:46 sewardj Exp $
+ * $Id: Printer.c,v 1.19 2000/01/13 14:34:04 hwloidl Exp $
*
* Copyright (c) 1994-1999.
*
#include "Bytecodes.h" /* for InstrPtr */
#include "Disassembler.h"
+#include "Printer.h"
+
+// HWL: explicit fixed header size to make debugging easier
+int fixed_hs = FIXED_HS, itbl_sz = sizeofW(StgInfoTable),
+ uf_sz=sizeofW(StgUpdateFrame), sf_sz=sizeofW(StgSeqFrame);
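A minimal sketch of how these plain int globals could be dumped while debugging
(assuming the RTS's usual belch() from RtsUtils is visible in this file; the
call itself is an illustration, not part of the patch):

    /* report the fixed header and frame sizes, in words */
    belch("FIXED_HS=%d  info table=%d  upd frame=%d  seq frame=%d",
          fixed_hs, itbl_sz, uf_sz, sf_sz);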
+
/* --------------------------------------------------------------------------
* local function decls
* ------------------------------------------------------------------------*/
fprintf(stderr,")\n");
break;
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ fprintf(stderr,"RBH(");
+ printPtr((StgPtr)stgCast(StgRBH*,obj)->blocking_queue);
+ fprintf(stderr,")\n");
+ break;
+#endif
+
case CONSTR:
case CONSTR_1_0: case CONSTR_0_1:
case CONSTR_1_1: case CONSTR_0_2: case CONSTR_2_0:
}
}
+/*
+void printGraph( StgClosure *obj )
+{
+ printClosure(obj);
+}
+*/
+
StgPtr printStackObj( StgPtr sp )
{
/*fprintf(stderr,"Stack[%d] = ", &stgStack[STACK_SIZE] - sp); */
/* Causing linking trouble on Win32 plats, so I'm
disabling this for now.
*/
-#if defined(HAVE_BFD_H) && !defined(_WIN32)
+#if defined(HAVE_BFD_H) && !defined(_WIN32) && defined(USE_BSD)
#include <bfd.h>
/* -----------------------------------------------------------------------------
- * $Id: RtsFlags.c,v 1.23 2000/01/13 12:40:15 simonmar Exp $
+ * $Id: RtsFlags.c,v 1.24 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The AQUA Project, Glasgow University, 1994-1997
* (c) The GHC Team, 1998-1999
*
* ---------------------------------------------------------------------------*/
+//@menu
+//* Includes::
+//* Constants::
+//* Static function decls::
+//* Command-line option parsing routines::
+//* GranSim specific options::
+//* Aux fcts::
+//@end menu
+//*/
+
+//@node Includes, Constants
+//@subsection Includes
+
#include "Rts.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
int rts_argc; /* ditto */
char *rts_argv[MAX_RTS_ARGS];
+//@node Constants, Static function decls, Includes
+//@subsection Constants
+
/*
* constants, used later
*/
#define RTS 1
#define PGM 0
+#if defined(GRAN)
+
+char *gran_debug_opts_strs[] = {
+ "DEBUG (-bDe, -bD1): event_trace; printing event trace.\n",
+ "DEBUG (-bDE, -bD2): event_stats; printing event statistics.\n",
+ "DEBUG (-bDb, -bD4): bq; check blocking queues\n",
+ "DEBUG (-bDG, -bD8): pack; routines for (un-)packing graph structures.\n",
+ "DEBUG (-bDq, -bD16): checkSparkQ; check consistency of the spark queues.\n",
+ "DEBUG (-bDf, -bD32): thunkStealing; print forwarding of fetches.\n",
+ "DEBUG (-bDr, -bD64): randomSteal; stealing sparks/threads from random PEs.\n",
+ "DEBUG (-bDF, -bD128): findWork; searching spark-pools (local & remote), thread queues for work.\n",
+ "DEBUG (-bDu, -bD256): unused; currently unused flag.\n",
+ "DEBUG (-bDS, -bD512): pri; priority sparking or scheduling.\n",
+ "DEBUG (-bD:, -bD1024): checkLight; check GranSim-Light setup.\n",
+ "DEBUG (-bDo, -bD2048): sortedQ; check whether spark/thread queues are sorted.\n",
+ "DEBUG (-bDz, -bD4096): blockOnFetch; check for blocked on fetch.\n",
+ "DEBUG (-bDP, -bD8192): packBuffer; routines handling pack buffer (GranSim internal!).\n",
+ "DEBUG (-bDt, -bD16384): blockOnFetch_sanity; check for TSO asleep on fetch.\n",
+};
+
+/* one character codes for the available debug options */
+char gran_debug_opts_flags[] = {
+ 'e', 'E', 'b', 'G', 'q', 'f', 'r', 'F', 'u', 'S', ':', 'o', 'z', 'P', 't'
+};
+
+/* prefix strings printed with the debug messages of the corresponding type */
+char *gran_debug_opts_prefix[] = {
+ "", /* event_trace */
+ "", /* event_stats */
+ "##", /* bq */
+ "**", /* pack */
+ "^^", /* checkSparkQ */
+ "==", /* thunkStealing */
+ "^^", /* randomSteal */
+ "+-", /* findWork */
+ "", /* unused */
+ "++", /* pri */
+ "::", /* checkLight */
+ "##", /* sortedQ */
+ "", /* blockOnFetch */
+ "", /* packBuffer */
+ "" /* blockOnFetch_sanity */
+};
+
+#elif defined(PAR)
+
+char *par_debug_opts_strs[] = {
+ "DEBUG (-qDv, -qD1): verbose; be generally verbose with parallel related stuff.\n",
+ "DEBUG (-qDt, -qD2): trace; trace messages.\n",
+ "DEBUG (-qDs, -qD4): schedule; scheduling of parallel threads.\n",
+ "DEBUG (-qDe, -qD8): free; free messages.\n",
+ "DEBUG (-qDr, -qD16): resume; resume messages.\n",
+ "DEBUG (-qDw, -qD32): weight; print weights for GC.\n",
+ "DEBUG (-qDF, -qD64): fetch; fetch messages.\n",
+ "DEBUG (-qDa, -qD128): ack; ack messages.\n",
+ "DEBUG (-qDf, -qD256): fish; fish messages.\n",
+ "DEBUG (-qDo, -qD512): forward; forwarding messages to other PEs.\n",
+ "DEBUG (-qDp, -qD1024): pack; packing and unpacking graphs.\n"
+};
+
+/* one character codes for the available debug options */
+char par_debug_opts_flags[] = {
+ 'v', 't', 's', 'e', 'r', 'w', 'F', 'a', 'f', 'o', 'p'
+};
+
+/* prefix strings printed with the debug messages of the corresponding type */
+char *par_debug_opts_prefix[] = {
+ " ", /* verbose */
+ "..", /* trace */
+ "--", /* schedule */
+ "!!", /* free */
+ "[]", /* resume */
+ ";;", /* weight */
+ "%%", /* fetch */
+ ",,", /* ack */
+ "$$", /* fish */
+ "", /* forward */
+ "**" /* pack */
+};
+
+#endif /* PAR */
+
+//@node Static function decls, Command-line option parsing routines, Constants
+//@subsection Static function decls
+
/* -----------------------------------------------------------------------------
Static function decls
-------------------------------------------------------------------------- */
static I_ decode(const char *s);
static void bad_option(const char *s);
+#if defined(GRAN)
+static void enable_GranSimLight(void);
+static void process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error);
+static void set_GranSim_debug_options(nat n);
+static void help_GranSim_debug_options(nat n);
+#elif defined(PAR)
+static void process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error);
+static void set_par_debug_options(nat n);
+static void help_par_debug_options(nat n);
+#endif
+
+//@node Command-line option parsing routines, GranSim specific options, Static function decls
+//@subsection Command-line option parsing routines
+
/* -----------------------------------------------------------------------------
* Command-line option parsing routines.
* ---------------------------------------------------------------------------*/
#endif
#ifdef PAR
- RtsFlags.ParFlags.parallelStats = rtsFalse;
- RtsFlags.ParFlags.granSimStats = rtsFalse;
- RtsFlags.ParFlags.granSimStats_Binary = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Full = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Binary = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Sparks = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Heap = rtsFalse;
+ RtsFlags.ParFlags.ParStats.NewLogfile = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Global = rtsFalse;
+
RtsFlags.ParFlags.outputDisabled = rtsFalse;
RtsFlags.ParFlags.packBufferSize = 1024;
+
+ RtsFlags.ParFlags.maxThreads = 1024;
+ RtsFlags.ParFlags.maxFishes = MAX_FISHES;
+ RtsFlags.ParFlags.fishDelay = FISH_DELAY;
#endif
#if defined(PAR) || defined(SMP)
RtsFlags.ParFlags.maxLocalSparks = 4096;
-#endif
+#endif /* PAR || SMP */
+
+#if defined(GRAN)
+ /* ToDo: check defaults for GranSim and GUM */
+ RtsFlags.ConcFlags.ctxtSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */
+ RtsFlags.ConcFlags.maxThreads = 65536; // refers to mandatory threads
+ RtsFlags.GcFlags.maxStkSize = (1024 * 1024) / sizeof(W_);
+ RtsFlags.GcFlags.initialStkSize = 1024 / sizeof(W_);
+
+ RtsFlags.GranFlags.GranSimStats.Full = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Suppressed = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Binary = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Sparks = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Heap = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Global = rtsFalse;
-#ifdef GRAN
- RtsFlags.GranFlags.granSimStats = rtsFalse;
- RtsFlags.GranFlags.granSimStats_suppressed = rtsFalse;
- RtsFlags.GranFlags.granSimStats_Binary = rtsFalse;
- RtsFlags.GranFlags.granSimStats_Sparks = rtsFalse;
- RtsFlags.GranFlags.granSimStats_Heap = rtsFalse;
- RtsFlags.GranFlags.labelling = rtsFalse;
RtsFlags.GranFlags.packBufferSize = 1024;
RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE;
- RtsFlags.GranFlags.proc = MAX_PROC;
- RtsFlags.GranFlags.max_fishes = MAX_FISHES;
- RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE;
- RtsFlags.GranFlags.Light = rtsFalse;
-
- RtsFlags.GranFlags.gran_latency = LATENCY;
- RtsFlags.GranFlags.gran_additional_latency = ADDITIONAL_LATENCY;
- RtsFlags.GranFlags.gran_fetchtime = FETCHTIME;
- RtsFlags.GranFlags.gran_lunblocktime = LOCALUNBLOCKTIME;
- RtsFlags.GranFlags.gran_gunblocktime = GLOBALUNBLOCKTIME;
- RtsFlags.GranFlags.gran_mpacktime = MSGPACKTIME;
- RtsFlags.GranFlags.gran_munpacktime = MSGUNPACKTIME;
- RtsFlags.GranFlags.gran_mtidytime = MSGTIDYTIME;
-
- RtsFlags.GranFlags.gran_threadcreatetime = THREADCREATETIME;
- RtsFlags.GranFlags.gran_threadqueuetime = THREADQUEUETIME;
- RtsFlags.GranFlags.gran_threaddescheduletime = THREADDESCHEDULETIME;
- RtsFlags.GranFlags.gran_threadscheduletime = THREADSCHEDULETIME;
- RtsFlags.GranFlags.gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
-
- RtsFlags.GranFlags.gran_arith_cost = ARITH_COST;
- RtsFlags.GranFlags.gran_branch_cost = BRANCH_COST;
- RtsFlags.GranFlags.gran_load_cost = LOAD_COST;
- RtsFlags.GranFlags.gran_store_cost = STORE_COST;
- RtsFlags.GranFlags.gran_float_cost = FLOAT_COST;
-
- RtsFlags.GranFlags.gran_heapalloc_cost = HEAPALLOC_COST;
-
- RtsFlags.GranFlags.gran_pri_spark_overhead = PRI_SPARK_OVERHEAD;
- RtsFlags.GranFlags.gran_pri_sched_overhead = PRI_SCHED_OVERHEAD;
-
- RtsFlags.GranFlags.DoFairSchedule = rtsFalse;
- RtsFlags.GranFlags.DoReScheduleOnFetch = rtsFalse;
- RtsFlags.GranFlags.DoStealThreadsFirst = rtsFalse;
- RtsFlags.GranFlags.SimplifiedFetch = rtsFalse;
- RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsFalse;
- RtsFlags.GranFlags.DoGUMMFetching = rtsFalse;
- RtsFlags.GranFlags.DoThreadMigration = rtsFalse;
- RtsFlags.GranFlags.FetchStrategy = 2;
+ RtsFlags.GranFlags.proc = MAX_PROC;
+ RtsFlags.GranFlags.Fishing = rtsFalse;
+ RtsFlags.GranFlags.maxFishes = MAX_FISHES;
+ RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE;
+ RtsFlags.GranFlags.Light = rtsFalse;
+
+ RtsFlags.GranFlags.Costs.latency = LATENCY;
+ RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY;
+ RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME;
+ RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME;
+ RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME;
+ RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME;
+ RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME;
+ RtsFlags.GranFlags.Costs.mtidytime = MSGTIDYTIME;
+
+ RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME;
+ RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME;
+ RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME;
+ RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME;
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
+
+ RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST;
+ RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST;
+ RtsFlags.GranFlags.Costs.load_cost = LOAD_COST;
+ RtsFlags.GranFlags.Costs.store_cost = STORE_COST;
+ RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST;
+
+ RtsFlags.GranFlags.Costs.heapalloc_cost = HEAPALLOC_COST;
+
+ RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD;
+ RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD;
+
+ RtsFlags.GranFlags.DoFairSchedule = rtsFalse;
+ RtsFlags.GranFlags.DoAsyncFetch = rtsFalse;
+ RtsFlags.GranFlags.DoStealThreadsFirst = rtsFalse;
+ RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsFalse;
+ RtsFlags.GranFlags.DoBulkFetching = rtsFalse;
+ RtsFlags.GranFlags.DoThreadMigration = rtsFalse;
+ RtsFlags.GranFlags.FetchStrategy = 2;
RtsFlags.GranFlags.PreferSparksOfLocalNodes = rtsFalse;
- RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
- RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse;
- RtsFlags.GranFlags.SparkPriority = 0;
- RtsFlags.GranFlags.SparkPriority2 = 0;
- RtsFlags.GranFlags.RandomPriorities = rtsFalse;
- RtsFlags.GranFlags.InversePriorities = rtsFalse;
- RtsFlags.GranFlags.IgnorePriorities = rtsFalse;
- RtsFlags.GranFlags.ThunksToPack = 0;
- RtsFlags.GranFlags.RandomSteal = rtsTrue;
- RtsFlags.GranFlags.NoForward = rtsFalse;
- RtsFlags.GranFlags.PrintFetchMisses = rtsFalse;
-
- RtsFlags.GranFlags.debug = 0x0;
- RtsFlags.GranFlags.event_trace = rtsFalse;
- RtsFlags.GranFlags.event_trace_all = rtsFalse;
+ RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
+ RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse;
+ RtsFlags.GranFlags.SparkPriority = 0;
+ RtsFlags.GranFlags.SparkPriority2 = 0;
+ RtsFlags.GranFlags.RandomPriorities = rtsFalse;
+ RtsFlags.GranFlags.InversePriorities = rtsFalse;
+ RtsFlags.GranFlags.IgnorePriorities = rtsFalse;
+ RtsFlags.GranFlags.ThunksToPack = 0;
+ RtsFlags.GranFlags.RandomSteal = rtsTrue;
#endif
#ifdef TICKY_TICKY
# ifdef SMP
" -N<n> Use <n> OS threads (default: 1)",
# endif
+" -e<size> Size of spark pools (default 100)",
+" -t<num> Set maximum number of advisory threads per PE (default 32)",
+" -o<num> Set stack chunk size (default 1024)",
+
# ifdef PAR
-" -q Enable activity profile (output files in ~/<program>*.gr)",
-" -qb Enable binary activity profile (output file /tmp/<program>.gb)",
-" -Q<size> Set pack-buffer size (default: 1024)",
+" -qP Enable activity profile (output files in ~/<program>*.gr)",
+" -qQ<size> Set pack-buffer size (default: 1024)",
+" -qd Turn on PVM-ish debugging",
+" -qO Disable output for performance measurement",
# endif
# if defined(SMP) || defined(PAR)
" -e<n> Maximum number of outstanding local sparks (default: 4096)",
if ((n>>7)&1) RtsFlags.DebugFlags.sanity = rtsTrue;
if ((n>>8)&1) RtsFlags.DebugFlags.stable = rtsTrue;
if ((n>>9)&1) RtsFlags.DebugFlags.prof = rtsTrue;
+ if ((n>>10)&1) RtsFlags.DebugFlags.gran = rtsTrue;
+ if ((n>>11)&1) RtsFlags.DebugFlags.par = rtsTrue;
}
break;
#endif
RtsFlags.GcFlags.giveStats ++;
#ifdef PAR
/* Opening all those files would almost certainly fail... */
- RtsFlags.ParFlags.parallelStats = rtsTrue;
+ RtsFlags.ParFlags.ParStats.Full = rtsTrue;
RtsFlags.GcFlags.statsFile = stderr; /* temporary; ToDo: rm */
#else
RtsFlags.GcFlags.statsFile
}
) break;
- case 'O':
+ case 'q':
PAR_BUILD_ONLY(
- RtsFlags.ParFlags.outputDisabled = rtsTrue;
- ) break;
-
- case 'q': /* activity profile option */
- PAR_BUILD_ONLY(
- if (rts_argv[arg][2] == 'b')
- RtsFlags.ParFlags.granSimStats_Binary = rtsTrue;
- else
- RtsFlags.ParFlags.granSimStats = rtsTrue;
- ) break;
-
-#if 0 /* or??? */
- case 'q': /* quasi-parallel profile option */
- GRAN_BUILD_ONLY (
- if (rts_argv[arg][2] == 'v')
- do_qp_prof = 2;
- else
- do_qp_prof++;
- ) break;
-#endif /* 0??? */
-
- case 'Q': /* Set pack buffer size */
- PAR_BUILD_ONLY(
- if (rts_argv[arg][2] != '\0') {
- RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+2);
- } else {
- prog_belch("missing size of PackBuffer (for -Q)");
- error = rtsTrue;
- }
+ process_par_option(arg, rts_argc, rts_argv, &error);
) break;
/* =========== GRAN =============================== */
}
}
+#if defined(GRAN)
+
+//@node GranSim specific options, Aux fcts, Command-line option parsing routines
+//@subsection GranSim specific options
+
+static void
+enable_GranSimLight(void) {
+
+ fprintf(stderr,"GrAnSim Light enabled (infinite number of processors; 0 communication costs)\n");
+ RtsFlags.GranFlags.Light=rtsTrue;
+ RtsFlags.GranFlags.Costs.latency =
+ RtsFlags.GranFlags.Costs.fetchtime =
+ RtsFlags.GranFlags.Costs.additional_latency =
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.lunblocktime =
+ RtsFlags.GranFlags.Costs.threadcreatetime =
+ RtsFlags.GranFlags.Costs.threadqueuetime =
+ RtsFlags.GranFlags.Costs.threadscheduletime =
+ RtsFlags.GranFlags.Costs.threaddescheduletime =
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0;
+
+ RtsFlags.GranFlags.Costs.mpacktime =
+ RtsFlags.GranFlags.Costs.munpacktime = 0;
+
+ RtsFlags.GranFlags.DoFairSchedule = rtsTrue;
+ RtsFlags.GranFlags.DoAsyncFetch = rtsFalse;
+ RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsTrue;
+ /* FetchStrategy is irrelevant in GrAnSim-Light */
+
+ /* GrAnSim Light often creates an abundance of parallel threads,
+ each with its own stack etc. Therefore, it's in general a good
+ idea to use small stack chunks (use the -o<size> option to
+ increase it again).
+ */
+ // RtsFlags.ConcFlags.stkChunkSize = 100;
+
+ RtsFlags.GranFlags.proc = 1;
+}
+
+static void
+process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error)
+{
+ if (rts_argv[arg][1] != 'b') /* All GranSim options start with -b */
+ return;
+
+  /* "-boring": a ridiculously idealised simulator */
+ if(strcmp((rts_argv[arg]+2),"oring")==0) {
+ RtsFlags.GranFlags.Costs.latency =
+ RtsFlags.GranFlags.Costs.fetchtime =
+ RtsFlags.GranFlags.Costs.additional_latency =
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.lunblocktime =
+ RtsFlags.GranFlags.Costs.threadcreatetime =
+ RtsFlags.GranFlags.Costs.threadqueuetime =
+ RtsFlags.GranFlags.Costs.threadscheduletime =
+ RtsFlags.GranFlags.Costs.threaddescheduletime =
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0;
+
+ RtsFlags.GranFlags.Costs.mpacktime =
+ RtsFlags.GranFlags.Costs.munpacktime = 0;
+
+ RtsFlags.GranFlags.Costs.arith_cost =
+ RtsFlags.GranFlags.Costs.float_cost =
+ RtsFlags.GranFlags.Costs.load_cost =
+ RtsFlags.GranFlags.Costs.store_cost =
+ RtsFlags.GranFlags.Costs.branch_cost = 0;
+
+ RtsFlags.GranFlags.Costs.heapalloc_cost = 1;
+
+ /* ++RtsFlags.GranFlags.DoFairSchedule; */
+    RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue;         /* -bT */
+ RtsFlags.GranFlags.DoThreadMigration = rtsTrue; /* -bM */
+ RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; /* -bP */
+ return;
+ }
+
+  /* "-bonzo": a somewhat idealised simulator */
+ if(strcmp((rts_argv[arg]+2),"onzo")==0) {
+ RtsFlags.GranFlags.Costs.latency =
+ RtsFlags.GranFlags.Costs.fetchtime =
+ RtsFlags.GranFlags.Costs.additional_latency =
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.lunblocktime =
+ RtsFlags.GranFlags.Costs.threadcreatetime =
+ RtsFlags.GranFlags.Costs.threadqueuetime =
+ RtsFlags.GranFlags.Costs.threadscheduletime =
+ RtsFlags.GranFlags.Costs.threaddescheduletime =
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0;
+
+ RtsFlags.GranFlags.Costs.mpacktime =
+ RtsFlags.GranFlags.Costs.munpacktime = 0;
+
+ RtsFlags.GranFlags.Costs.heapalloc_cost = 1;
+
+ /* RtsFlags.GranFlags.DoFairSchedule = rtsTrue; */ /* -b-R */
+ /* RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue; */ /* -b-T */
+ RtsFlags.GranFlags.DoAsyncFetch = rtsTrue; /* -bZ */
+ RtsFlags.GranFlags.DoThreadMigration = rtsTrue; /* -bM */
+ RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; /* -bP */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ RtsFlags.GranFlags.Debug.event_stats = rtsTrue; /* print event statistics */
+# endif
+ return;
+ }
+
+ /* Communication and task creation cost parameters */
+ switch(rts_argv[arg][2]) {
+ case '.':
+ IgnoreYields = rtsTrue; // HWL HACK
+ break;
+
+ case ':':
+ enable_GranSimLight(); /* set flags for GrAnSim-Light mode */
+ break;
+
+ case 'l':
+ if (rts_argv[arg][3] != '\0')
+ {
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.latency = decode(rts_argv[arg]+3);
+ RtsFlags.GranFlags.Costs.fetchtime = 2*RtsFlags.GranFlags.Costs.latency;
+ }
+ else
+ RtsFlags.GranFlags.Costs.latency = LATENCY;
+ break;
+
+ case 'a':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.additional_latency = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY;
+ break;
+
+ case 'm':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.mpacktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME;
+ break;
+
+ case 'x':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.mtidytime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.mtidytime = 0;
+ break;
+
+ case 'r':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.munpacktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME;
+ break;
+
+ case 'g':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.fetchtime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME;
+ break;
+
+ case 'n':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.gunblocktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME;
+ break;
+
+ case 'u':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.lunblocktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME;
+ break;
+
+ /* Thread-related metrics */
+ case 't':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threadcreatetime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME;
+ break;
+
+ case 'q':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threadqueuetime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME;
+ break;
+
+ case 'c':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threadscheduletime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME;
+
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime
+ + RtsFlags.GranFlags.Costs.threaddescheduletime;
+ break;
+
+ case 'd':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threaddescheduletime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME;
+
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime
+ + RtsFlags.GranFlags.Costs.threaddescheduletime;
+ break;
+
+ /* Instruction Cost Metrics */
+ case 'A':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.arith_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST;
+ break;
+
+ case 'F':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.float_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST;
+ break;
+
+ case 'B':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.branch_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST;
+ break;
+
+ case 'L':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.load_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.load_cost = LOAD_COST;
+ break;
+
+ case 'S':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.store_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.store_cost = STORE_COST;
+ break;
+
+ case 'H':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.heapalloc_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.heapalloc_cost = 0;
+ break;
+
+ case 'y':
+ RtsFlags.GranFlags.DoAsyncFetch = rtsTrue;
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.FetchStrategy = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.FetchStrategy = 2;
+ if (RtsFlags.GranFlags.FetchStrategy == 0)
+ RtsFlags.GranFlags.DoAsyncFetch = rtsFalse;
+ break;
+
+ case 'K': /* sort overhead (per elem in spark list) */
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.pri_spark_overhead = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD;
+ fprintf(stderr,"Overhead for pri spark: %d (per elem).\n",
+ RtsFlags.GranFlags.Costs.pri_spark_overhead);
+ break;
+
+    case 'O':  /* sort overhead (per elem in thread queue) */
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.pri_sched_overhead = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD;
+ fprintf(stderr,"Overhead for pri sched: %d (per elem).\n",
+ RtsFlags.GranFlags.Costs.pri_sched_overhead);
+ break;
+
+ /* General Parameters */
+ case 'p':
+ if (rts_argv[arg][3] != '\0')
+ {
+ RtsFlags.GranFlags.proc = decode(rts_argv[arg]+3);
+ if (RtsFlags.GranFlags.proc==0) {
+ enable_GranSimLight(); /* set flags for GrAnSim-Light mode */
+ } else if (RtsFlags.GranFlags.proc > MAX_PROC ||
+ RtsFlags.GranFlags.proc < 1)
+ {
+	    fprintf(stderr,"setupRtsFlags: no more than %u processors allowed\n",
+ MAX_PROC);
+ *error = rtsTrue;
+ }
+ }
+ else
+ RtsFlags.GranFlags.proc = MAX_PROC;
+ break;
+
+ case 'f':
+ RtsFlags.GranFlags.Fishing = rtsTrue;
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.maxFishes = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.maxFishes = MAX_FISHES;
+ break;
+
+ case 'w':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.time_slice = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE;
+ break;
+
+ case 'C':
+ RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsTrue;
+ RtsFlags.GranFlags.DoThreadMigration=rtsTrue;
+ break;
+
+ case 'G':
+ fprintf(stderr,"Bulk fetching enabled.\n");
+ RtsFlags.GranFlags.DoBulkFetching=rtsTrue;
+ break;
+
+ case 'M':
+ fprintf(stderr,"Thread migration enabled.\n");
+ RtsFlags.GranFlags.DoThreadMigration=rtsTrue;
+ break;
+
+ case 'R':
+ fprintf(stderr,"Fair Scheduling enabled.\n");
+ RtsFlags.GranFlags.DoFairSchedule=rtsTrue;
+ break;
+
+ case 'I':
+ fprintf(stderr,"Priority Scheduling enabled.\n");
+ RtsFlags.GranFlags.DoPriorityScheduling=rtsTrue;
+ break;
+
+ case 'T':
+ RtsFlags.GranFlags.DoStealThreadsFirst=rtsTrue;
+ RtsFlags.GranFlags.DoThreadMigration=rtsTrue;
+ break;
+
+ case 'Z':
+ RtsFlags.GranFlags.DoAsyncFetch=rtsTrue;
+ break;
+
+/* case 'z': */
+/* RtsFlags.GranFlags.SimplifiedFetch=rtsTrue; */
+/* break; */
+
+ case 'N':
+ RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsTrue;
+ break;
+
+ case 'b':
+ RtsFlags.GranFlags.GranSimStats.Binary=rtsTrue;
+ break;
+
+ case 'P':
+ /* format is -bP<c> where <c> is one char describing kind of profile */
+ RtsFlags.GranFlags.GranSimStats.Full = rtsTrue;
+ switch(rts_argv[arg][3]) {
+ case '\0': break; // nothing special, just an ordinary profile
+ case '0': RtsFlags.GranFlags.GranSimStats.Suppressed = rtsTrue;
+ break;
+ case 'b': RtsFlags.GranFlags.GranSimStats.Binary = rtsTrue;
+ break;
+ case 's': RtsFlags.GranFlags.GranSimStats.Sparks = rtsTrue;
+ break;
+ case 'h': RtsFlags.GranFlags.GranSimStats.Heap = rtsTrue;
+ break;
+ case 'n': RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsTrue;
+ break;
+ case 'g': RtsFlags.GranFlags.GranSimStats.Global = rtsTrue;
+ break;
+ default: barf("Unknown option -bP%c", rts_argv[arg][3]);
+ }
+ break;
+
+ case 's':
+ RtsFlags.GranFlags.GranSimStats.Sparks=rtsTrue;
+ break;
+
+ case 'h':
+ RtsFlags.GranFlags.GranSimStats.Heap=rtsTrue;
+ break;
+
+ case 'Y': /* syntax: -bY<n>[,<n>] n ... pos int */
+ if (rts_argv[arg][3] != '\0') {
+ char *arg0, *tmp;
+
+ arg0 = rts_argv[arg]+3;
+ if ((tmp = strstr(arg0,","))==NULL) {
+ RtsFlags.GranFlags.SparkPriority = decode(arg0);
+ fprintf(stderr,"SparkPriority: %u.\n",RtsFlags.GranFlags.SparkPriority);
+ } else {
+ *(tmp++) = '\0';
+ RtsFlags.GranFlags.SparkPriority = decode(arg0);
+ RtsFlags.GranFlags.SparkPriority2 = decode(tmp);
+ fprintf(stderr,"SparkPriority: %u.\n",
+ RtsFlags.GranFlags.SparkPriority);
+ fprintf(stderr,"SparkPriority2:%u.\n",
+ RtsFlags.GranFlags.SparkPriority2);
+ if (RtsFlags.GranFlags.SparkPriority2 <
+ RtsFlags.GranFlags.SparkPriority) {
+ fprintf(stderr,"WARNING: 2nd pri < main pri (%u<%u); 2nd pri has no effect\n",
+ RtsFlags.GranFlags.SparkPriority2,
+ RtsFlags.GranFlags.SparkPriority);
+ }
+ }
+ } else {
+ /* plain pri spark is now invoked with -bX
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ fprintf(stderr,"PrioritySparking.\n");
+ */
+ }
+ break;
+
+ case 'Q':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.GranFlags.ThunksToPack = decode(rts_argv[arg]+3);
+ } else {
+ RtsFlags.GranFlags.ThunksToPack = 1;
+ }
+ fprintf(stderr,"Thunks To Pack in one packet: %u.\n",
+ RtsFlags.GranFlags.ThunksToPack);
+ break;
+
+ case 'e':
+ RtsFlags.GranFlags.RandomSteal = rtsFalse;
+ fprintf(stderr,"Deterministic mode (no random stealing)\n");
+ break;
+
+ /* The following class of options contains eXperimental */
+ /* features in connection with exploiting granularity */
+ /* information. I.e. if -bY is chosen these options */
+ /* tell the RTS what to do with the supplied info --HWL */
+
+ case 'W':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.GranFlags.packBufferSize_internal = decode(rts_argv[arg]+3);
+ } else {
+ RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE;
+ }
+ fprintf(stderr,"Size of GranSim internal pack buffer: %u.\n",
+ RtsFlags.GranFlags.packBufferSize_internal);
+ break;
+
+ case 'X':
+ switch(rts_argv[arg][3]) {
+
+ case '\0':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ fprintf(stderr,"Priority Sparking with Normal Priorities.\n");
+ RtsFlags.GranFlags.InversePriorities = rtsFalse;
+ RtsFlags.GranFlags.RandomPriorities = rtsFalse;
+ RtsFlags.GranFlags.IgnorePriorities = rtsFalse;
+ break;
+
+ case 'I':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ fprintf(stderr,"Priority Sparking with Inverse Priorities.\n");
+ RtsFlags.GranFlags.InversePriorities++;
+ break;
+
+ case 'R':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ fprintf(stderr,"Priority Sparking with Random Priorities.\n");
+ RtsFlags.GranFlags.RandomPriorities++;
+ break;
+
+ case 'N':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ fprintf(stderr,"Priority Sparking with No Priorities.\n");
+ RtsFlags.GranFlags.IgnorePriorities++;
+ break;
+
+ default:
+ bad_option( rts_argv[arg] );
+ break;
+ }
+ break;
+
+ case '-':
+ switch(rts_argv[arg][3]) {
+
+ case 'C':
+ RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsFalse;
+ RtsFlags.GranFlags.DoThreadMigration=rtsFalse;
+ break;
+
+ case 'G':
+ RtsFlags.GranFlags.DoBulkFetching=rtsFalse;
+ break;
+
+ case 'M':
+ RtsFlags.GranFlags.DoThreadMigration=rtsFalse;
+ break;
+
+ case 'R':
+ RtsFlags.GranFlags.DoFairSchedule=rtsFalse;
+ break;
+
+ case 'T':
+ RtsFlags.GranFlags.DoStealThreadsFirst=rtsFalse;
+ RtsFlags.GranFlags.DoThreadMigration=rtsFalse;
+ break;
+
+ case 'Z':
+ RtsFlags.GranFlags.DoAsyncFetch=rtsFalse;
+ break;
+
+ case 'N':
+ RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsFalse;
+ break;
+
+ case 'P':
+ RtsFlags.GranFlags.GranSimStats.Suppressed=rtsTrue;
+ break;
+
+ case 's':
+ RtsFlags.GranFlags.GranSimStats.Sparks=rtsFalse;
+ break;
+
+ case 'h':
+ RtsFlags.GranFlags.GranSimStats.Heap=rtsFalse;
+ break;
+
+ case 'b':
+ RtsFlags.GranFlags.GranSimStats.Binary=rtsFalse;
+ break;
+
+ case 'X':
+ RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
+ break;
+
+ case 'Y':
+ RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
+          RtsFlags.GranFlags.SparkPriority = 0;
+ break;
+
+ case 'I':
+ RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse;
+ break;
+
+ case 'e':
+ RtsFlags.GranFlags.RandomSteal = rtsFalse;
+ break;
+
+ default:
+ bad_option( rts_argv[arg] );
+ break;
+ }
+ break;
+
+# if defined(GRAN_CHECK) && defined(GRAN)
+ case 'D':
+ switch(rts_argv[arg][3]) {
+ case 'Q': /* Set pack buffer size (same as 'Q' in GUM) */
+ if (rts_argv[arg][4] != '\0') {
+ RtsFlags.GranFlags.packBufferSize = decode(rts_argv[arg]+4);
+ fprintf(stderr,"Pack buffer size: %d\n",
+ RtsFlags.GranFlags.packBufferSize);
+ } else {
+ fprintf(stderr, "setupRtsFlags: missing size of PackBuffer (for -Q)\n");
+ *error = rtsTrue;
+ }
+ break;
+
+ default:
+ if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */
+ /* hack warning: interpret the flags as a binary number */
+ nat n = decode(rts_argv[arg]+3);
+ set_GranSim_debug_options(n);
+ } else {
+ nat i;
+ for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)
+ if (rts_argv[arg][3] == gran_debug_opts_flags[i])
+ break;
+
+ if (i==MAX_GRAN_DEBUG_OPTION+1) {
+ fprintf(stderr, "Valid GranSim debug options are:\n");
+ help_GranSim_debug_options(MAX_GRAN_DEBUG_MASK);
+ bad_option( rts_argv[arg] );
+ } else { // flag found; now set it
+ set_GranSim_debug_options(GRAN_DEBUG_MASK(i)); // 2^i
+ }
+ }
+ break;
+
+#if 0
+ case 'e': /* event trace; also -bD1 */
+ fprintf(stderr,"DEBUG: event_trace; printing event trace.\n");
+ RtsFlags.GranFlags.Debug.event_trace = rtsTrue;
+ /* RtsFlags.GranFlags.event_trace=rtsTrue; */
+ break;
+
+ case 'E': /* event statistics; also -bD2 */
+ fprintf(stderr,"DEBUG: event_stats; printing event statistics.\n");
+ RtsFlags.GranFlags.Debug.event_stats = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x20; print event statistics */
+ break;
+
+ case 'f': /* thunkStealing; also -bD4 */
+ fprintf(stderr,"DEBUG: thunkStealing; printing forwarding of FETCHNODES.\n");
+ RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x2; print fwd messages */
+ break;
+
+ case 'z': /* blockOnFetch; also -bD8 */
+ fprintf(stderr,"DEBUG: blockOnFetch; check for blocked on fetch.\n");
+ RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x4; debug non-reschedule-on-fetch */
+ break;
+
+ case 't': /* blockOnFetch_sanity; also -bD16 */
+ fprintf(stderr,"DEBUG: blockOnFetch_sanity; check for TSO asleep on fetch.\n");
+ RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x10; debug TSO asleep for fetch */
+ break;
+
+ case 'S': /* priSpark; also -bD32 */
+ fprintf(stderr,"DEBUG: priSpark; priority sparking.\n");
+ RtsFlags.GranFlags.Debug.priSpark = rtsTrue;
+ break;
+
+ case 's': /* priSched; also -bD64 */
+ fprintf(stderr,"DEBUG: priSched; priority scheduling.\n");
+ RtsFlags.GranFlags.Debug.priSched = rtsTrue;
+ break;
+
+ case 'F': /* findWork; also -bD128 */
+ fprintf(stderr,"DEBUG: findWork; searching spark-pools (local & remote), thread queues for work.\n");
+ RtsFlags.GranFlags.Debug.findWork = rtsTrue;
+ break;
+
+ case 'g': /* globalBlock; also -bD256 */
+ fprintf(stderr,"DEBUG: globalBlock; blocking on remote closures (FETCHMEs etc in GUM).\n");
+ RtsFlags.GranFlags.Debug.globalBlock = rtsTrue;
+ break;
+
+ case 'G': /* pack; also -bD512 */
+ fprintf(stderr,"DEBUG: pack; routines for (un-)packing graph structures.\n");
+ RtsFlags.GranFlags.Debug.pack = rtsTrue;
+ break;
+
+ case 'P': /* packBuffer; also -bD1024 */
+ fprintf(stderr,"DEBUG: packBuffer; routines handling pack buffer (GranSim internal!).\n");
+ RtsFlags.GranFlags.Debug.packBuffer = rtsTrue;
+ break;
+
+ case 'o': /* sortedQ; also -bD2048 */
+ fprintf(stderr,"DEBUG: sortedQ; check whether spark/thread queues are sorted.\n");
+ RtsFlags.GranFlags.Debug.sortedQ = rtsTrue;
+ break;
+
+ case 'r': /* randomSteal; also -bD4096 */
+ fprintf(stderr,"DEBUG: randomSteal; stealing sparks/threads from random PEs.\n");
+ RtsFlags.GranFlags.Debug.randomSteal = rtsTrue;
+ break;
+
+ case 'q': /* checkSparkQ; also -bD8192 */
+ fprintf(stderr,"DEBUG: checkSparkQ; check consistency of the spark queues.\n");
+ RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue;
+ break;
+
+ case ':': /* checkLight; also -bD16384 */
+ fprintf(stderr,"DEBUG: checkLight; check GranSim-Light setup.\n");
+ RtsFlags.GranFlags.Debug.checkLight = rtsTrue;
+ break;
+
+ case 'b': /* bq; also -bD32768 */
+ fprintf(stderr,"DEBUG: bq; check blocking queues\n");
+ RtsFlags.GranFlags.Debug.bq = rtsTrue;
+ break;
+
+ case 'd': /* all options turned on */
+ fprintf(stderr,"DEBUG: all options turned on.\n");
+ set_GranSim_debug_options(MAX_GRAN_DEBUG_MASK);
+ /* RtsFlags.GranFlags.Debug |= 0x40; */
+ break;
+
+/* case '\0': */
+/* RtsFlags.GranFlags.Debug = 1; */
+/* break; */
+#endif
+
+ }
+ break;
+# endif /* GRAN_CHECK */
+ default:
+ bad_option( rts_argv[arg] );
+ break;
+ }
+}
+
+/*
+ Interpret n as a binary number masking GranSim debug options and set the
+   corresponding options. See gran_debug_opts_strs for explanations of the flags.
+*/
+static void
+set_GranSim_debug_options(nat n) {
+ nat i;
+
+ for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)
+ if ((n>>i)&1) {
+ fprintf(stderr, gran_debug_opts_strs[i]);
+ switch (i) {
+ case 0: RtsFlags.GranFlags.Debug.event_trace = rtsTrue; break;
+ case 1: RtsFlags.GranFlags.Debug.event_stats = rtsTrue; break;
+ case 2: RtsFlags.GranFlags.Debug.bq = rtsTrue; break;
+ case 3: RtsFlags.GranFlags.Debug.pack = rtsTrue; break;
+ case 4: RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue; break;
+ case 5: RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue; break;
+ case 6: RtsFlags.GranFlags.Debug.randomSteal = rtsTrue; break;
+ case 7: RtsFlags.GranFlags.Debug.findWork = rtsTrue; break;
+ case 8: RtsFlags.GranFlags.Debug.unused = rtsTrue; break;
+ case 9: RtsFlags.GranFlags.Debug.pri = rtsTrue; break;
+ case 10: RtsFlags.GranFlags.Debug.checkLight = rtsTrue; break;
+ case 11: RtsFlags.GranFlags.Debug.sortedQ = rtsTrue; break;
+ case 12: RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue; break;
+ case 13: RtsFlags.GranFlags.Debug.packBuffer = rtsTrue; break;
+ case 14: RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue; break;
+      default: barf("set_GranSim_debug_options: only %d debug options expected", MAX_GRAN_DEBUG_OPTION+1);
+ } /* switch */
+ } /* if */
+}
+
+/*
+  Print a one-line explanation for each of the GranSim debug options specified
+ in the bitmask n.
+*/
+static void
+help_GranSim_debug_options(nat n) {
+ nat i;
+
+ for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)
+ if ((n>>i)&1)
+ fprintf(stderr, gran_debug_opts_strs[i]);
+}
+
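The numeric -bD<n> form is easiest to see with a worked example (a sketch only,
re-deriving what gran_debug_opts_strs and the switch above already encode): the
argument is read as a bit mask, with bit i selecting option i.

    /* "-bD6" gives n = 6, i.e. bits 1 and 2 set (2 + 4): */
    set_GranSim_debug_options(6);
    /* ... prints the event_stats and bq explanation lines and sets
       RtsFlags.GranFlags.Debug.event_stats = rtsTrue;
       RtsFlags.GranFlags.Debug.bq          = rtsTrue;              */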
+# elif defined(PAR)
+
+static void
+process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error)
+{
+ if (rts_argv[arg][1] != 'q') /* All GUM options start with -q */
+ return;
+
+ /* Communication and task creation cost parameters */
+ switch(rts_argv[arg][2]) {
+ case 'e': /* -qe<n> ... allow <n> local sparks */
+ if (rts_argv[arg][3] != '\0') { /* otherwise, stick w/ the default */
+ RtsFlags.ParFlags.maxLocalSparks
+ = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+
+ if (RtsFlags.ParFlags.maxLocalSparks <= 0) {
+	  belch("setupRtsFlags: bad value for -qe\n");
+ *error = rtsTrue;
+ }
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("-qe<n>: max %d local sparks",
+ RtsFlags.ParFlags.maxLocalSparks));
+ break;
+
+ case 't':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.ParFlags.maxThreads
+ = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+ } else {
+ belch("setupRtsFlags: missing size for -qt\n");
+ *error = rtsTrue;
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("-qt<n>: max %d threads",
+ RtsFlags.ParFlags.maxThreads));
+ break;
+
+ case 'f':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.ParFlags.maxFishes = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.ParFlags.maxFishes = MAX_FISHES;
+      IF_PAR_DEBUG(verbose,
+		   belch("-qf<n>: max %d fishes sent out at one time",
+			 RtsFlags.ParFlags.maxFishes));
+      break;
+
+
+ case 'd':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.ParFlags.fishDelay
+ = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+ } else {
+ belch("setupRtsFlags: missing fish delay time for -qd\n");
+ *error = rtsTrue;
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("-qd<n>: fish delay time %d",
+ RtsFlags.ParFlags.fishDelay));
+ break;
+
+ case 'O':
+ RtsFlags.ParFlags.outputDisabled = rtsTrue;
+ IF_PAR_DEBUG(verbose,
+ belch("-qO: output disabled"));
+ break;
+
+ case 'P': /* -qP for writing a log file */
+ RtsFlags.ParFlags.ParStats.Full = rtsTrue;
+ /* same encoding as in GranSim after -bP */
+ switch(rts_argv[arg][3]) {
+ case '\0': break; // nothing special, just an ordinary profile
+ //case '0': RtsFlags.ParFlags.ParStats.Suppressed = rtsTrue;
+ // break;
+ case 'b': RtsFlags.ParFlags.ParStats.Binary = rtsTrue;
+ break;
+ case 's': RtsFlags.ParFlags.ParStats.Sparks = rtsTrue;
+ break;
+ //case 'h': RtsFlags.parFlags.ParStats.Heap = rtsTrue;
+ // break;
+ case 'n': RtsFlags.ParFlags.ParStats.NewLogfile = rtsTrue;
+ break;
+ case 'g': RtsFlags.ParFlags.ParStats.Global = rtsTrue;
+ break;
+      default: barf("Unknown option -qP%c", rts_argv[arg][3]);
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("(-qP) writing to log-file (RtsFlags.ParFlags.ParStats.Full=%s)",
+ (RtsFlags.ParFlags.ParStats.Full ? "rtsTrue" : "rtsFalse")));
+ break;
+
+ case 'Q': /* -qQ<n> ... set pack buffer size to <n> */
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+3);
+ } else {
+ belch("setupRtsFlags: missing size of PackBuffer (for -Q)\n");
+	*error = rtsTrue;
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("-qQ<n>: pack buffer size set to %d",
+ RtsFlags.ParFlags.packBufferSize));
+ break;
+
+# if defined(DEBUG)
+ case 'w':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.ParFlags.wait
+ = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+ } else {
+ RtsFlags.ParFlags.wait = 1000;
+ }
+ IF_PAR_DEBUG(verbose,
+		   belch("-qw<n>: length of wait loop after synchronisation, before reduction: %d",
+ RtsFlags.ParFlags.wait));
+ break;
+
+ case 'D': /* -qD ... all the debugging options */
+ if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */
+ /* hack warning: interpret the flags as a binary number */
+ nat n = decode(rts_argv[arg]+3);
+ set_par_debug_options(n);
+ } else {
+ nat i;
+ for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)
+ if (rts_argv[arg][3] == par_debug_opts_flags[i])
+ break;
+
+ if (i==MAX_PAR_DEBUG_OPTION+1) {
+ fprintf(stderr, "Valid GUM debug options are:\n");
+ help_par_debug_options(MAX_PAR_DEBUG_MASK);
+ bad_option( rts_argv[arg] );
+ } else { // flag found; now set it
+ set_par_debug_options(PAR_DEBUG_MASK(i)); // 2^i
+ }
+ }
+ break;
+# endif
+ default:
+ belch("Unknown option -q%c", rts_argv[arg][2]);
+ break;
+ } /* switch */
+}
+
+/*
+ Interpret n as a binary number masking Par debug options and set the
+   corresponding options. See par_debug_opts_strs for explanations of the flags.
+*/
+static void
+set_par_debug_options(nat n) {
+ nat i;
+
+ for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)
+ if ((n>>i)&1) {
+ fprintf(stderr, par_debug_opts_strs[i]);
+ switch (i) {
+ case 0: RtsFlags.ParFlags.Debug.verbose = rtsTrue; break;
+ case 1: RtsFlags.ParFlags.Debug.trace = rtsTrue; break;
+ case 2: RtsFlags.ParFlags.Debug.schedule = rtsTrue; break;
+ case 3: RtsFlags.ParFlags.Debug.free = rtsTrue; break;
+ case 4: RtsFlags.ParFlags.Debug.resume = rtsTrue; break;
+ case 5: RtsFlags.ParFlags.Debug.weight = rtsTrue; break;
+ case 6: RtsFlags.ParFlags.Debug.fetch = rtsTrue; break;
+ case 7: RtsFlags.ParFlags.Debug.ack = rtsTrue; break;
+ case 8: RtsFlags.ParFlags.Debug.fish = rtsTrue; break;
+ case 9: RtsFlags.ParFlags.Debug.forward = rtsTrue; break;
+ case 10: RtsFlags.ParFlags.Debug.pack = rtsTrue; break;
+      default: barf("set_par_debug_options: only %d debug options expected", MAX_PAR_DEBUG_OPTION+1);
+ } /* switch */
+ } /* if */
+}
+
+/*
+  Print a one-line explanation for each of the GUM debug options specified
+ in the bitmask n.
+*/
+static void
+help_par_debug_options(nat n) {
+ nat i;
+
+ for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)
+ if ((n>>i)&1)
+ fprintf(stderr, par_debug_opts_strs[i]);
+}
+
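The single-character form takes the other path through the parser (again only a
sketch of what the lookup in process_par_option above does): the character is
found in par_debug_opts_flags and turned into a one-bit mask.

    /* "-qDs": 's' is index 2 in par_debug_opts_flags, so        */
    set_par_debug_options(PAR_DEBUG_MASK(2));   /* mask == 4     */
    /* ... prints the schedule explanation line and sets
       RtsFlags.ParFlags.Debug.schedule = rtsTrue;               */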
+#endif /* GRAN || PAR */
+
+//@node Aux fcts, , GranSim specific options
+//@subsection Aux fcts
+
static FILE * /* return NULL on error */
open_stats_file (
I_ arg,
/* -----------------------------------------------------------------------------
- * $Id: RtsFlags.h,v 1.19 2000/01/12 15:15:17 simonmar Exp $
+ * $Id: RtsFlags.h,v 1.20 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
rtsBool stable : 1; /* 256 */
rtsBool prof : 1; /* 512 */
+ rtsBool gran : 1; /* 1024 */
+ rtsBool par : 1; /* 2048 */
};
#if defined(PROFILING) || defined(PAR)
};
#ifdef PAR
+/* currently the same as GRAN_STATS_FLAGS */
+struct PAR_STATS_FLAGS {
+ rtsBool Full; /* Full .gr profile (rtsTrue) or only END events? */
+ // rtsBool Suppressed; /* No .gr profile at all */
+ rtsBool Binary; /* Binary profile? (not yet implemented) */
+ rtsBool Sparks; /* Info on sparks in profile? */
+ rtsBool Heap; /* Info on heap allocs in profile? */
+ rtsBool NewLogfile; /* Use new log-file format? (not yet implemented) */
+ rtsBool Global; /* Global statistics? (printed on shutdown; no log file) */
+};
+
+struct PAR_DEBUG_FLAGS {
+ /* flags to control debugging output in various subsystems */
+ rtsBool verbose : 1; /* 1 */
+ rtsBool trace : 1; /* 2 */
+ rtsBool schedule : 1; /* 4 */
+ rtsBool free : 1; /* 8 */
+ rtsBool resume : 1; /* 16 */
+ rtsBool weight : 1; /* 32 */
+ rtsBool fetch : 1; /* 64 */
+ rtsBool ack : 1; /* 128 */
+ rtsBool fish : 1; /* 256 */
+ rtsBool forward : 1; /* 512 */
+ rtsBool pack : 1; /* 1024 */
+};
+
+#define MAX_PAR_DEBUG_OPTION 10
+#define PAR_DEBUG_MASK(n) ((nat)(ldexp(1,n)))
+#define MAX_PAR_DEBUG_MASK ((nat)(ldexp(1,(MAX_PAR_DEBUG_OPTION+1))-1))
+
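Since ldexp(1,n) is exactly 2^n, these masks are plain powers of two; an
all-integer spelling (a suggestion only, behaviourally the same under that
assumption) would avoid the detour through floating point:

    #define PAR_DEBUG_MASK(n)    ((nat)(1 << (n)))
    #define MAX_PAR_DEBUG_MASK   ((nat)((1 << (MAX_PAR_DEBUG_OPTION+1)) - 1))  /* 0x7ff */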
struct PAR_FLAGS {
- rtsBool parallelStats; /* Gather parallel statistics */
- rtsBool granSimStats; /* Full .gr profile (rtsTrue) or only END events? */
- rtsBool granSimStats_Binary;
-
- rtsBool outputDisabled; /* Disable output for performance purposes */
-
- unsigned int packBufferSize;
- unsigned int maxLocalSparks;
+ struct PAR_STATS_FLAGS ParStats; /* profile and stats output */
+ struct PAR_DEBUG_FLAGS Debug; /* debugging options */
+ rtsBool outputDisabled; /* Disable output for performance purposes */
+ nat packBufferSize;
+ nat maxLocalSparks; /* spark pool size */
+ nat maxThreads; /* thread pool size */
+ nat maxFishes; /* max number of active fishes */
+ rtsTime fishDelay; /* delay before sending a new fish */
+ long wait;
};
#endif /* PAR */
nat nNodes; /* number of threads to run simultaneously */
unsigned int maxLocalSparks;
};
-#endif
+#endif /* SMP */
#ifdef GRAN
+struct GRAN_STATS_FLAGS {
+ rtsBool Full; /* Full .gr profile (rtsTrue) or only END events? */
+ rtsBool Suppressed; /* No .gr profile at all */
+ rtsBool Binary; /* Binary profile? (not yet implemented) */
+ rtsBool Sparks; /* Info on sparks in profile? */
+ rtsBool Heap; /* Info on heap allocs in profile? */
+ rtsBool NewLogfile; /* Use new log-file format? (not yet implemented) */
+ rtsBool Global; /* Global statistics? (printed on shutdown; no log file) */
+};
+
+struct GRAN_COST_FLAGS {
+ /* Communication Cost Variables -- set in main program */
+ nat latency; /* Latency for single packet */
+ nat additional_latency; /* Latency for additional packets */
+ nat fetchtime;
+ nat lunblocktime; /* Time for local unblock */
+ nat gunblocktime; /* Time for global unblock */
+ nat mpacktime; /* Cost of creating a packet */
+ nat munpacktime; /* Cost of receiving a packet */
+ nat mtidytime; /* Cost of cleaning up after send */
+
+ nat threadcreatetime; /* Thread creation costs */
+ nat threadqueuetime; /* Cost of adding a thread to the running/runnable queue */
+ nat threaddescheduletime; /* Cost of descheduling a thread */
+ nat threadscheduletime; /* Cost of scheduling a thread */
+ nat threadcontextswitchtime; /* Cost of context switch */
+
+ /* Instruction Costs */
+  nat arith_cost;        /* arithmetic instructions (+, -, < etc.) */
+ nat branch_cost; /* branch instructions */
+ nat load_cost; /* load into register */
+ nat store_cost; /* store into memory */
+ nat float_cost; /* floating point operations */
+
+ nat heapalloc_cost; /* heap allocation costs */
+
+ /* Overhead for granularity control mechanisms */
+ /* overhead per elem of spark queue */
+ nat pri_spark_overhead;
+ /* overhead per elem of thread queue */
+ nat pri_sched_overhead;
+};
+
+struct GRAN_DEBUG_FLAGS {
+ /* flags to control debugging output in various subsystems */
+ rtsBool event_trace : 1; /* 1 */
+ rtsBool event_stats : 1; /* 2 */
+ rtsBool bq : 1; /* 4 */
+ rtsBool pack : 1; /* 8 */
+ rtsBool checkSparkQ : 1; /* 16 */
+ rtsBool thunkStealing : 1; /* 32 */
+ rtsBool randomSteal : 1; /* 64 */
+ rtsBool findWork : 1; /* 128 */
+ rtsBool unused : 1; /* 256 */
+ rtsBool pri : 1; /* 512 */
+ rtsBool checkLight : 1; /* 1024 */
+ rtsBool sortedQ : 1; /* 2048 */
+ rtsBool blockOnFetch : 1; /* 4096 */
+ rtsBool packBuffer : 1; /* 8192 */
+ rtsBool blockOnFetch_sanity : 1; /* 16384 */
+};
+
+#define MAX_GRAN_DEBUG_OPTION 14
+#define GRAN_DEBUG_MASK(n) ((nat)(ldexp(1,n)))
+#define MAX_GRAN_DEBUG_MASK ((nat)(ldexp(1,(MAX_GRAN_DEBUG_OPTION+1))-1))
+
struct GRAN_FLAGS {
- rtsBool granSimStats; /* Full .gr profile (rtsTrue) or only END events? */
- rtsBool granSimStats_suppressed; /* No .gr profile at all */
- rtsBool granSimStats_Binary;
- rtsBool granSimStats_Sparks;
- rtsBool granSimStats_Heap;
- rtsBool labelling;
- unsigned int packBufferSize;
- unsigned int packBufferSize_internal;
-
- int proc; /* number of processors */
- int max_fishes; /* max number of spark or thread steals */
- TIME time_slice; /* max time slice of one reduction thread */
-
- /* Communication Cost Variables -- set in main program */
- unsigned int gran_latency; /* Latency for single packet */
- unsigned int gran_additional_latency; /* Latency for additional packets */
- unsigned int gran_fetchtime;
- unsigned int gran_lunblocktime; /* Time for local unblock */
- unsigned int gran_gunblocktime; /* Time for global unblock */
- unsigned int gran_mpacktime; /* Cost of creating a packet */
- unsigned int gran_munpacktime; /* Cost of receiving a packet */
- unsigned int gran_mtidytime; /* Cost of cleaning up after send */
-
- unsigned int gran_threadcreatetime; /* Thread creation costs */
- unsigned int gran_threadqueuetime; /* Cost of adding a thread to the running/runnable queue */
- unsigned int gran_threaddescheduletime; /* Cost of descheduling a thread */
- unsigned int gran_threadscheduletime; /* Cost of scheduling a thread */
- unsigned int gran_threadcontextswitchtime; /* Cost of context switch */
-
- /* Instruction Costs */
- unsigned int gran_arith_cost; /* arithmetic instructions (+,i,< etc) */
- unsigned int gran_branch_cost; /* branch instructions */
- unsigned int gran_load_cost; /* load into register */
- unsigned int gran_store_cost; /* store into memory */
- unsigned int gran_float_cost; /* floating point operations */
-
- unsigned int gran_heapalloc_cost; /* heap allocation costs */
-
- /* Overhead for granularity control mechanisms */
- /* overhead per elem of spark queue */
- unsigned int gran_pri_spark_overhead;
- /* overhead per elem of thread queue */
- unsigned int gran_pri_sched_overhead;
+ struct GRAN_STATS_FLAGS GranSimStats; /* profile and stats output */
+ struct GRAN_COST_FLAGS Costs; /* cost metric for simulation */
+ struct GRAN_DEBUG_FLAGS Debug; /* debugging options */
+
+ // rtsBool labelling;
+ nat packBufferSize;
+ nat packBufferSize_internal;
+
+ PEs proc; /* number of processors */
+ rtsBool Fishing; /* Simulate GUM style fishing mechanism? */
+ nat maxFishes; /* max number of spark or thread steals */
+ rtsTime time_slice; /* max time slice of one reduction thread */
/* GrAnSim-Light: This version puts no bound on the number of
processors but in exchange doesn't model communication costs
rtsBool Light;
rtsBool DoFairSchedule ; /* fair scheduling alg? default: unfair */
- rtsBool DoReScheduleOnFetch ; /* async. communication? */
+ rtsBool DoAsyncFetch; /* async. communication? */
rtsBool DoStealThreadsFirst; /* prefer threads over sparks when stealing */
- rtsBool SimplifiedFetch; /* fast but inaccurate fetch modelling */
- rtsBool DoAlwaysCreateThreads; /* eager thread creation */
- rtsBool DoGUMMFetching; /* bulk fetching */
- rtsBool DoThreadMigration; /* allow to move threads */
- int FetchStrategy; /* what to do when waiting for data */
- rtsBool PreferSparksOfLocalNodes; /* prefer local over global sparks */
- rtsBool DoPrioritySparking; /* sparks sorted by priorities */
- rtsBool DoPriorityScheduling; /* threads sorted by priorities */
- int SparkPriority; /* threshold for cut-off mechanism */
- int SparkPriority2;
- rtsBool RandomPriorities;
- rtsBool InversePriorities;
- rtsBool IgnorePriorities;
- int ThunksToPack; /* number of thunks in packet + 1 */
- rtsBool RandomSteal; /* steal spark/thread from random proc */
- rtsBool NoForward; /* no forwarding of fetch messages */
- rtsBool PrintFetchMisses; /* print number of fetch misses */
-
- unsigned int debug;
- rtsBool event_trace;
- rtsBool event_trace_all;
-
+ rtsBool DoAlwaysCreateThreads; /* eager thread creation */
+ rtsBool DoBulkFetching; /* bulk fetching */
+  rtsBool DoThreadMigration;        /* allow threads to be moved between PEs */
+ nat FetchStrategy; /* what to do when waiting for data */
+ rtsBool PreferSparksOfLocalNodes; /* prefer local over global sparks */
+ rtsBool DoPrioritySparking; /* sparks sorted by priorities */
+ rtsBool DoPriorityScheduling; /* threads sorted by priorities */
+ nat SparkPriority; /* threshold for cut-off mechanism */
+ nat SparkPriority2;
+ rtsBool RandomPriorities;
+ rtsBool InversePriorities;
+ rtsBool IgnorePriorities;
+ nat ThunksToPack; /* number of thunks in packet + 1 */
+ rtsBool RandomSteal; /* steal spark/thread from random proc */
+ rtsBool NoForward; /* no forwarding of fetch messages */
+
+ // unsigned int debug;
+ // rtsBool event_trace;
+ // rtsBool event_trace_all;
};
#endif /* GRAN */
/* -----------------------------------------------------------------------------
- * $Id: RtsStartup.c,v 1.25 1999/12/20 10:34:37 simonpj Exp $
+ * $Id: RtsStartup.c,v 1.26 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
# include "ProfHeap.h"
#endif
-#ifdef PAR
+#if defined(GRAN)
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#endif
+
+#if defined(PAR)
#include "ParInit.h"
#include "Parallel.h"
#include "LLC.h"
struct RTS_FLAGS RtsFlags;
static int rts_has_started_up = 0;
+#if defined(PAR)
+static ullong startTime = 0;
+#endif
void
startupHaskell(int argc, char *argv[])
else
rts_has_started_up=1;
-#if defined(PAR)
- int nPEs = 0; /* Number of PEs */
-#endif
-
/* The very first thing we do is grab the start time...just in case we're
* collecting timing statistics.
*/
#ifdef PAR
/*
- *The parallel system needs to be initialised and synchronised before
- *the program is run.
+ * The parallel system needs to be initialised and synchronised before
+ * the program is run.
*/
+ fprintf(stderr, "startupHaskell: argv[0]=%s\n", argv[0]);
if (*argv[0] == '-') { /* Look to see whether we're the Main Thread */
IAmMainThread = rtsTrue;
argv++; argc--; /* Strip off flag argument */
-/* fprintf(stderr, "I am Main Thread\n"); */
+ // IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "[%x] I am Main Thread\n", mytid);
}
/*
* Grab the number of PEs out of the argument vector, and
argv[1] = argv[0];
argv++; argc--;
initEachPEHook(); /* HWL: hook to be execed on each PE */
- SynchroniseSystem();
#endif
/* Set the RTS flags to default values. */
prog_argc = argc;
prog_argv = argv;
-#ifdef PAR
- /* Initialise the parallel system -- before initHeap! */
- initParallelSystem();
- /* And start GranSim profiling if required: omitted for now
- *if (Rtsflags.ParFlags.granSimStats)
- *init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv);
- */
+#if defined(PAR)
+ /* NB: this really must be done after processing the RTS flags */
+ fprintf(stderr, "Synchronising system (%d PEs)\n", nPEs);
+ SynchroniseSystem(); // calls initParallelSystem etc
#endif /* PAR */
/* initialise scheduler data structures (needs to be done before
*/
initScheduler();
+#if defined(GRAN)
+ /* And start GranSim profiling if required: */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv);
+#elif defined(PAR)
+ /* And start GUM profiling if required: */
+ if (RtsFlags.ParFlags.ParStats.Full)
+ init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv);
+#endif /* PAR || GRAN */
+
/* initialize the storage manager */
initStorage();
/* start timing the shutdown */
stat_startExit();
+#if !defined(GRAN)
/* Finalize any remaining weak pointers */
finalizeWeakPointersNow();
+#endif
#if defined(GRAN)
- #error FixMe.
- if (!RTSflags.GranFlags.granSimStats_suppressed)
+ /* end_gr_simulation prints global stats if requested -- HWL */
+ if (!RtsFlags.GranFlags.GranSimStats.Suppressed)
end_gr_simulation();
#endif
#endif
rts_has_started_up=0;
-}
+#if defined(PAR)
+ shutdownParallelSystem(0);
+#endif
+
+}
/*
* called from STG-land to exit the program
void
stg_exit(I_ n)
{
-#ifdef PAR
+#if 0 /* def PAR */
par_exit(n);
#else
exit(n);
/* -----------------------------------------------------------------------------
- * $Id: RtsUtils.c,v 1.12 2000/01/13 12:40:16 simonmar Exp $
+ * $Id: RtsUtils.c,v 1.13 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
* ---------------------------------------------------------------------------*/
#include "Rts.h"
+#include "RtsTypes.h"
#include "RtsAPI.h"
#include "RtsFlags.h"
#include "Hooks.h"
#include <fcntl.h>
#endif
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+#endif
+
#include <stdarg.h>
/* variable-argument error function. */
Get the current time as a string. Used in profiling reports.
-------------------------------------------------------------------------- */
-#if defined(PROFILING) || defined(DEBUG)
+#if defined(PROFILING) || defined(DEBUG) || defined(PAR) || defined(GRAN)
char *
time_str(void)
{
#endif
}
+#if 0
+static ullong startTime = 0;
+
+/* used in a parallel setup */
+ullong
+msTime(void)
+{
+# if defined(HAVE_GETCLOCK) && !defined(alpha_TARGET_ARCH)
+ struct timespec tv;
+
+ if (getclock(TIMEOFDAY, &tv) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
+# elif HAVE_GETTIMEOFDAY && !defined(alpha_TARGET_ARCH)
+ struct timeval tv;
+
+ if (gettimeofday(&tv, NULL) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
+# else
+ time_t t;
+ if ((t = time(NULL)) == (time_t) -1) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ return t * LL(1000) - startTime;
+# endif
+}
+#endif
+
+
/* -----------------------------------------------------------------------------
Print large numbers, with punctuation.
-------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
- * $Id: RtsUtils.h,v 1.6 2000/01/12 15:15:17 simonmar Exp $
+ * $Id: RtsUtils.h,v 1.7 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
/*Defined in Main.c, but made visible here*/
extern void stg_exit(I_ n) __attribute__((noreturn));
-char * time_str(void);
-
+char *time_str(void);
char *ullong_format_string(ullong, char *, rtsBool);
+//ullong msTime(void);
+
/* -----------------------------------------------------------------------------
- * $Id: Sanity.c,v 1.14 1999/05/21 14:37:12 sof Exp $
+ * $Id: Sanity.c,v 1.15 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
*
* ---------------------------------------------------------------------------*/
+//@menu
+//* Includes::
+//* Macros::
+//* Stack sanity::
+//* Heap Sanity::
+//* TSO Sanity::
+//* Thread Queue Sanity::
+//* Blackhole Sanity::
+//* Index::
+//@end menu
+
+//@node Includes, Macros
+//@subsection Includes
+
#include "Rts.h"
-#ifdef DEBUG
+#ifdef DEBUG /* whole file */
#include "RtsFlags.h"
#include "RtsUtils.h"
#include "BlockAlloc.h"
#include "Sanity.h"
+//@node Macros, Stack sanity, Includes
+//@subsection Macros
+
#define LOOKS_LIKE_PTR(r) (LOOKS_LIKE_STATIC_CLOSURE(r) || ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1)))
+//@node Stack sanity, Heap Sanity, Macros
+//@subsection Stack sanity
+
/* -----------------------------------------------------------------------------
Check stack sanity
-------------------------------------------------------------------------- */
void checkClosureShallow( StgClosure* p );
+//@cindex checkSmallBitmap
static StgOffset
checkSmallBitmap( StgPtr payload, StgWord32 bitmap )
{
return i;
}
-
+//@cindex checkLargeBitmap
static StgOffset
checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap )
{
return i;
}
+//@cindex checkStackClosure
StgOffset
checkStackClosure( StgClosure* c )
{
case RET_BCO: /* small bitmap (<= 32 entries) */
case RET_SMALL:
case RET_VEC_SMALL:
+ return 1 + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap);
+
case UPDATE_FRAME:
case CATCH_FRAME:
case STOP_FRAME:
case SEQ_FRAME:
- return 1 + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap);
+#if defined(GRAN)
+ return 2 +
+#else
+ return 1 +
+#endif
+ checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap);
case RET_BIG: /* large bitmap (> 32 entries) */
case RET_VEC_BIG:
return 1 + checkLargeBitmap((StgPtr)c + 1,info->layout.large_bitmap);
case FUN:
case FUN_STATIC: /* probably a slow-entry point return address: */
- return 1;
+#if 0 && defined(GRAN)
+ return 2;
+#else
+ return 1;
+#endif
default:
/* if none of the above, maybe it's a closure which looks a
* little like an infotable
* chunks.
*/
+//@cindex checkClosureShallow
void
checkClosureShallow( StgClosure* p )
{
}
/* check an individual stack object */
+//@cindex checkStackObject
StgOffset
checkStackObject( StgPtr sp )
{
}
/* check sections of stack between update frames */
+//@cindex checkStackChunk
void
checkStackChunk( StgPtr sp, StgPtr stack_end )
{
while (p < stack_end) {
p += checkStackObject( p );
}
- ASSERT( p == stack_end );
+ // ASSERT( p == stack_end ); -- HWL
}
+//@cindex checkClosure
StgOffset
checkClosure( StgClosure* p )
{
case BLOCKED_FETCH:
case FETCH_ME:
case EVACUATED:
- barf("checkClosure: unimplemented/strange closure type");
+ barf("checkClosure: unimplemented/strange closure type %d",
+ info->type);
default:
- barf("checkClosure");
+ barf("checkClosure (closure type %d)", info->type);
}
#undef LOOKS_LIKE_PTR
}
+//@node Heap Sanity, TSO Sanity, Stack sanity
+//@subsection Heap Sanity
+
/* -----------------------------------------------------------------------------
Check Heap Sanity
all the objects in the remainder of the chain.
-------------------------------------------------------------------------- */
+//@cindex checkHeap
extern void
checkHeap(bdescr *bd, StgPtr start)
{
}
}
+//@cindex checkChain
extern void
checkChain(bdescr *bd)
{
}
/* check stack - making sure that update frames are linked correctly */
+//@cindex checkStack
void
checkStack(StgPtr sp, StgPtr stack_end, StgUpdateFrame* su )
{
ASSERT(stgCast(StgPtr,su) == stack_end);
}
+//@node TSO Sanity, Thread Queue Sanity, Heap Sanity
+//@subsection TSO Sanity
+
+//@cindex checkTSO
extern void
checkTSO(StgTSO *tso)
{
checkStack(sp, stack_end, su);
}
+#if defined(GRAN)
+//@cindex checkTSOsSanity
+extern void
+checkTSOsSanity(void) {
+ nat i, tsos;
+ StgTSO *tso;
+
+ belch("Checking sanity of all runnable TSOs:");
+
+ for (i=0, tsos=0; i<RtsFlags.GranFlags.proc; i++) {
+ for (tso=run_queue_hds[i]; tso!=END_TSO_QUEUE; tso=tso->link) {
+ fprintf(stderr, "TSO %p on PE %d ...", tso, i);
+ checkTSO(tso);
+ fprintf(stderr, "OK, ");
+ tsos++;
+ }
+ }
+
+ belch(" checked %d TSOs on %d PEs; ok\n", tsos, RtsFlags.GranFlags.proc);
+}
+
+//@node Thread Queue Sanity, Blackhole Sanity, TSO Sanity
+//@subsection Thread Queue Sanity
+
+// still GRAN only
+
+//@cindex checkThreadQSanity
+extern rtsBool
+checkThreadQSanity (PEs proc, rtsBool check_TSO_too)
+{
+ StgTSO *tso, *prev;
+
+ /* the NIL value for TSOs is END_TSO_QUEUE; thus, finding NULL is an error */
+ ASSERT(run_queue_hds[proc]!=NULL);
+ ASSERT(run_queue_tls[proc]!=NULL);
+ /* if either head or tail is NIL then the other one must be NIL, too */
+ ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE || run_queue_tls[proc]==END_TSO_QUEUE);
+ ASSERT(run_queue_tls[proc]!=END_TSO_QUEUE || run_queue_hds[proc]==END_TSO_QUEUE);
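+  /* (the two ASSERTs above encode the implications as !A || B:
+      hd==NIL implies tl==NIL, and vice versa) */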
+ for (tso=run_queue_hds[proc], prev=END_TSO_QUEUE;
+ tso!=END_TSO_QUEUE;
+ prev=tso, tso=tso->link) {
+ ASSERT((prev!=END_TSO_QUEUE || tso==run_queue_hds[proc]) &&
+ (prev==END_TSO_QUEUE || prev->link==tso));
+ if (check_TSO_too)
+ checkTSO(tso);
+ }
+  ASSERT(prev==run_queue_tls[proc]);
+  return rtsTrue;  /* declared as returning rtsBool; report success explicitly */
+}
+
+//@cindex checkThreadQsSanity
+extern rtsBool
+checkThreadQsSanity (rtsBool check_TSO_too)
+{
+ PEs p;
+
+ for (p=0; p<RtsFlags.GranFlags.proc; p++)
+    checkThreadQSanity(p, check_TSO_too);
+  return rtsTrue;  /* declared as returning rtsBool; report success explicitly */
+}
+#endif /* GRAN */
+
+//@node Blackhole Sanity, Index, Thread Queue Sanity
+//@subsection Blackhole Sanity
+
/* -----------------------------------------------------------------------------
Check Blackhole Sanity
the update frame list.
-------------------------------------------------------------------------- */
-rtsBool isBlackhole( StgTSO* tso, StgClosure* p )
+//@cindex isBlackhole
+rtsBool
+isBlackhole( StgTSO* tso, StgClosure* p )
{
StgUpdateFrame* su = tso->su;
do {
} while (1);
}
+//@node Index, , Blackhole Sanity
+//@subsection Index
+
+//@index
+//* checkChain::  @cindex\s-+checkChain
+//* checkClosure::  @cindex\s-+checkClosure
+//* checkClosureShallow::  @cindex\s-+checkClosureShallow
+//* checkHeap::  @cindex\s-+checkHeap
+//* checkLargeBitmap::  @cindex\s-+checkLargeBitmap
+//* checkSmallBitmap::  @cindex\s-+checkSmallBitmap
+//* checkStack::  @cindex\s-+checkStack
+//* checkStackChunk::  @cindex\s-+checkStackChunk
+//* checkStackClosure:: @cindex\s-+checkStackClosure
+//* checkStackObject:: @cindex\s-+checkStackObject
+//* checkTSO:: @cindex\s-+checkTSO
+//* checkTSOsSanity:: @cindex\s-+checkTSOsSanity
+//* checkThreadQSanity:: @cindex\s-+checkThreadQSanity
+//* checkThreadQsSanity:: @cindex\s-+checkThreadQsSanity
+//* isBlackhole:: @cindex\s-+isBlackhole
+//@end index
+
#endif /* DEBUG */
+
/* -----------------------------------------------------------------------------
- * $Id: Sanity.h,v 1.4 1999/02/05 16:02:52 simonm Exp $
+ * $Id: Sanity.h,v 1.5 2000/01/13 14:34:04 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
extern void checkChain ( bdescr *bd );
extern void checkStack ( StgPtr sp, StgPtr stack_end, StgUpdateFrame* su );
extern void checkTSO ( StgTSO* tso );
+#if defined(GRAN)
+extern void checkTSOsSanity(void);
+extern rtsBool checkThreadQSanity (PEs proc, rtsBool check_TSO_too);
+extern rtsBool checkThreadQsSanity (rtsBool check_TSO_too);
+#endif
extern StgOffset checkClosure( StgClosure* p );
-/* -----------------------------------------------------------------------------
- * $Id: Schedule.c,v 1.40 2000/01/13 10:37:31 simonmar Exp $
+/* ---------------------------------------------------------------------------
+ * $Id: Schedule.c,v 1.41 2000/01/13 14:34:05 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
* Scheduler
*
- * ---------------------------------------------------------------------------*/
+ * The main scheduling code in GranSim is quite different from that in std
+ * (concurrent) Haskell: while concurrent Haskell just iterates over the
+ * threads in the runnable queue, GranSim is event-driven, i.e. it iterates
+ * over the events in the global event queue. -- HWL
+ * --------------------------------------------------------------------------*/
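+
+/* Illustrative sketch only, not part of this patch: the standard loop below
+   amounts to
+
+       while (1) { t = POP_RUN_QUEUE(); ...run t...; }
+
+   whereas an event-driven GranSim loop (the GRAN case of schedule() is
+   still marked ToDo below) would look roughly like
+
+       do {
+         event = get_next_event();   // global, time-ordered event queue
+         // hypothetical: switch to the simulated PE this event belongs to,
+         // then handle it (unblock a TSO, start a fetch, run a thread, ...)
+       } while (...more events...);
+
+   get_next_event() is used further down; the loop shape is an assumption. */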
+
+//@node Main scheduling code, , ,
+//@section Main scheduling code
/* Version with scheduler monitor support for SMPs.
SDM & KH, 10/99
*/
+//@menu
+//* Includes::
+//* Variables and Data structures::
+//* Prototypes::
+//* Main scheduling loop::
+//* Suspend and Resume::
+//* Run queue code::
+//* Garbage Collection Routines::
+//* Blocking Queue Routines::
+//* Exception Handling Routines::
+//* Debugging Routines::
+//* Index::
+//@end menu
+
+//@node Includes, Variables and Data structures, Main scheduling code, Main scheduling code
+//@subsection Includes
+
#include "Rts.h"
#include "SchedAPI.h"
#include "RtsUtils.h"
#include "Sanity.h"
#include "Stats.h"
#include "Sparks.h"
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h"
+# include "GranSim.h"
+# include "ParallelRts.h"
+# include "Parallel.h"
+# include "ParallelDebug.h"
+# include "FetchMe.h"
+# include "HLC.h"
+#endif
#include <stdarg.h>
+//@node Variables and Data structures, Prototypes, Includes, Main scheduling code
+//@subsection Variables and Data structures
+
/* Main threads:
*
* These are the threads which clients have requested that we run.
*
* Main threads information is kept in a linked list:
*/
+//@cindex StgMainThread
typedef struct StgMainThread_ {
StgTSO * tso;
SchedulerStatus stat;
/* Thread queues.
* Locks required: sched_mutex.
*/
+
+#if defined(DEBUG)
+char *whatNext_strs[] = {
+ "ThreadEnterGHC",
+ "ThreadRunGHC",
+ "ThreadEnterHugs",
+ "ThreadKilled",
+ "ThreadComplete"
+};
+
+char *threadReturnCode_strs[] = {
+ "HeapOverflow", /* might also be StackOverflow */
+ "StackOverflow",
+ "ThreadYielding",
+ "ThreadBlocked",
+ "ThreadFinished"
+};
+#endif
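+
+/* Intended use of the tables above (sketch only; assumes the corresponding
+   enum values index the tables in this order):
+     IF_DEBUG(scheduler,
+              belch("thread %d: %s", t->id, whatNext_strs[t->whatNext]));
+*/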
+
+#if defined(GRAN)
+
+StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
+// rtsTime TimeOfNextEvent, EndOfTimeSlice; now in GranSim.c
+
+/*
+  In GranSim we have a runnable and a blocked queue for each processor.
+  To minimise code changes, new arrays run_queue_hds/tls are introduced;
+  run_queue_hd is then a shortcut (macro) for run_queue_hds[CurrentProc]
+  (see GranSim.h).
+ -- HWL
+*/
+StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC];
+StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC];
+StgTSO *ccalling_threadss[MAX_PROC];
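+
+/* For illustration only (the real definitions live in GranSim.h and are not
+   shown in this patch); the shortcut mentioned above is assumed to be
+   something like
+
+       #define run_queue_hd      run_queue_hds[CurrentProc]
+       #define run_queue_tl      run_queue_tls[CurrentProc]
+       #define blocked_queue_hd  blocked_queue_hds[CurrentProc]
+
+   so that code written against the single-queue names keeps working, with
+   CurrentProc selecting the queue of the current simulated PE. */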
+
+#else /* !GRAN */
+
+//@cindex run_queue_hd
+//@cindex run_queue_tl
+//@cindex blocked_queue_hd
+//@cindex blocked_queue_tl
StgTSO *run_queue_hd, *run_queue_tl;
StgTSO *blocked_queue_hd, *blocked_queue_tl;
static void GetRoots(void);
static StgTSO *threadStackOverflow(StgTSO *tso);
+#endif
/* KH: The following two flags are shared memory locations. There is no need
to lock them, since they are only unset at the end of a scheduler
*/
/* flag set by signal handler to precipitate a context switch */
+//@cindex context_switch
nat context_switch;
/* if this flag is set as well, give up execution */
+//@cindex interrupted
rtsBool interrupted;
/* Next thread ID to allocate.
* Locks required: sched_mutex
*/
+//@cindex next_thread_id
StgThreadID next_thread_id = 1;
/*
* Locks required: sched_mutex.
*/
#ifdef SMP
-Capability *free_capabilities; /* Available capabilities for running threads */
-nat n_free_capabilities; /* total number of available capabilities */
+//@cindex free_capabilities
+//@cindex n_free_capabilities
+Capability *free_capabilities; /* Available capabilities for running threads */
+nat n_free_capabilities; /* total number of available capabilities */
+#else
+//@cindex MainRegTable
+Capability MainRegTable; /* for non-SMP, we have one global capability */
+#endif
+
+#if defined(GRAN)
+StgTSO *CurrentTSOs[MAX_PROC];
#else
-Capability MainRegTable; /* for non-SMP, we have one global capability */
+StgTSO *CurrentTSO;
#endif
rtsBool ready_to_gc;
/* All our current task ids, saved in case we need to kill them later.
*/
#ifdef SMP
+//@cindex task_ids
task_info *task_ids;
#endif
#endif
#ifdef SMP
+//@cindex sched_mutex
+//@cindex term_mutex
+//@cindex thread_ready_cond
+//@cindex gc_pending_cond
pthread_mutex_t sched_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t term_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t thread_ready_cond = PTHREAD_COND_INITIALIZER;
nat await_death;
#endif
-/* -----------------------------------------------------------------------------
+#if defined(PAR)
+StgTSO *LastTSO;
+rtsTime TimeOfLastYield;
+#endif
+
+/*
+ * The thread state for the main thread.
+ * ToDo: check whether this is still needed (commented out for now)
+StgTSO *MainTSO;
+ */
+
+
+//@node Prototypes, Main scheduling loop, Variables and Data structures, Main scheduling code
+//@subsection Prototypes
+
+#if 0 && defined(GRAN)
+// ToDo: replace these with macros
+static /* inline */ void add_to_run_queue(StgTSO* tso);
+static /* inline */ void push_on_run_queue(StgTSO* tso);
+static /* inline */ StgTSO *take_off_run_queue(StgTSO *tso);
+
+/* Thread management */
+void initScheduler(void);
+#endif
+
+//@node Main scheduling loop, Suspend and Resume, Prototypes, Main scheduling code
+//@subsection Main scheduling loop
+
+/* ---------------------------------------------------------------------------
Main scheduling loop.
We use round-robin scheduling, each thread returning to the
* waiting for work, or
* waiting for a GC to complete.
- -------------------------------------------------------------------------- */
-
+ ------------------------------------------------------------------------ */
+//@cindex schedule
static void
schedule( void )
{
StgTSO *t;
Capability *cap;
StgThreadReturnCode ret;
+#if defined(GRAN)
+ rtsEvent *event;
+#elif defined(PAR)
+ rtsSpark spark;
+ StgTSO *tso;
+ GlobalTaskId pe;
+#endif
ACQUIRE_LOCK(&sched_mutex);
+#if defined(GRAN)
+# error ToDo: implement GranSim scheduler
+#elif defined(PAR)
+ while (!GlobalStopPending) { /* GlobalStopPending set in par_exit */
+
+ if (PendingFetches != END_BF_QUEUE) {
+ processFetches();
+ }
+#else
while (1) {
+#endif
/* If we're interrupted (the user pressed ^C, or some other
* termination condition occurred), kill all the currently running
* number of threads in the run queue equal to the number of
* free capabilities.
*/
-#if defined(SMP) || defined(PAR)
+#if defined(SMP)
{
nat n = n_free_capabilities;
StgTSO *tso = run_queue_hd;
if (spark == NULL) {
break; /* no more sparks in the pool */
} else {
+ // I'd prefer this to be done in activateSpark -- HWL
StgTSO *tso;
tso = createThread_(RtsFlags.GcFlags.initialStkSize, rtsTrue);
pushClosure(tso,spark);
PUSH_ON_RUN_QUEUE(tso);
-#ifdef ToDo
+#ifdef PAR
advisory_thread_count++;
#endif
pthread_cond_signal(&thread_ready_cond);
}
}
-#endif /* SMP || PAR */
+#endif /* SMP */
/* Check whether any waiting threads need to be woken up. If the
* run queue is empty, and there are no other tasks running, we
IF_DEBUG(scheduler, sched_belch("work now available"));
}
#endif
+
+#if defined(GRAN)
+# error ToDo: implement GranSim scheduler
+#elif defined(PAR)
+  // ToDo: perhaps merge with spark activation above
+ /* check whether we have local work and send requests if we have none */
+ if (run_queue_hd == END_TSO_QUEUE) { /* no runnable threads */
+ /* :-[ no local threads => look out for local sparks */
+ if (advisory_thread_count < RtsFlags.ParFlags.maxThreads &&
+ (pending_sparks_hd[REQUIRED_POOL] < pending_sparks_tl[REQUIRED_POOL] ||
+ pending_sparks_hd[ADVISORY_POOL] < pending_sparks_tl[ADVISORY_POOL])) {
+ /*
+ * ToDo: add GC code check that we really have enough heap afterwards!!
+ * Old comment:
+ * If we're here (no runnable threads) and we have pending
+ * sparks, we must have a space problem. Get enough space
+ * to turn one of those pending sparks into a
+ * thread...
+ */
+
+ spark = findSpark(); /* get a spark */
+ if (spark != (rtsSpark) NULL) {
+ tso = activateSpark(spark); /* turn the spark into a thread */
+ IF_PAR_DEBUG(verbose,
+ belch("== [%x] schedule: Created TSO %p (%d); %d threads active",
+ mytid, tso, tso->id, advisory_thread_count));
+
+ if (tso==END_TSO_QUEUE) { // failed to activate spark -> back to loop
+ belch("^^ failed to activate spark");
+ goto next_thread;
+ } // otherwise fall through & pick-up new tso
+ } else {
+ IF_PAR_DEBUG(verbose,
+ belch("^^ no local sparks (spark pool contains only NFs: %d)",
+ spark_queue_len(ADVISORY_POOL)));
+ goto next_thread;
+ }
+ } else
+ /* =8-[ no local sparks => look for work on other PEs */
+ {
+ /*
+ * We really have absolutely no work. Send out a fish
+ * (there may be some out there already), and wait for
+ * something to arrive. We clearly can't run any threads
+ * until a SCHEDULE or RESUME arrives, and so that's what
+ * we're hoping to see. (Of course, we still have to
+ * respond to other types of messages.)
+ */
+ if (//!fishing &&
+ outstandingFishes < RtsFlags.ParFlags.maxFishes ) { // &&
+ // (last_fish_arrived_at+FISH_DELAY < CURRENT_TIME)) {
+ /* fishing set in sendFish, processFish;
+ avoid flooding system with fishes via delay */
+ pe = choosePE();
+ sendFish(pe, mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,
+ NEW_FISH_HUNGER);
+ }
+
+ processMessages();
+ goto next_thread;
+ // ReSchedule(0);
+ }
+ } else if (PacketsWaiting()) { /* Look for incoming messages */
+ processMessages();
+ }
+
+ /* Now we are sure that we have some work available */
+ ASSERT(run_queue_hd != END_TSO_QUEUE);
+ /* Take a thread from the run queue, if we have work */
+ t = take_off_run_queue(END_TSO_QUEUE);
+
+ /* ToDo: write something to the log-file
+ if (RTSflags.ParFlags.granSimStats && !sameThread)
+ DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
+ */
+
+ CurrentTSO = t;
+
+ IF_DEBUG(scheduler, belch("--^^ %d sparks on [%#x] (hd=%x; tl=%x; lim=%x)",
+ spark_queue_len(ADVISORY_POOL), CURRENT_PROC,
+ pending_sparks_hd[ADVISORY_POOL],
+ pending_sparks_tl[ADVISORY_POOL],
+ pending_sparks_lim[ADVISORY_POOL]));
+
+ IF_DEBUG(scheduler, belch("--== %d threads on [%#x] (hd=%x; tl=%x)",
+ run_queue_len(), CURRENT_PROC,
+ run_queue_hd, run_queue_tl));
+
+ if (t != LastTSO) {
+ /*
+ we are running a different TSO, so write a schedule event to log file
+ NB: If we use fair scheduling we also have to write a deschedule
+ event for LastTSO; with unfair scheduling we know that the
+ previous tso has blocked whenever we switch to another tso, so
+ we don't need it in GUM for now
+ */
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0);
+
+ }
+
+#else /* !GRAN && !PAR */
/* grab a thread from the run queue
*/
t = POP_RUN_QUEUE();
+
+#endif
/* grab a capability
*/
IF_DEBUG(scheduler,sched_belch("running thread %d", t->id));
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/* Run the current thread
*/
switch (cap->rCurrentTSO->whatNext) {
default:
barf("schedule: invalid whatNext field");
}
+ /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/* Costs for the scheduler are assigned to CCS_SYSTEM */
#ifdef PROFILING
break;
case ThreadYielding:
+#if defined(GRAN)
+ IF_DEBUG(gran,
+ DumpGranEvent(GR_DESCHEDULE, t));
+ globalGranStats.tot_yields++;
+#elif defined(PAR)
+ IF_DEBUG(par,
+ DumpGranEvent(GR_DESCHEDULE, t));
+#endif
/* put the thread back on the run queue. Then, if we're ready to
* GC, check whether this is the last task to stop. If so, wake
* up the GC thread. getThread will block during a GC until the
break;
case ThreadBlocked:
+#if defined(GRAN)
+# error ToDo: implement GranSim scheduler
+#elif defined(PAR)
+ IF_DEBUG(par,
+ DumpGranEvent(GR_DESCHEDULE, t));
+#else
+#endif
/* don't need to do anything. Either the thread is blocked on
* I/O, in which case we'll have called addToBlockedQueue
* previously, or it's blocked on an MVar or Blackhole, in which
*/
IF_DEBUG(scheduler,belch("thread %ld finished", t->id));
t->whatNext = ThreadComplete;
+#if defined(GRAN)
+ // ToDo: endThread(t, CurrentProc); // clean-up the thread
+#elif defined(PAR)
+ advisory_thread_count--;
+ if (RtsFlags.ParFlags.ParStats.Full)
+ DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
+#endif
break;
default:
#endif
#ifdef SMP
- if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) {
+ if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes)
#else
- if (ready_to_gc) {
+ if (ready_to_gc)
#endif
+ {
/* everybody back, start the GC.
* Could do it in this thread, or signal a condition var
* to do it in another thread. Either way, we need to
pthread_cond_broadcast(&gc_pending_cond);
#endif
}
+#if defined(GRAN)
+ next_thread:
+ IF_GRAN_DEBUG(unused,
+ print_eventq(EventHd));
+
+ event = get_next_event();
+
+#elif defined(PAR)
+ next_thread:
+ /* ToDo: wait for next message to arrive rather than busy wait */
+
+#else  /* !GRAN && !PAR */
+ /* not any more
+ next_thread:
+ t = take_off_run_queue(END_TSO_QUEUE);
+ */
+#endif /* GRAN */
} /* end of while(1) */
}
-
/* A hack for Hugs concurrency support. Needs sanitisation (?) */
void deleteAllThreads ( void )
{
blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;
}
+/* startThread and insertThread are now in GranSim.c -- HWL */
-/* -----------------------------------------------------------------------------
+//@node Suspend and Resume, Run queue code, Main scheduling loop, Main scheduling code
+//@subsection Suspend and Resume
+
+/* ---------------------------------------------------------------------------
* Suspending & resuming Haskell threads.
*
* When making a "safe" call to C (aka _ccall_GC), the task gives back
 * duration of the call, on the suspended_ccalling_threads queue. We
* give out a token to the task, which it can use to resume the thread
* on return from the C function.
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------- */
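+
+/* Sketch of the intended round trip (illustrative only; resumeThread's exact
+   signature is not shown in this patch and is assumed here):
+
+       StgInt tok;
+       tok = suspendThread(cap);   // hand back the capability for the call
+       r   = some_c_fn(...);       // the "safe" C call may block or GC
+       cap = resumeThread(tok);    // redeem the token, thread runnable again
+
+   some_c_fn is a placeholder for the foreign function being called. */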
StgInt
suspendThread( Capability *cap )
return cap;
}
-/* -----------------------------------------------------------------------------
+
+/* ---------------------------------------------------------------------------
* Static functions
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------ */
static void unblockThread(StgTSO *tso);
-/* -----------------------------------------------------------------------------
+/* ---------------------------------------------------------------------------
* Comparing Thread ids.
*
* This is used from STG land in the implementation of the
* instances of Eq/Ord for ThreadIds.
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------ */
int cmp_thread(const StgTSO *tso1, const StgTSO *tso2)
{
return 0;
}
-/* -----------------------------------------------------------------------------
+/* ---------------------------------------------------------------------------
Create a new thread.
The new thread starts with the given stack size. Before the
createGenThread() and createIOThread() (in SchedAPI.h) are
convenient packaged versions of this function.
- -------------------------------------------------------------------------- */
+ ------------------------------------------------------------------------ */
+//@cindex createThread
+#if defined(GRAN)
+/* currently pri (priority) is only used in a GRAN setup -- HWL */
+StgTSO *
+createThread(nat stack_size, StgInt pri)
+{
+ return createThread_(stack_size, rtsFalse, pri);
+}
+static StgTSO *
+createThread_(nat size, rtsBool have_lock, StgInt pri)
+{
+#else
StgTSO *
-createThread(nat size)
+createThread(nat stack_size)
{
- return createThread_(size, rtsFalse);
+ return createThread_(stack_size, rtsFalse);
}
static StgTSO *
createThread_(nat size, rtsBool have_lock)
{
- StgTSO *tso;
- nat stack_size;
+#endif
+ StgTSO *tso;
+ nat stack_size;
+
+ /* First check whether we should create a thread at all */
+#if defined(PAR)
+ /* check that no more than RtsFlags.ParFlags.maxThreads threads are created */
+ if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads) {
+ threadsIgnored++;
+ belch("{createThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)",
+ RtsFlags.ParFlags.maxThreads, advisory_thread_count);
+ return END_TSO_QUEUE;
+ }
+ threadsCreated++;
+#endif
+
+#if defined(GRAN)
+ ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
+#endif
+
+ // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW
/* catch ridiculously small stack sizes */
if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) {
stack_size = size - TSO_STRUCT_SIZEW;
+ // Hmm, this CCS_MAIN is not protected by a PROFILING cpp var;
SET_HDR(tso, &TSO_info, CCS_MAIN);
- tso->whatNext = ThreadEnterGHC;
-
+#if defined(GRAN)
+ SET_GRAN_HDR(tso, ThisPE);
+#endif
+ tso->whatNext = ThreadEnterGHC;
+
/* tso->id needs to be unique. For now we use a heavyweight mutex to
protect the increment operation on next_thread_id.
In future, we could use an atomic increment instead.
SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_MAIN);
tso->su = (StgUpdateFrame*)tso->sp;
+ IF_DEBUG(scheduler,belch("---- Initialised TSO %ld (%p), stack size = %lx words",
+ tso->id, tso, tso->stack_size));
+
+ // ToDo: check this
+#if defined(GRAN)
+ tso->link = END_TSO_QUEUE;
+ /* uses more flexible routine in GranSim */
+ insertThread(tso, CurrentProc);
+#else
+ add_to_run_queue(tso);
+#endif
+
+#if defined(GRAN)
+ tso->gran.pri = pri;
+ tso->gran.magic = TSO_MAGIC; // debugging only
+ tso->gran.sparkname = 0;
+ tso->gran.startedat = CURRENT_TIME;
+ tso->gran.exported = 0;
+ tso->gran.basicblocks = 0;
+ tso->gran.allocs = 0;
+ tso->gran.exectime = 0;
+ tso->gran.fetchtime = 0;
+ tso->gran.fetchcount = 0;
+ tso->gran.blocktime = 0;
+ tso->gran.blockcount = 0;
+ tso->gran.blockedat = 0;
+ tso->gran.globalsparks = 0;
+ tso->gran.localsparks = 0;
+ if (RtsFlags.GranFlags.Light)
+ tso->gran.clock = Now; /* local clock */
+ else
+ tso->gran.clock = 0;
+
+ IF_DEBUG(gran,printTSO(tso));
+#elif defined(PAR)
+ tso->par.sparkname = 0;
+ tso->par.startedat = CURRENT_TIME;
+ tso->par.exported = 0;
+ tso->par.basicblocks = 0;
+ tso->par.allocs = 0;
+ tso->par.exectime = 0;
+ tso->par.fetchtime = 0;
+ tso->par.fetchcount = 0;
+ tso->par.blocktime = 0;
+ tso->par.blockcount = 0;
+ tso->par.blockedat = 0;
+ tso->par.globalsparks = 0;
+ tso->par.localsparks = 0;
+#endif
+
+#if defined(GRAN)
+ globalGranStats.tot_threads_created++;
+ globalGranStats.threads_created_on_PE[CurrentProc]++;
+ globalGranStats.tot_sq_len += spark_queue_len(CurrentProc);
+ globalGranStats.tot_sq_probes++;
+#endif
+
IF_DEBUG(scheduler,sched_belch("created thread %ld, stack size = %lx words",
tso->id, tso->stack_size));
return tso;
}
-
-/* -----------------------------------------------------------------------------
+/* ---------------------------------------------------------------------------
* scheduleThread()
*
* scheduleThread puts a thread on the head of the runnable queue.
* The caller of scheduleThread must create the thread using e.g.
* createThread and push an appropriate closure
* on this thread's stack before the scheduler is invoked.
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------ */
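+
+/* Typical call sequence, for illustration (pushClosure is used in exactly
+   this way in the spark-activation code above; createGenThread and
+   createIOThread in SchedAPI.h package the pattern up):
+
+       StgTSO *t = createThread(RtsFlags.GcFlags.initialStkSize);
+       pushClosure(t, closure);    // what the new thread should evaluate
+       scheduleThread(t);          // make it runnable
+*/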
void
scheduleThread(StgTSO *tso)
RELEASE_LOCK(&sched_mutex);
}
-
-/* -----------------------------------------------------------------------------
+/* ---------------------------------------------------------------------------
* startTasks()
*
* Start up Posix threads to run each of the scheduler tasks.
* I believe the task ids are not needed in the system as defined.
- * KH @ 25/10/99
- * -------------------------------------------------------------------------- */
+ * KH @ 25/10/99
+ * ------------------------------------------------------------------------ */
#ifdef SMP
static void *
}
#endif
-/* -----------------------------------------------------------------------------
+/* ---------------------------------------------------------------------------
* initScheduler()
*
* Initialise the scheduler. This resets all the queues - if the
* next pass.
*
* This now calls startTasks(), so should only be called once! KH @ 25/10/99
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------ */
#ifdef SMP
static void
}
#endif
-void initScheduler(void)
+//@cindex initScheduler
+void
+initScheduler(void)
{
+#if defined(GRAN)
+ nat i;
+
+  /* the queue arrays are [MAX_PROC], i.e. valid indices are 0..MAX_PROC-1 */
+  for (i=0; i<MAX_PROC; i++) {
+ run_queue_hds[i] = END_TSO_QUEUE;
+ run_queue_tls[i] = END_TSO_QUEUE;
+ blocked_queue_hds[i] = END_TSO_QUEUE;
+ blocked_queue_tls[i] = END_TSO_QUEUE;
+ ccalling_threadss[i] = END_TSO_QUEUE;
+ }
+#else
run_queue_hd = END_TSO_QUEUE;
run_queue_tl = END_TSO_QUEUE;
blocked_queue_hd = END_TSO_QUEUE;
blocked_queue_tl = END_TSO_QUEUE;
+#endif
suspended_ccalling_threads = END_TSO_QUEUE;
return stat;
}
-
-/* -----------------------------------------------------------------------------
- Debugging: why is a thread blocked
- -------------------------------------------------------------------------- */
-#ifdef DEBUG
-void printThreadBlockage(StgTSO *tso)
+//@node Run queue code, Garbage Collection Routines, Suspend and Resume, Main scheduling code
+//@subsection Run queue code
+
+#if 0
+/*
+ NB: In GranSim we have many run queues; run_queue_hd is actually a macro
+ unfolding to run_queue_hds[CurrentProc], thus CurrentProc is an
+ implicit global variable that has to be correct when calling these
+ fcts -- HWL
+*/
+
+/* Put the new thread on the head of the runnable queue.
+ * The caller of createThread better push an appropriate closure
+ * on this thread's stack before the scheduler is invoked.
+ */
+static /* inline */ void
+add_to_run_queue(StgTSO* tso)
{
- switch (tso->why_blocked) {
- case BlockedOnRead:
- fprintf(stderr,"blocked on read from fd %d", tso->block_info.fd);
- break;
- case BlockedOnWrite:
- fprintf(stderr,"blocked on write to fd %d", tso->block_info.fd);
- break;
- case BlockedOnDelay:
- fprintf(stderr,"blocked on delay of %d ms", tso->block_info.delay);
- break;
- case BlockedOnMVar:
- fprintf(stderr,"blocked on an MVar");
- break;
- case BlockedOnException:
- fprintf(stderr,"blocked on delivering an exception to thread %d",
- tso->block_info.tso->id);
- break;
- case BlockedOnBlackHole:
- fprintf(stderr,"blocked on a black hole");
- break;
- case NotBlocked:
- fprintf(stderr,"not blocked");
- break;
+ ASSERT(tso!=run_queue_hd && tso!=run_queue_tl);
+ tso->link = run_queue_hd;
+ run_queue_hd = tso;
+ if (run_queue_tl == END_TSO_QUEUE) {
+ run_queue_tl = tso;
}
}
-#endif
-/* -----------------------------------------------------------------------------
+/* Put the new thread at the end of the runnable queue. */
+static /* inline */ void
+push_on_run_queue(StgTSO* tso)
+{
+ ASSERT(get_itbl((StgClosure *)tso)->type == TSO);
+ ASSERT(run_queue_hd!=NULL && run_queue_tl!=NULL);
+ ASSERT(tso!=run_queue_hd && tso!=run_queue_tl);
+ if (run_queue_hd == END_TSO_QUEUE) {
+ run_queue_hd = tso;
+ } else {
+ run_queue_tl->link = tso;
+ }
+ run_queue_tl = tso;
+}
+
+/*
+ Should be inlined because it's used very often in schedule. The tso
+ argument is actually only needed in GranSim, where we want to have the
+ possibility to schedule *any* TSO on the run queue, irrespective of the
+ actual ordering. Therefore, if tso is not the nil TSO then we traverse
+ the run queue and dequeue the tso, adjusting the links in the queue.
+*/
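+/* The two calling conventions, for illustration (cf. the call site in
+   schedule() above):
+     take_off_run_queue(END_TSO_QUEUE)  -- pop the head (standard FIFO use)
+     take_off_run_queue(tso)            -- unlink this particular tso (GranSim)
+*/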
+//@cindex take_off_run_queue
+static /* inline */ StgTSO*
+take_off_run_queue(StgTSO *tso) {
+ StgTSO *t, *prev;
+
+ /*
+ qetlaHbogh Qu' ngaSbogh ghomDaQ {tso} yIteq!
+
+ if tso is specified, unlink that tso from the run_queue (doesn't have
+ to be at the beginning of the queue); GranSim only
+ */
+ if (tso!=END_TSO_QUEUE) {
+ /* find tso in queue */
+ for (t=run_queue_hd, prev=END_TSO_QUEUE;
+ t!=END_TSO_QUEUE && t!=tso;
+ prev=t, t=t->link)
+ /* nothing */ ;
+ ASSERT(t==tso);
+ /* now actually dequeue the tso */
+ if (prev!=END_TSO_QUEUE) {
+ ASSERT(run_queue_hd!=t);
+ prev->link = t->link;
+ } else {
+ /* t is at beginning of thread queue */
+ ASSERT(run_queue_hd==t);
+ run_queue_hd = t->link;
+ }
+    /* if t was at the end of the thread queue, adjust the tail pointer */
+ if (t->link==END_TSO_QUEUE) {
+ ASSERT(t==run_queue_tl);
+ run_queue_tl = prev;
+ } else {
+ ASSERT(run_queue_tl!=t);
+ }
+ t->link = END_TSO_QUEUE;
+ } else {
+ /* take tso from the beginning of the queue; std concurrent code */
+ t = run_queue_hd;
+ if (t != END_TSO_QUEUE) {
+ run_queue_hd = t->link;
+ t->link = END_TSO_QUEUE;
+ if (run_queue_hd == END_TSO_QUEUE) {
+ run_queue_tl = END_TSO_QUEUE;
+ }
+ }
+ }
+ return t;
+}
+
+#endif /* 0 */
+
+nat
+run_queue_len(void)
+{
+ nat i;
+ StgTSO *tso;
+
+ for (i=0, tso=run_queue_hd;
+ tso != END_TSO_QUEUE;
+ i++, tso=tso->link)
+    /* nothing */ ;              /* NB: the ';' is the (empty) loop body */
+
+ return i;
+}
+
+
+//@node Garbage Collection Routines, Blocking Queue Routines, Run queue code, Main scheduling code
+//@subsection Garbage Collection Routines
+
+/* ---------------------------------------------------------------------------
Where are the roots that we know about?
- all the threads on the runnable queue
- all the thread currently executing a _ccall_GC
- all the "main threads"
- -------------------------------------------------------------------------- */
+ ------------------------------------------------------------------------ */
/* This has to be protected either by the scheduler monitor, or by the
garbage collection monitor (probably the latter).
static void GetRoots(void)
{
StgMainThread *m;
+ nat i;
+
+#if defined(GRAN)
+  for (i=0; i<RtsFlags.GranFlags.proc; i++) {  /* PEs are numbered 0..proc-1 */
+ if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
+ run_queue_hds[i] = (StgTSO *)MarkRoot((StgClosure *)run_queue_hds[i]);
+ if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
+ run_queue_tls[i] = (StgTSO *)MarkRoot((StgClosure *)run_queue_tls[i]);
+
+ if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
+ blocked_queue_hds[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hds[i]);
+ if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
+ blocked_queue_tls[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tls[i]);
+ if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
+ ccalling_threadss[i] = (StgTSO *)MarkRoot((StgClosure *)ccalling_threadss[i]);
+ }
+ markEventQueue();
+#elif defined(PAR)
+ run_queue_hd = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd);
+ run_queue_tl = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl);
+ blocked_queue_hd = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd);
+ blocked_queue_tl = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl);
+#else
run_queue_hd = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd);
run_queue_tl = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl);
blocked_queue_hd = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd);
blocked_queue_tl = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl);
+#endif
for (m = main_threads; m != NULL; m = m->link) {
m->tso = (StgTSO *)MarkRoot((StgClosure *)m->tso);
return dest;
}
-/* -----------------------------------------------------------------------------
+//@node Blocking Queue Routines, Exception Handling Routines, Garbage Collection Routines, Main scheduling code
+//@subsection Blocking Queue Routines
+
+/* ---------------------------------------------------------------------------
Wake up a queue that was blocked on some resource.
- -------------------------------------------------------------------------- */
+ ------------------------------------------------------------------------ */
+
+// ToDo: check push_on_run_queue vs. PUSH_ON_RUN_QUEUE
+
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+static inline void
+unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node )
+{
+ /* write RESUME events to log file and
+ update blocked and fetch time (depending on type of the orig closure) */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure,
+ 0, 0 /* spark_queue_len(ADVISORY_POOL) */);
+
+ switch (get_itbl(node)->type) {
+ case FETCH_ME_BQ:
+ ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
+ break;
+ case RBH:
+ case FETCH_ME:
+ case BLACKHOLE_BQ:
+ ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
+ break;
+ default:
+ barf("{unblockOneLocked}Daq Qagh: unexpected closure in blocking queue");
+ }
+ }
+}
+#endif
+
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+static StgBlockingQueueElement *
+unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node)
+{
+ StgBlockingQueueElement *next;
+
+ switch (get_itbl(bqe)->type) {
+ case TSO:
+ ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked);
+ /* if it's a TSO just push it onto the run_queue */
+ next = bqe->link;
+ // ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
+ PUSH_ON_RUN_QUEUE((StgTSO *)bqe);
+ THREAD_RUNNABLE();
+ unblockCount(bqe, node);
+ /* reset blocking status after dumping event */
+ ((StgTSO *)bqe)->why_blocked = NotBlocked;
+ break;
+
+ case BLOCKED_FETCH:
+ /* if it's a BLOCKED_FETCH put it on the PendingFetches list */
+ next = bqe->link;
+ bqe->link = PendingFetches;
+ PendingFetches = bqe;
+ break;
+# if defined(DEBUG)
+ /* can ignore this case in a non-debugging setup;
+ see comments on RBHSave closures above */
+ case CONSTR:
+ /* check that the closure is an RBHSave closure */
+ ASSERT(get_itbl((StgClosure *)bqe) == &RBH_Save_0_info ||
+ get_itbl((StgClosure *)bqe) == &RBH_Save_1_info ||
+ get_itbl((StgClosure *)bqe) == &RBH_Save_2_info);
+ break;
+
+ default:
+ barf("{unblockOneLocked}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n",
+ get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe),
+ (StgClosure *)bqe);
+# endif
+ }
+ // IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id));
+ return next;
+}
+
+#else /* !GRAN && !PAR */
static StgTSO *
unblockOneLocked(StgTSO *tso)
{
IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id));
return next;
}
+#endif
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+inline StgTSO *
+unblockOne(StgTSO *tso, StgClosure *node)
+{
+ ACQUIRE_LOCK(&sched_mutex);
+ tso = unblockOneLocked(tso, node);
+ RELEASE_LOCK(&sched_mutex);
+ return tso;
+}
+#else
inline StgTSO *
unblockOne(StgTSO *tso)
{
RELEASE_LOCK(&sched_mutex);
return tso;
}
+#endif
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+void
+awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
+{
+ StgBlockingQueueElement *bqe, *next;
+
+ ACQUIRE_LOCK(&sched_mutex);
+
+ IF_PAR_DEBUG(verbose,
+ belch("## AwBQ for node %p on [%x]: ",
+ node, mytid));
+
+ ASSERT(get_itbl(q)->type == TSO ||
+ get_itbl(q)->type == BLOCKED_FETCH ||
+ get_itbl(q)->type == CONSTR);
+
+ bqe = q;
+ while (get_itbl(bqe)->type==TSO ||
+ get_itbl(bqe)->type==BLOCKED_FETCH) {
+ bqe = unblockOneLocked(bqe, node);
+ }
+ RELEASE_LOCK(&sched_mutex);
+}
+
+#else /* !GRAN && !PAR */
void
awakenBlockedQueue(StgTSO *tso)
{
}
RELEASE_LOCK(&sched_mutex);
}
+#endif
-/* -----------------------------------------------------------------------------
+#if 0
+// ngoq ngo'
+
+#if defined(GRAN)
+/*
+ Awakening a blocking queue in GranSim means checking for each of the
+ TSOs in the queue whether they are local or not, issuing a ResumeThread
+ or an UnblockThread event, respectively. The basic iteration over the
+ blocking queue is the same as in the standard setup.
+*/
+void
+awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node)
+{
+ StgBlockingQueueElement *bqe, *next;
+ StgTSO *tso;
+ PEs node_loc, tso_loc;
+ rtsTime bq_processing_time = 0;
+ nat len = 0, len_local = 0;
+
+ IF_GRAN_DEBUG(bq,
+ belch("## AwBQ for node %p on PE %d @ %ld by TSO %d (%p): ", \
+ node, CurrentProc, CurrentTime[CurrentProc],
+ CurrentTSO->id, CurrentTSO));
+
+ node_loc = where_is(node);
+
+ ASSERT(get_itbl(q)->type == TSO || // q is either a TSO or an RBHSave
+ get_itbl(q)->type == CONSTR); // closure (type constructor)
+ ASSERT(is_unique(node));
+
+ /* FAKE FETCH: magically copy the node to the tso's proc;
+ no Fetch necessary because in reality the node should not have been
+ moved to the other PE in the first place
+ */
+ if (CurrentProc!=node_loc) {
+ IF_GRAN_DEBUG(bq,
+ belch("## node %p is on PE %d but CurrentProc is %d (TSO %d); assuming fake fetch and adjusting bitmask (old: %#x)",
+ node, node_loc, CurrentProc, CurrentTSO->id,
+ // CurrentTSO, where_is(CurrentTSO),
+ node->header.gran.procs));
+ node->header.gran.procs = (node->header.gran.procs) | PE_NUMBER(CurrentProc);
+ IF_GRAN_DEBUG(bq,
+ belch("## new bitmask of node %p is %#x",
+ node, node->header.gran.procs));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_fake_fetches++;
+ }
+ }
+
+ next = q;
+ // ToDo: check: ASSERT(CurrentProc==node_loc);
+ while (get_itbl(next)->type==TSO) { // q != END_TSO_QUEUE) {
+ bqe = next;
+ next = bqe->link;
+ /*
+ bqe points to the current element in the queue
+ next points to the next element in the queue
+ */
+ tso = (StgTSO *)bqe; // wastes an assignment to get the type right
+ tso_loc = where_is(tso);
+ if (IS_LOCAL_TO(PROCS(node),tso_loc)) { // TSO is local
+ /* !fake_fetch => TSO is on CurrentProc is same as IS_LOCAL_TO */
+ ASSERT(CurrentProc!=node_loc || tso_loc==CurrentProc);
+ bq_processing_time += RtsFlags.GranFlags.Costs.lunblocktime;
+ // insertThread(tso, node_loc);
+ new_event(tso_loc, tso_loc,
+ CurrentTime[CurrentProc]+bq_processing_time,
+ ResumeThread,
+ tso, node, (rtsSpark*)NULL);
+ tso->link = END_TSO_QUEUE; // overwrite link just to be sure
+ len_local++;
+ len++;
+ } else { // TSO is remote (actually should be FMBQ)
+ bq_processing_time += RtsFlags.GranFlags.Costs.mpacktime;
+ bq_processing_time += RtsFlags.GranFlags.Costs.gunblocktime;
+ new_event(tso_loc, CurrentProc,
+ CurrentTime[CurrentProc]+bq_processing_time+
+ RtsFlags.GranFlags.Costs.latency,
+ UnblockThread,
+ tso, node, (rtsSpark*)NULL);
+ tso->link = END_TSO_QUEUE; // overwrite link just to be sure
+ bq_processing_time += RtsFlags.GranFlags.Costs.mtidytime;
+ len++;
+ }
+ /* the thread-queue-overhead is accounted for in either Resume or UnblockThread */
+ IF_GRAN_DEBUG(bq,
+ fprintf(stderr," %s TSO %d (%p) [PE %d] (blocked_on=%p) (next=%p) ,",
+ (node_loc==tso_loc ? "Local" : "Global"),
+ tso->id, tso, CurrentProc, tso->block_info.closure, tso->link))
+ tso->block_info.closure = NULL;
+ IF_DEBUG(scheduler,belch("-- Waking up thread %ld (%p)",
+ tso->id, tso));
+ }
+
+ /* if this is the BQ of an RBH, we have to put back the info ripped out of
+ the closure to make room for the anchor of the BQ */
+ if (next!=END_BQ_QUEUE) {
+ ASSERT(get_itbl(node)->type == RBH && get_itbl(next)->type == CONSTR);
+ /*
+ ASSERT((info_ptr==&RBH_Save_0_info) ||
+ (info_ptr==&RBH_Save_1_info) ||
+ (info_ptr==&RBH_Save_2_info));
+ */
+ /* cf. convertToRBH in RBH.c for writing the RBHSave closure */
+ ((StgRBH *)node)->blocking_queue = ((StgRBHSave *)next)->payload[0];
+ ((StgRBH *)node)->mut_link = ((StgRBHSave *)next)->payload[1];
+
+ IF_GRAN_DEBUG(bq,
+ belch("## Filled in RBH_Save for %p (%s) at end of AwBQ",
+ node, info_type(node)));
+ }
+
+ /* statistics gathering */
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_bq_processing_time += bq_processing_time;
+ globalGranStats.tot_bq_len += len; // total length of all bqs awakened
+ globalGranStats.tot_bq_len_local += len_local; // same for local TSOs only
+ globalGranStats.tot_awbq++; // total no. of bqs awakened
+ }
+ IF_GRAN_DEBUG(bq,
+ fprintf(stderr,"## BQ Stats of %p: [%d entries, %d local] %s\n",
+ node, len, len_local, (next!=END_TSO_QUEUE) ? "RBH" : ""));
+}
+
+#elif defined(PAR)
+
+/*
+ Awakening a blocking queue in GUM has to check whether an entry in the
+  queue is a normal TSO or a BLOCKED_FETCH. The latter indicates that a TSO is
+  waiting for the result of this computation on another PE. Thus, when
+  finding a BLOCKED_FETCH we have to send off a message to that PE.
+  Actually, we defer sending the message by just putting the BLOCKED_FETCH
+  onto the PendingFetches queue, which is later traversed by
+  processFetches, sending off a RESUME message for each BLOCKED_FETCH.
+
+ NB: There is no check for an RBHSave closure (type CONSTR) in the code
+ below. The reason is, if we awaken the BQ of an RBH closure (RBHSaves
+ only exist at the end of such BQs) we know that the closure has been
+ unpacked successfully on the other PE, and we can discard the info
+ contained in the RBHSave closure. The current closure will be turned
+ into a FetchMe closure anyway.
+*/
+void
+awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node)
+{
+ StgBlockingQueueElement *bqe, *next;
+
+ IF_PAR_DEBUG(verbose,
+ belch("## AwBQ for node %p on [%x]: ",
+ node, mytid));
+
+ ASSERT(get_itbl(q)->type == TSO ||
+ get_itbl(q)->type == BLOCKED_FETCH ||
+ get_itbl(q)->type == CONSTR);
+
+ next = q;
+ while (get_itbl(next)->type==TSO ||
+ get_itbl(next)->type==BLOCKED_FETCH) {
+ bqe = next;
+ switch (get_itbl(bqe)->type) {
+ case TSO:
+ /* if it's a TSO just push it onto the run_queue */
+ next = bqe->link;
+#if defined(DEBUG)
+ ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging only
+#endif
+ push_on_run_queue((StgTSO *)bqe); // HWL: was: PUSH_ON_RUN_QUEUE(tso);
+
+ /* write RESUME events to log file and
+ update blocked and fetch time (depending on type of the orig closure) */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure,
+ 0, spark_queue_len(ADVISORY_POOL));
+
+ switch (get_itbl(node)->type) {
+ case FETCH_ME_BQ:
+ ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
+ break;
+ case RBH:
+ case FETCH_ME:
+ case BLACKHOLE_BQ:
+ ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
+ break;
+ default:
+ barf("{awaken_blocked_queue}Daq Qagh: unexpected closure %p (%s) with blocking queue",
+ node, info_type(node));
+ }
+ }
+ /* reset block_info.closure field after dumping event */
+ ((StgTSO *)bqe)->block_info.closure = NULL;
+
+ /* rest of this branch is debugging only */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr," TSO %d (%p) [PE %lx] (block_info.closure=%p) (next=%p) ,",
+ ((StgTSO *)bqe)->id, (StgTSO *)bqe,
+ mytid, ((StgTSO *)bqe)->block_info.closure, ((StgTSO *)bqe)->link));
+
+ IF_DEBUG(scheduler,
+ if (!RtsFlags.ParFlags.Debug.verbose)
+ belch("-- Waking up thread %ld (%p)",
+ ((StgTSO *)bqe)->id, (StgTSO *)bqe));
+ break;
+
+ case BLOCKED_FETCH:
+ /* if it's a BLOCKED_FETCH put it on the PendingFetches list */
+ next = bqe->link;
+ bqe->link = PendingFetches;
+ PendingFetches = bqe;
+ // bqe.tso->block_info.closure = NULL;
+
+ /* rest of this branch is debugging only */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr," BLOCKED_FETCH (%p) on node %p [PE %lx] (next=%p) ,",
+ ((StgBlockedFetch *)bqe),
+ ((StgBlockedFetch *)bqe)->node,
+ mytid, ((StgBlockedFetch *)bqe)->link));
+ break;
+
+# if defined(DEBUG)
+ /* can ignore this case in a non-debugging setup;
+ see comments on RBHSave closures above */
+ case CONSTR:
+ /* check that the closure is an RBHSave closure */
+ ASSERT(get_itbl((StgClosure *)bqe) == &RBH_Save_0_info ||
+ get_itbl((StgClosure *)bqe) == &RBH_Save_1_info ||
+ get_itbl((StgClosure *)bqe) == &RBH_Save_2_info);
+ break;
+
+ default:
+ barf("{awaken_blocked_queue}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n",
+ get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe),
+ (StgClosure *)bqe);
+# endif
+ }
+ }
+}
+
+#else /* !GRAN && !PAR */
+
+void
+awaken_blocked_queue(StgTSO *q) { awakenBlockedQueue(q); }
+
+/*
+{
+ StgTSO *tso;
+
+ while (q != END_TSO_QUEUE) {
+ ASSERT(get_itbl(q)->type == TSO);
+ tso = q;
+ q = tso->link;
+ push_on_run_queue(tso); // HWL: was: PUSH_ON_RUN_QUEUE(tso);
+ //tso->block_info.closure = NULL;
+ IF_DEBUG(scheduler, belch("-- Waking up thread %ld (%p)", tso->id, tso));
+ }
+}
+*/
+#endif /* GRAN */
+#endif /* 0 */
+
+//@node Exception Handling Routines, Debugging Routines, Blocking Queue Routines, Main scheduling code
+//@subsection Exception Handling Routines
+
+/* ---------------------------------------------------------------------------
Interrupt execution
- usually called inside a signal handler so it mustn't do anything fancy.
- -------------------------------------------------------------------------- */
+ ------------------------------------------------------------------------ */
void
interruptStgRts(void)
This is for use when we raise an exception in another thread, which
may be blocked.
+ This has nothing to do with the UnblockThread event in GranSim. -- HWL
-------------------------------------------------------------------------- */
static void
barf("raiseAsync");
}
+//@node Debugging Routines, Index, Exception Handling Routines, Main scheduling code
+//@subsection Debugging Routines
+
/* -----------------------------------------------------------------------------
- Debuggery...
+ Debugging: why is a thread blocked
-------------------------------------------------------------------------- */
#ifdef DEBUG
+
+void printThreadBlockage(StgTSO *tso)
+{
+ switch (tso->why_blocked) {
+ case BlockedOnRead:
+ fprintf(stderr,"blocked on read from fd %d", tso->block_info.fd);
+ break;
+ case BlockedOnWrite:
+ fprintf(stderr,"blocked on write to fd %d", tso->block_info.fd);
+ break;
+ case BlockedOnDelay:
+ fprintf(stderr,"blocked on delay of %d ms", tso->block_info.delay);
+ break;
+ case BlockedOnMVar:
+ fprintf(stderr,"blocked on an MVar");
+ break;
+ case BlockedOnException:
+ fprintf(stderr,"blocked on delivering an exception to thread %d",
+ tso->block_info.tso->id);
+ break;
+ case BlockedOnBlackHole:
+ fprintf(stderr,"blocked on a black hole");
+ break;
+ case NotBlocked:
+ fprintf(stderr,"not blocked");
+ break;
+#if defined(PAR)
+ case BlockedOnGA:
+ fprintf(stderr,"blocked on global address");
+ break;
+#endif
+ }
+}
+
+/*
+ Print a whole blocking queue attached to node (debugging only).
+*/
+//@cindex print_bq
+# if defined(PAR)
+void
+print_bq (StgClosure *node)
+{
+ StgBlockingQueueElement *bqe;
+ StgTSO *tso;
+ rtsBool end;
+
+ fprintf(stderr,"## BQ of closure %p (%s): ",
+ node, info_type(node));
+
+ /* should cover all closures that may have a blocking queue */
+ ASSERT(get_itbl(node)->type == BLACKHOLE_BQ ||
+ get_itbl(node)->type == FETCH_ME_BQ ||
+ get_itbl(node)->type == RBH);
+
+ ASSERT(node!=(StgClosure*)NULL); // sanity check
+ /*
+ NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure;
+ */
+ for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE);
+ !end; // iterate until bqe points to a CONSTR
+ end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) {
+ ASSERT(bqe != END_BQ_QUEUE); // sanity check
+ ASSERT(bqe != (StgTSO*)NULL); // sanity check
+ /* types of closures that may appear in a blocking queue */
+ ASSERT(get_itbl(bqe)->type == TSO ||
+ get_itbl(bqe)->type == BLOCKED_FETCH ||
+ get_itbl(bqe)->type == CONSTR);
+ /* only BQs of an RBH end with an RBH_Save closure */
+ ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH);
+
+ switch (get_itbl(bqe)->type) {
+ case TSO:
+ fprintf(stderr," TSO %d (%x),",
+ ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
+ break;
+ case BLOCKED_FETCH:
+      fprintf(stderr," BF (node=%p, ga=(%x, %d, %x)),",
+ ((StgBlockedFetch *)bqe)->node,
+ ((StgBlockedFetch *)bqe)->ga.payload.gc.gtid,
+ ((StgBlockedFetch *)bqe)->ga.payload.gc.slot,
+ ((StgBlockedFetch *)bqe)->ga.weight);
+ break;
+ case CONSTR:
+ fprintf(stderr," %s (IP %p),",
+ (get_itbl(bqe) == &RBH_Save_0_info ? "RBH_Save_0" :
+ get_itbl(bqe) == &RBH_Save_1_info ? "RBH_Save_1" :
+ get_itbl(bqe) == &RBH_Save_2_info ? "RBH_Save_2" :
+ "RBH_Save_?"), get_itbl(bqe));
+ break;
+ default:
+ barf("Unexpected closure type %s in blocking queue of %p (%s)",
+ info_type(bqe), node, info_type(node));
+ break;
+ }
+ } /* for */
+ fputc('\n', stderr);
+}
+# elif defined(GRAN)
+void
+print_bq (StgClosure *node)
+{
+ StgBlockingQueueElement *bqe;
+ StgTSO *tso;
+ PEs node_loc, tso_loc;
+ rtsBool end;
+
+ /* should cover all closures that may have a blocking queue */
+ ASSERT(get_itbl(node)->type == BLACKHOLE_BQ ||
+ get_itbl(node)->type == FETCH_ME_BQ ||
+ get_itbl(node)->type == RBH);
+
+ ASSERT(node!=(StgClosure*)NULL); // sanity check
+ node_loc = where_is(node);
+
+ fprintf(stderr,"## BQ of closure %p (%s) on [PE %d]: ",
+ node, info_type(node), node_loc);
+
+ /*
+ NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure;
+ */
+ for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE);
+ !end; // iterate until bqe points to a CONSTR
+ end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) {
+ ASSERT(bqe != END_BQ_QUEUE); // sanity check
+ ASSERT(bqe != (StgTSO*)NULL); // sanity check
+ /* types of closures that may appear in a blocking queue */
+ ASSERT(get_itbl(bqe)->type == TSO ||
+ get_itbl(bqe)->type == CONSTR);
+ /* only BQs of an RBH end with an RBH_Save closure */
+ ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH);
+
+ tso_loc = where_is((StgClosure *)bqe);
+ switch (get_itbl(bqe)->type) {
+ case TSO:
+ fprintf(stderr," TSO %d (%x) on [PE %d],",
+ ((StgTSO *)bqe)->id, ((StgTSO *)bqe), tso_loc);
+ break;
+ case CONSTR:
+ fprintf(stderr," %s (IP %p),",
+ (get_itbl(bqe) == &RBH_Save_0_info ? "RBH_Save_0" :
+ get_itbl(bqe) == &RBH_Save_1_info ? "RBH_Save_1" :
+ get_itbl(bqe) == &RBH_Save_2_info ? "RBH_Save_2" :
+ "RBH_Save_?"), get_itbl(bqe));
+ break;
+ default:
+ barf("Unexpected closure type %s in blocking queue of %p (%s)",
+ info_type(bqe), node, info_type(node));
+ break;
+ }
+ } /* for */
+ fputc('\n', stderr);
+}
+#else
+/*
+ Nice and easy: only TSOs on the blocking queue
+*/
+void
+print_bq (StgClosure *node)
+{
+ StgTSO *tso;
+
+ ASSERT(node!=(StgClosure*)NULL); // sanity check
+ for (tso = ((StgBlockingQueue*)node)->blocking_queue;
+ tso != END_TSO_QUEUE;
+ tso=tso->link) {
+ ASSERT(tso!=(StgTSO*)NULL && tso!=END_TSO_QUEUE); // sanity check
+ ASSERT(get_itbl(tso)->type == TSO); // guess what, sanity check
+ fprintf(stderr," TSO %d (%x),", tso->id, tso);
+ }
+ fputc('\n', stderr);
+}
+# endif
+
+/* Debugging functions used all over the place in GranSim and GUM;
+   dummy functions in other setups.
+*/
+# if !defined(GRAN) && !defined(PAR)
+char *
+info_type(StgClosure *closure){
+ return "petaQ";
+}
+
+char *
+info_type_by_ip(StgInfoTable *ip){
+ return "petaQ";
+}
+#endif
+
static void
sched_belch(char *s, ...)
{
vfprintf(stderr, s, ap);
fprintf(stderr, "\n");
}
-#endif
+
+#endif /* DEBUG */
+
+//@node Index, , Debugging Routines, Main scheduling code
+//@subsection Index
+
+//@index
+//* MainRegTable:: @cindex\s-+MainRegTable
+//* StgMainThread:: @cindex\s-+StgMainThread
+//* awaken_blocked_queue:: @cindex\s-+awaken_blocked_queue
+//* blocked_queue_hd:: @cindex\s-+blocked_queue_hd
+//* blocked_queue_tl:: @cindex\s-+blocked_queue_tl
+//* context_switch:: @cindex\s-+context_switch
+//* createThread:: @cindex\s-+createThread
+//* free_capabilities:: @cindex\s-+free_capabilities
+//* gc_pending_cond:: @cindex\s-+gc_pending_cond
+//* initScheduler:: @cindex\s-+initScheduler
+//* interrupted:: @cindex\s-+interrupted
+//* n_free_capabilities:: @cindex\s-+n_free_capabilities
+//* next_thread_id:: @cindex\s-+next_thread_id
+//* print_bq:: @cindex\s-+print_bq
+//* run_queue_hd:: @cindex\s-+run_queue_hd
+//* run_queue_tl:: @cindex\s-+run_queue_tl
+//* sched_mutex:: @cindex\s-+sched_mutex
+//* schedule:: @cindex\s-+schedule
+//* take_off_run_queue:: @cindex\s-+take_off_run_queue
+//* task_ids:: @cindex\s-+task_ids
+//* term_mutex:: @cindex\s-+term_mutex
+//* thread_ready_cond:: @cindex\s-+thread_ready_cond
+//@end index
/* -----------------------------------------------------------------------------
- * $Id: Schedule.h,v 1.12 2000/01/12 15:15:18 simonmar Exp $
+ * $Id: Schedule.h,v 1.13 2000/01/13 14:34:05 hwloidl Exp $
*
* (c) The GHC Team 1998-1999
*
* Prototypes for functions in Schedule.c
* (RTS internal scheduler interface)
*
- * ---------------------------------------------------------------------------*/
+ * -------------------------------------------------------------------------*/
+//@menu
+//* Scheduler Functions::
+//* Scheduler Vars and Data Types::
+//* Some convenient macros::
+//* Index::
+//@end menu
+
+//@node Scheduler Functions, Scheduler Vars and Data Types
+//@subsection Scheduler Functions
+
+//@cindex initScheduler
+//@cindex exitScheduler
+//@cindex startTasks
/* initScheduler(), exitScheduler(), startTasks()
*
* Called from STG : no
void startTasks( void );
#endif
+//@cindex awakenBlockedQueue
/* awakenBlockedQueue()
*
* Takes a pointer to the beginning of a blocked TSO queue, and
* Called from STG : yes
* Locks assumed : none
*/
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#else
void awakenBlockedQueue(StgTSO *tso);
+#endif
+//@cindex unblockOne
/* unblockOne()
*
* Takes a pointer to the beginning of a blocked TSO queue, and
* Called from STG : yes
* Locks assumed : none
*/
+#if defined(GRAN)
+# error FixME
+#elif defined(PAR)
+StgTSO *unblockOne(StgTSO *tso, StgClosure *node);
+#else
StgTSO *unblockOne(StgTSO *tso);
+#endif
+//@cindex raiseAsync
/* raiseAsync()
*
* Raises an exception asynchronously in the specified thread.
*/
void raiseAsync(StgTSO *tso, StgClosure *exception);
+//@cindex awaitEvent
/* awaitEvent()
*
 * Waits for an external event to arrive, e.g. I/O for a blocked thread.
*/
void awaitEvent(rtsBool wait); /* In Select.c */
+// ToDo: check whether all fcts below are used in the SMP version, too
+//@cindex awaken_blocked_queue
+#if defined(GRAN)
+void awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node);
+void unlink_from_bq(StgTSO* tso, StgClosure* node);
+void initThread(StgTSO *tso, nat stack_size, StgInt pri);
+#elif defined(PAR)
+nat run_queue_len(void);
+void awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node);
+void initThread(StgTSO *tso, nat stack_size);
+#else
+char *info_type(StgClosure *closure); // dummy
+char *info_type_by_ip(StgInfoTable *ip); // dummy
+void awaken_blocked_queue(StgTSO *q);
+void initThread(StgTSO *tso, nat stack_size);
+#endif
+
+// debugging only
+#ifdef DEBUG
+extern void printThreadBlockage(StgTSO *tso);
+#endif
+void print_bq (StgClosure *node);
+
+//@node Scheduler Vars and Data Types, Some convenient macros, Scheduler Functions
+//@subsection Scheduler Vars and Data Types
+
+//@cindex context_switch
/* Context switch flag.
* Locks required : sched_mutex
*/
extern nat ticks_since_select;
+//@cindex Capability
/* Capability type
*/
typedef StgRegTable Capability;
extern StgTSO *run_queue_hd, *run_queue_tl;
extern StgTSO *blocked_queue_hd, *blocked_queue_tl;
-#ifdef DEBUG
-extern void printThreadBlockage(StgTSO *tso);
-#endif
-
#ifdef SMP
+//@cindex sched_mutex
+//@cindex thread_ready_cond
+//@cindex gc_pending_cond
extern pthread_mutex_t sched_mutex;
extern pthread_cond_t thread_ready_cond;
extern pthread_cond_t gc_pending_cond;
#endif
+//@cindex task_info
#ifdef SMP
typedef struct {
pthread_t id;
extern task_info *task_ids;
#endif
+#if !defined(GRAN)
+extern StgTSO *run_queue_hd, *run_queue_tl;
+extern StgTSO *blocked_queue_hd, *blocked_queue_tl;
+#endif
+
/* Needed by Hugs.
*/
void interruptStgRts ( void );
+// ?? needed -- HWL
+void raiseAsync(StgTSO *tso, StgClosure *exception);
+
+//@node Some convenient macros, Index, Scheduler Vars and Data Types
+//@subsection Some convenient macros
/* -----------------------------------------------------------------------------
* Some convenient macros...
#define END_TSO_QUEUE ((StgTSO *)(void*)&END_TSO_QUEUE_closure)
#define END_CAF_LIST ((StgCAF *)(void*)&END_TSO_QUEUE_closure)
+//@cindex APPEND_TO_RUN_QUEUE
/* Add a thread to the end of the run queue.
* NOTE: tso->link should be END_TSO_QUEUE before calling this macro.
*/
} \
run_queue_tl = tso;
+//@cindex PUSH_ON_RUN_QUEUE
/* Push a thread on the beginning of the run queue. Used for
* newly awakened threads, so they get run as soon as possible.
*/
if (run_queue_tl == END_TSO_QUEUE) { \
run_queue_tl = tso; \
}
-
+
+//@cindex POP_RUN_QUEUE
/* Pop the first thread off the runnable queue.
*/
#define POP_RUN_QUEUE() \
t; \
})
+//@cindex APPEND_TO_BLOCKED_QUEUE
/* Add a thread to the end of the blocked queue.
*/
#define APPEND_TO_BLOCKED_QUEUE(tso) \
} \
blocked_queue_tl = tso;
+//@cindex THREAD_RUNNABLE
/* Signal that a runnable thread has become available, in
* case there are any waiting tasks to execute it.
*/
#define THREAD_RUNNABLE() /* nothing */
#endif
+//@node Index, , Some convenient macros
+//@subsection Index
+
+//@index
+//* APPEND_TO_BLOCKED_QUEUE:: @cindex\s-+APPEND_TO_BLOCKED_QUEUE
+//* APPEND_TO_RUN_QUEUE:: @cindex\s-+APPEND_TO_RUN_QUEUE
+//* Capability:: @cindex\s-+Capability
+//* POP_RUN_QUEUE :: @cindex\s-+POP_RUN_QUEUE
+//* PUSH_ON_RUN_QUEUE:: @cindex\s-+PUSH_ON_RUN_QUEUE
+//* THREAD_RUNNABLE:: @cindex\s-+THREAD_RUNNABLE
+//* awaitEvent:: @cindex\s-+awaitEvent
+//* awakenBlockedQueue:: @cindex\s-+awakenBlockedQueue
+//* awaken_blocked_queue:: @cindex\s-+awaken_blocked_queue
+//* context_switch:: @cindex\s-+context_switch
+//* exitScheduler:: @cindex\s-+exitScheduler
+//* gc_pending_cond:: @cindex\s-+gc_pending_cond
+//* initScheduler:: @cindex\s-+initScheduler
+//* raiseAsync:: @cindex\s-+raiseAsync
+//* sched_mutex:: @cindex\s-+sched_mutex
+//* startTasks:: @cindex\s-+startTasks
+//* task_info:: @cindex\s-+task_info
+//* thread_ready_cond:: @cindex\s-+thread_ready_cond
+//* unblockOne:: @cindex\s-+unblockOne
+//@end index
/* -----------------------------------------------------------------------------
- * $Id: StgMiscClosures.hc,v 1.30 1999/11/30 11:43:26 simonmar Exp $
+ * $Id: StgMiscClosures.hc,v 1.31 2000/01/13 14:34:05 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#include "Rts.h"
#include "RtsUtils.h"
+#include "RtsFlags.h"
#include "StgMiscClosures.h"
#include "HeapStackCheck.h" /* for stg_gen_yield */
#include "Storage.h"
#include "StoragePriv.h"
#include "ProfRts.h"
#include "SMP.h"
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h" /* for DumpRawGranEvent */
+# include "StgRun.h" /* for StgReturn and register saving */
+#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
*/
#define DUMP_ERRMSG(msg) STGCALL2(fprintf,stderr,msg)
+/*
+ Template for the entry code of non-enterable closures.
+*/
+
+#define NON_ENTERABLE_ENTRY_CODE(type) \
+STGFUN(type##_entry) \
+{ \
+ FB_ \
+ DUMP_ERRMSG(#type " object entered!\n"); \
+ STGCALL1(raiseError, errorHandler); \
+ stg_exit(EXIT_FAILURE); /* not executed */ \
+ FE_ \
+}
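+
+/* For example, NON_ENTERABLE_ENTRY_CODE(RBH_Save_0) below expands to an
+   RBH_Save_0_entry function that prints "RBH_Save_0 object entered!" and
+   aborts; TSO and the other non-enterable closures use the same template. */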
+
/* -----------------------------------------------------------------------------
Entry code for an indirection.
STGFUN(BLACKHOLE_entry)
{
FB_
+#if defined(GRAN)
+ /* Before overwriting TSO_LINK */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/);
+#endif
+
#ifdef SMP
CMPXCHG(R1.cl->header.info, &BLACKHOLE_info, &WHITEHOLE_info);
#endif
TICK_ENT_BH();
/* Put ourselves on the blocking queue for this black hole */
- CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure;
+#if defined(GRAN) || defined(PAR)
+  /* in fact, the only difference is the type of the end-of-queue marker! */
+ CurrentTSO->link = END_BQ_QUEUE;
+ ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+#else
+ CurrentTSO->link = END_TSO_QUEUE;
((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO;
+#endif
+ /* jot down why and on what closure we are blocked */
CurrentTSO->why_blocked = BlockedOnBlackHole;
CurrentTSO->block_info.closure = R1.cl;
+ /* closure is mutable since something has just been added to its BQ */
recordMutable((StgMutClosure *)R1.cl);
/* Change the BLACKHOLE into a BLACKHOLE_BQ */
((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info;
+
+#if defined(PAR)
+ /* Save the Thread State here, before calling RTS routines below! */
+ SAVE_THREAD_STATE(1);
+
+ /* if collecting stats update the execution time etc */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ //rtsTime now = CURRENT_TIME; /* Now */
+ CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+ CurrentTSO->par.blockcount++;
+ CurrentTSO->par.blockedat = CURRENT_TIME;
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0);
+ }
+
+ THREAD_RETURN(1); /* back to the scheduler */
+#else
/* stg_gen_block is too heavyweight, use a specialised one */
BLOCK_NP(1);
+#endif
+
FE_
}
STGFUN(BLACKHOLE_BQ_entry)
{
FB_
+#if defined(GRAN)
+ /* Before overwriting TSO_LINK */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/);
+#endif
+
#ifdef SMP
CMPXCHG(R1.cl->header.info, &BLACKHOLE_BQ_info, &WHITEHOLE_info);
#endif
TICK_ENT_BH();
/* Put ourselves on the blocking queue for this black hole */
- CurrentTSO->why_blocked = BlockedOnBlackHole;
- CurrentTSO->block_info.closure = R1.cl;
CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue;
((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO;
+ /* jot down why and on what closure we are blocked */
+ CurrentTSO->why_blocked = BlockedOnBlackHole;
+ CurrentTSO->block_info.closure = R1.cl;
#ifdef SMP
((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info;
#endif
+#if defined(PAR)
+ /* Save the Thread State here, before calling RTS routines below! */
+ SAVE_THREAD_STATE(1);
+
+ /* if collecting stats update the execution time etc */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ //rtsTime now = CURRENT_TIME; /* Now */
+ CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+ CurrentTSO->par.blockcount++;
+ CurrentTSO->par.blockedat = CURRENT_TIME;
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0);
+ }
+
+ THREAD_RETURN(1); /* back to the scheduler */
+#else
/* stg_gen_block is too heavyweight, use a specialised one */
BLOCK_NP(1);
+#endif
+ FE_
+}
+
+/*
+ Revertible black holes are needed in the parallel world, to handle
+ negative acknowledgements of messages containing updatable closures.
+ The idea is that when the original message is transmitted, the closure
+ is turned into a revertible black hole...an object which acts like a
+ black hole when local threads try to enter it, but which can be reverted
+ back to the original closure if necessary.
+
+ It's actually a lot like a blocking queue (BQ) entry, because revertible
+ black holes are initially set up with an empty blocking queue.
+*/
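+
+/* A rough sketch of the life-cycle just described (illustrative only):
+
+     updatable closure --(packed into a message)--> RBH with an empty BQ
+       * a local thread entering the RBH is queued on that BQ, exactly as
+         for a BLACKHOLE_BQ (see RBH_entry below);
+       * on a negative acknowledgement the RBH is reverted back to the
+         original closure.
+*/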
+
+#if defined(PAR) || defined(GRAN)
+
+INFO_TABLE(RBH_info, RBH_entry,1,1,RBH,,EF_,0,0);
+STGFUN(RBH_entry)
+{
+ FB_
+# if defined(GRAN)
+ /* mainly statistics gathering for GranSim simulation */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/);
+# endif
+
+ /* exactly the same as a BLACKHOLE_BQ_entry -- HWL */
+ /* Put ourselves on the blocking queue for this black hole */
+ CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue;
+ ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO;
+ /* jot down why and on what closure we are blocked */
+ CurrentTSO->why_blocked = BlockedOnBlackHole;
+ CurrentTSO->block_info.closure = R1.cl;
+
+#if defined(PAR)
+ /* Save the Thread State here, before calling RTS routines below! */
+ SAVE_THREAD_STATE(1);
+
+ /* if collecting stats update the execution time etc */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ //rtsTime now = CURRENT_TIME; /* Now */
+ CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+ CurrentTSO->par.blockcount++;
+ CurrentTSO->par.blockedat = CURRENT_TIME;
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0);
+ }
+
+ THREAD_RETURN(1); /* back to the scheduler */
+#else
+ /* saves thread state and leaves thread in ThreadEnterGHC state; */
+ /* stg_gen_block is too heavyweight, use a specialised one */
+ BLOCK_NP(1);
+#endif
+
FE_
}
+INFO_TABLE(RBH_Save_0_info, RBH_Save_0_entry,0,2,CONSTR,,EF_,0,0);
+NON_ENTERABLE_ENTRY_CODE(RBH_Save_0);
+
+INFO_TABLE(RBH_Save_1_info, RBH_Save_1_entry,1,1,CONSTR,,EF_,0,0);
+NON_ENTERABLE_ENTRY_CODE(RBH_Save_1);
+
+INFO_TABLE(RBH_Save_2_info, RBH_Save_2_entry,2,0,CONSTR,,EF_,0,0);
+NON_ENTERABLE_ENTRY_CODE(RBH_Save_2);
+#endif /* defined(PAR) || defined(GRAN) */
+
/* identical to BLACKHOLEs except for the infotag */
INFO_TABLE(CAF_BLACKHOLE_info, CAF_BLACKHOLE_entry,0,2,CAF_BLACKHOLE,,EF_,0,0);
STGFUN(CAF_BLACKHOLE_entry)
{
FB_
+#if defined(GRAN)
+ /* mainly statistics gathering for GranSim simulation */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/);
+#endif
+
#ifdef SMP
CMPXCHG(R1.cl->header.info, &CAF_BLACKHOLE_info, &WHITEHOLE_info);
+#endif
TICK_ENT_BH();
/* Put ourselves on the blocking queue for this black hole */
- CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure;
+#if defined(GRAN) || defined(PAR)
+  /* in fact, the only difference is the type of the end-of-queue marker! */
+ CurrentTSO->link = END_BQ_QUEUE;
+ ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+#else
+ CurrentTSO->link = END_TSO_QUEUE;
((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO;
+#endif
+ /* jot down why and on what closure we are blocked */
CurrentTSO->why_blocked = BlockedOnBlackHole;
CurrentTSO->block_info.closure = R1.cl;
+ /* closure is mutable since something has just been added to its BQ */
recordMutable((StgMutClosure *)R1.cl);
/* Change the CAF_BLACKHOLE into a BLACKHOLE_BQ */
((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info;
- /* stg_gen_block is too heavyweight, use a specialised one */
- BLOCK_NP(1);
+#if defined(PAR)
+ /* Save the Thread State here, before calling RTS routines below! */
+ SAVE_THREAD_STATE(1);
+
+ /* if collecting stats update the execution time etc */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ //rtsTime now = CURRENT_TIME; /* Now */
+ CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+ CurrentTSO->par.blockcount++;
+ CurrentTSO->par.blockedat = CURRENT_TIME;
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0);
+ }
+
+ THREAD_RETURN(1); /* back to the scheduler */
#else
- JMP_(BLACKHOLE_entry);
+ /* stg_gen_block is too heavyweight, use a specialised one */
+ BLOCK_NP(1);
#endif
FE_
/* -----------------------------------------------------------------------------
Some static info tables for things that don't get entered, and
therefore don't need entry code (i.e. boxed but unpointed objects)
+ NON_ENTERABLE_ENTRY_CODE now defined at the beginning of the file
-------------------------------------------------------------------------- */
-#define NON_ENTERABLE_ENTRY_CODE(type) \
-STGFUN(type##_entry) \
-{ \
- FB_ \
- DUMP_ERRMSG(#type " object entered!\n"); \
- STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE); \
- FE_ \
-}
-
INFO_TABLE(TSO_info, TSO_entry, 0,0,TSO,,EF_,0,0);
NON_ENTERABLE_ENTRY_CODE(TSO);
/* -----------------------------------------------------------------------------
- * $Id: Storage.h,v 1.13 1999/11/11 11:49:26 simonmar Exp $
+ * $Id: Storage.h,v 1.14 2000/01/13 14:34:05 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
extern StgCAF* enteredCAFs;
+#if defined(DEBUG)
+void printMutOnceList(generation *gen);
+void printMutableList(generation *gen);
+#endif /* DEBUG */
+
#endif STORAGE_H
/* -----------------------------------------------------------------------------
- * $Id: Updates.hc,v 1.24 1999/12/01 14:34:39 simonmar Exp $
+ * $Id: Updates.hc,v 1.25 2000/01/13 14:34:05 hwloidl Exp $
*
* (c) The GHC Team, 1998-1999
*
#include "HeapStackCheck.h"
#include "Storage.h"
#include "ProfRts.h"
+#if defined(GRAN) || defined(PAR)
+# include "FetchMe.h"
+#endif
/*
The update frame return address must be *polymorphic*, that means
*/
Fun = R1.cl;
-#if defined(GRAN_COUNT)
-#error Fixme.
- ++nPAPs;
-#endif
-
/* Just copy the whole block of stack between the stack pointer
* and the update frame pointer.
*/
--- /dev/null
+/*-----------------------------------------------------------------------------
+ * $Id: 0Hash.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $
+ *
+ * (c) The AQUA Project, Glasgow University, 1995-1998
+ * (c) The GHC Team, 1999
+ *
+ * Dynamically expanding linear hash tables, as described in
+ * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
+ * pp. 446 -- 457.
+ * -------------------------------------------------------------------------- */
+
+/*
+ Replaced with ghc/rts/Hash.c in the new RTS
+*/
+
+#if 0
+
+#include "Rts.h"
+#include "Hash.h"
+#include "RtsUtils.h"
+
+#define HSEGSIZE 1024 /* Size of a single hash table segment */
+ /* Also the minimum size of a hash table */
+#define HDIRSIZE 1024 /* Size of the segment directory */
+ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
+#define HLOAD 5 /* Maximum average load of a single hash bucket */
+
+#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
+ /* Number of HashList cells to allocate in one go */
+
+
+/* Linked list of (key, data) pairs for separate chaining */
+struct hashlist {
+ StgWord key;
+ void *data;
+ struct hashlist *next; /* Next cell in bucket chain (same hash value) */
+};
+
+typedef struct hashlist HashList;
+
+struct hashtable {
+ int split; /* Next bucket to split when expanding */
+ int max; /* Max bucket of smaller table */
+ int mask1; /* Mask for doing the mod of h_1 (smaller table) */
+ int mask2; /* Mask for doing the mod of h_2 (larger table) */
+ int kcount; /* Number of keys */
+ int bcount; /* Number of buckets */
+ HashList **dir[HDIRSIZE]; /* Directory of segments */
+};
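+
+/* How these fields drive the linear hashing scheme (see hash() and expand()
+   below): the table grows one bucket at a time; buckets below 'split' have
+   already been split and are addressed through mask2 (the larger table),
+   while buckets from 'split' up to 'max' still use mask1.  Once 'split'
+   reaches 'max', 'max' doubles, mask1 takes over mask2's value and mask2
+   gains one more bit. */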
+
+/* -----------------------------------------------------------------------------
+ * Hash first using the smaller table. If the bucket is less than the
+ * next bucket to be split, re-hash using the larger table.
+ * -------------------------------------------------------------------------- */
+
+static int
+hash(HashTable *table, W_ key)
+{
+ int bucket;
+
+ /* Strip the boring zero bits */
+ key /= sizeof(StgWord);
+
+ /* Mod the size of the hash table (a power of 2) */
+ bucket = key & table->mask1;
+
+ if (bucket < table->split) {
+ /* Mod the size of the expanded hash table (also a power of 2) */
+ bucket = key & table->mask2;
+ }
+ return bucket;
+}
+
+/* -----------------------------------------------------------------------------
+ * Allocate a new segment of the dynamically growing hash table.
+ * -------------------------------------------------------------------------- */
+
+static void
+allocSegment(HashTable *table, int segment)
+{
+ table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
+ "allocSegment");
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Expand the larger hash table by one bucket, and split one bucket
+ * from the smaller table into two parts. Only the bucket referenced
+ * by @table->split@ is affected by the expansion.
+ * -------------------------------------------------------------------------- */
+
+static void
+expand(HashTable *table)
+{
+ int oldsegment;
+ int oldindex;
+ int newbucket;
+ int newsegment;
+ int newindex;
+ HashList *hl;
+ HashList *next;
+ HashList *old, *new;
+
+ if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
+ /* Wow! That's big. Too big, so don't expand. */
+ return;
+
+ /* Calculate indices of bucket to split */
+ oldsegment = table->split / HSEGSIZE;
+ oldindex = table->split % HSEGSIZE;
+
+ newbucket = table->max + table->split;
+
+ /* And the indices of the new bucket */
+ newsegment = newbucket / HSEGSIZE;
+ newindex = newbucket % HSEGSIZE;
+
+ if (newindex == 0)
+ allocSegment(table, newsegment);
+
+ if (++table->split == table->max) {
+ table->split = 0;
+ table->max *= 2;
+ table->mask1 = table->mask2;
+ table->mask2 = table->mask2 << 1 | 1;
+ }
+ table->bcount++;
+
+ /* Split the bucket, paying no attention to the original order */
+
+ old = new = NULL;
+ for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
+ next = hl->next;
+ if (hash(table, hl->key) == newbucket) {
+ hl->next = new;
+ new = hl;
+ } else {
+ hl->next = old;
+ old = hl;
+ }
+ }
+ table->dir[oldsegment][oldindex] = old;
+ table->dir[newsegment][newindex] = new;
+
+ return;
+}
+
+void *
+lookupHashTable(HashTable *table, StgWord key)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
+ if (hl->key == key)
+ return hl->data;
+
+ /* It's not there */
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * We allocate the hashlist cells in large chunks to cut down on malloc
+ * overhead. Although we keep a free list of hashlist cells, we make
+ * no effort to actually return the space to the malloc arena.
+ * -------------------------------------------------------------------------- */
+
+static HashList *freeList = NULL;
+
+static HashList *
+allocHashList(void)
+{
+ HashList *hl, *p;
+
+ if ((hl = freeList) != NULL) {
+ freeList = hl->next;
+ } else {
+ hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
+
+ freeList = hl + 1;
+ for (p = freeList; p < hl + HCHUNK - 1; p++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ return hl;
+}
+
+static void
+freeHashList(HashList *hl)
+{
+ hl->next = freeList;
+ freeList = hl;
+}
+
+void
+insertHashTable(HashTable *table, StgWord key, void *data)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+
+ /* We want no duplicates */
+ ASSERT(lookupHashTable(table, key) == NULL);
+
+ /* When the average load gets too high, we expand the table */
+ if (++table->kcount >= HLOAD * table->bcount)
+ expand(table);
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ hl = allocHashList();
+
+ hl->key = key;
+ hl->data = data;
+ hl->next = table->dir[segment][index];
+ table->dir[segment][index] = hl;
+
+}
+
+void *
+removeHashTable(HashTable *table, StgWord key, void *data)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+ HashList *prev = NULL;
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
+ if (hl->key == key && (data == NULL || hl->data == data)) {
+ if (prev == NULL)
+ table->dir[segment][index] = hl->next;
+ else
+ prev->next = hl->next;
+ table->kcount--;
+ return hl->data;
+ }
+ prev = hl;
+ }
+
+ /* It's not there */
+ ASSERT(data == NULL);
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * When we free a hash table, we are also good enough to free the
+ * data part of each (key, data) pair, as long as our caller can tell
+ * us how to do it.
+ * -------------------------------------------------------------------------- */
+
+void
+freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
+{
+ long segment;
+ long index;
+ HashList *hl;
+ HashList *next;
+
+ /* The last bucket with something in it is table->max + table->split - 1 */
+ segment = (table->max + table->split - 1) / HSEGSIZE;
+ index = (table->max + table->split - 1) % HSEGSIZE;
+
+ while (segment >= 0) {
+ while (index >= 0) {
+ for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
+ next = hl->next;
+ if (freeDataFun != NULL)
+ (*freeDataFun)(hl->data);
+ freeHashList(hl);
+ }
+ index--;
+ }
+ free(table->dir[segment]);
+ segment--;
+ index = HSEGSIZE - 1;
+ }
+ free(table);
+}
+
+/* -----------------------------------------------------------------------------
+ * When we initialize a hash table, we set up the first segment as well,
+ * initializing all of the first segment's hash buckets to NULL.
+ * -------------------------------------------------------------------------- */
+
+HashTable *
+allocHashTable(void)
+{
+ HashTable *table;
+ HashList **hb;
+
+ table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
+
+ allocSegment(table, 0);
+
+ for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
+ *hb = NULL;
+
+ table->split = 0;
+ table->max = HSEGSIZE;
+ table->mask1 = HSEGSIZE - 1;
+ table->mask2 = 2 * HSEGSIZE - 1;
+ table->kcount = 0;
+ table->bcount = HSEGSIZE;
+
+ return table;
+}
+#endif
--- /dev/null
+/*
+ Time-stamp: <Mon Oct 04 1999 14:50:28 Stardate: [-30]3692.88 hwloidl>
+
+ Definitions for parallel machines.
+
+This section contains definitions applicable only to programs compiled
+to run on a parallel machine, i.e. on GUM. Some of these definitions
+are also used when simulating parallel execution, i.e. on GranSim.
+ */
+
+/*
+   ToDo: Check the PAR specific part of this file
+ Move stuff into Closures.h and ClosureMacros.h
+ Clean-up GRAN specific code
+ -- HWL
+ */
+
+#ifndef PARALLEL_H
+#define PARALLEL_H
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#include "Rts.h"
+#include "GranSim.h"
+//#include "ClosureTypes.h"
+
+//@menu
+//* Basic definitions::
+//* Externs and types::
+//* Dummy defs::
+//* Par specific fixed headers::
+//* Parallel only heap objects::
+//* Packing definitions::
+//* End of File::
+//@end menu
+//*/
+
+//@node Basic definitions, Externs and types
+//@section Basic definitions
+
+/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */
+
+/* Needed for dumping routines */
+#if defined(PAR)
+# define TIME ullong
+# define CURRENT_TIME msTime()
+# define TIME_ON_PROC(p) msTime()
+# define CURRENT_PROC thisPE
+# define BINARY_STATS RtsFlags.ParFlags.granSimStats_Binary
+#elif defined(GRAN)
+# define TIME rtsTime
+# define CURRENT_TIME CurrentTime[CurrentProc]
+# define TIME_ON_PROC(p) CurrentTime[p]
+# define CURRENT_PROC CurrentProc
+# define BINARY_STATS RtsFlags.GranFlags.granSimStats_Binary
+#endif
+
+#if defined(PAR)
+# define MAX_PES 256 /* Maximum number of processors */
+ /* MAX_PES is enforced by SysMan, which does not
+ allow more than this many "processors".
+ This is important because PackGA [GlobAddr.lc]
+ **assumes** that a PE# can fit in 8+ bits.
+ */
+#endif
+
+//@node Externs and types, Dummy defs, Basic definitions
+//@section Externs and types
+
+#if defined(PAR)
+/* GUM: one spark queue on each PE, and each PE sees only its own spark queue */
+extern rtsSparkQ pending_sparks_hd;
+extern rtsSparkQ pending_sparks_tl;
+#elif defined(GRAN)
+/* GranSim: a globally visible array of spark queues */
+extern rtsSparkQ pending_sparks_hds[];
+extern rtsSparkQ pending_sparks_tls[];
+#endif
+extern unsigned int /* nat */ spark_queue_len(PEs proc);
+
+extern StgInt SparksAvail; /* How many sparks are available */
+
+/* prototypes of spark routines */
+/* ToDo: check whether all have to be visible -- HWL */
+#if defined(GRAN)
+rtsSpark *newSpark(StgClosure *node, StgInt name, StgInt gran_info, StgInt size_info, StgInt par_info, StgInt local);
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void add_to_spark_queue(rtsSpark *spark);
+void delete_from_spark_queue (rtsSpark *spark);
+#endif
+
+#define STATS_FILENAME_MAXLEN 128
+
+/* Where to write the log file */
+//extern FILE *gr_file;
+extern char gr_filename[STATS_FILENAME_MAXLEN];
+
+#if defined(GRAN)
+int init_gr_simulation(char *rts_argv[], int rts_argc, char *prog_argv[], int prog_argc);
+void end_gr_simulation(void);
+#endif
+
+#if defined(PAR)
+extern I_ do_sp_profile;
+
+extern P_ PendingFetches;
+extern GLOBAL_TASK_ID *PEs;
+
+extern rtsBool IAmMainThread, GlobalStopPending;
+extern rtsBool fishing;
+extern GLOBAL_TASK_ID SysManTask;
+extern int seed; /*pseudo-random-number generator seed:*/
+ /*Initialised in ParInit*/
+extern I_ threadId; /*Number of Threads that have existed on a PE*/
+extern GLOBAL_TASK_ID mytid;
+
+extern int nPEs;
+
+extern rtsBool InGlobalGC; /* Are we in the midst of performing global GC */
+
+extern HashTable *pGAtoGALAtable;
+extern HashTable *LAtoGALAtable;
+extern GALA *freeIndirections;
+extern GALA *liveIndirections;
+extern GALA *freeGALAList;
+extern GALA *liveRemoteGAs;
+extern int thisPE;
+
+void RunParallelSystem (StgPtr program_closure);
+void initParallelSystem();
+void SynchroniseSystem();
+
+void registerTask (GLOBAL_TASK_ID gtid);
+globalAddr *LAGAlookup (P_ addr);
+P_ GALAlookup (globalAddr *ga);
+globalAddr *MakeGlobal (P_ addr, rtsBool preferred);
+globalAddr *setRemoteGA (P_ addr, globalAddr *ga, rtsBool preferred);
+void splitWeight (globalAddr *to, globalAddr *from);
+globalAddr *addWeight (globalAddr *ga);
+void initGAtables();
+W_ taskIDtoPE (GLOBAL_TASK_ID gtid);
+void RebuildLAGAtable();
+
+void *lookupHashTable (HashTable *table, StgWord key);
+void insertHashTable (HashTable *table, StgWord key, void *data);
+void freeHashTable (HashTable *table, void (*freeDataFun)(void *data));
+HashTable *allocHashTable();
+void *removeHashTable (HashTable *table, StgWord key, void *data);
+#endif /* PAR */
+
+/* Interface for dumping routines (i.e. writing to log file) */
+void DumpGranEvent(GranEventType name, StgTSO *tso);
+void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len);
+//void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread);
+
+//@node Dummy defs, Par specific fixed headers, Externs and types
+//@section Dummy defs
+
+/*
+Get this out of the way. These are all null definitions.
+*/
+
+
+//# define GA_HDR_SIZE 0
+//# define GA(closure) /*nothing */
+
+//# define SET_GA(closure,ga) /* nothing */
+//# define SET_STATIC_GA(closure) /* nothing */
+//# define SET_GRAN_HDR(closure,pe) /* nothing */
+//# define SET_STATIC_PROCS(closure) /* nothing */
+
+//# define SET_TASK_ACTIVITY(act) /* nothing */
+
+#if defined(GRAN)
+
+# define GA_HDR_SIZE 1
+
+# define PROCS_HDR_POSN PAR_HDR_POSN
+# define PROCS_HDR_SIZE 1
+
+/* Accessing components of the field */
+# define PROCS(closure) ((closure)->header.gran.procs)
+/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */
+#endif
+
+
+//@node Par specific fixed headers, Parallel only heap objects, Dummy defs
+//@section Par specific fixed headers
+
+/*
+Definitions relating to the entire parallel-only fixed-header field.
+
+On GUM, the global addresses for each local closure are stored in a separate
+hash table, rather than with the closure in the heap. We call @getGA@ to
+look up the global address associated with a local closure (0 is returned
+for local closures that have no global address), and @setGA@ to store a new
+global address for a local closure which did not previously have one.
+*/
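+
+/* Intended usage, as implied above (a sketch only; 'new_ga' stands for
+   whatever global address the caller has obtained):
+
+     ga = GA(closure);             // i.e. getGA(closure); 0 if none yet
+     if (ga == 0)
+       SET_GA(closure, new_ga);    // i.e. setGA(closure, new_ga)
+*/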
+
+#if defined(PAR)
+
+# define GA_HDR_SIZE 0
+
+# define GA(closure) getGA(closure)
+
+# define SET_GA(closure, ga) setGA(closure,ga)
+# define SET_STATIC_GA(closure)
+# define SET_GRAN_HDR(closure,pe)
+# define SET_STATIC_PROCS(closure)
+
+# define MAX_GA_WEIGHT 0 /* Treat as 2^n */
+
+W_ PackGA (W_, int);
+ /* There was a PACK_GA macro here; but we turned it into the PackGA
+ routine [GlobAddr.lc] (because it needs to do quite a bit of
+ paranoia checking. Phil & Will (95/08)
+ */
+
+/* At the moment, there is no activity profiling for GUM. This may change. */
+# define SET_TASK_ACTIVITY(act) /* nothing */
+#endif
+
+//@node Parallel only heap objects, Packing definitions, Par specific fixed headers
+//@section Parallel only heap objects
+
+// NB: The following definitions are BOTH for GUM and GrAnSim -- HWL
+
+/* All in Closures.h and ClosureMacros.h */
+
+//@node Packing definitions, End of File, Parallel only heap objects
+//@section Packing definitions
+
+//@menu
+//* GUM::
+//* GranSim::
+//@end menu
+//*/
+
+//@node GUM, GranSim, Packing definitions, Packing definitions
+//@subsection GUM
+
+#if defined(PAR)
+/*
+Symbolic constants for the packing code.
+
+The packet size -- how many words of data we can pack into a single
+packet in the parallel (GUM) system -- is given by
+RtsFlags.ParFlags.packBufferSize (see the macros below).
+*/
+
+//@menu
+//* Externs::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Externs, Prototypes, GUM, GUM
+//@subsubsection Externs
+
+extern W_ *PackBuffer; /* size: can be set via option */
+extern long *buffer; /* HWL_ */
+extern W_ *freeBuffer; /* HWL_ */
+extern W_ *packBuffer; /* HWL_ */
+
+extern void InitPackBuffer(STG_NO_ARGS);
+extern void InitMoreBuffers(STG_NO_ARGS);
+extern void InitPendingGABuffer(W_ size);
+extern void AllocClosureQueue(W_ size);
+
+//@node Prototypes, Macros, Externs, GUM
+//@subsubsection Prototypes
+
+void InitPackBuffer();
+P_ PackTSO (P_ tso, W_ *size);
+P_ PackStkO (P_ stko, W_ *size);
+P_ AllocateHeap (W_ size); /* Doesn't belong */
+
+void InitClosureQueue ();
+P_ DeQueueClosure();
+void QueueClosure (P_ closure);
+rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+
+P_ get_closure_info (P_ closure, W_ *size, W_ *ptrs, W_ *nonptrs, W_ *vhs, char *type);
+
+rtsBool isOffset (globalAddr *ga),
+ isFixed (globalAddr *ga);
+
+void doGlobalGC();
+
+P_ PackNearbyGraph (P_ closure,W_ *size);
+P_ UnpackGraph (W_ *buffer, globalAddr **gamap, W_ *nGAs);
+
+
+//@node Macros, , Prototypes, GUM
+//@subsubsection Macros
+
+# define PACK_HEAP_REQUIRED \
+ ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (SPEC_HS + 2))
+
+# define MAX_GAS (RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE)
+
+
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+
+# define PACK_HDR_SIZE 1 /* Words of header in a packet */
+
+# define PACK_PLC_SIZE 2 /* Size of a packed PLC in words */
+
+#endif /* PAR */
+
+//@node GranSim, , GUM, Packing definitions
+//@subsection GranSim
+
+#if defined(GRAN)
+/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */
+
+//@menu
+//* Types::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Types, Prototypes, GranSim, GranSim
+//@subsubsection Types
+
+typedef struct rtsPackBuffer_ {
+ StgInt /* nat */ size;
+ StgInt /* nat */ unpacked_size;
+ StgTSO *tso;
+ StgClosure **buffer;
+} rtsPackBuffer;
+
+//@node Prototypes, Macros, Types, GranSim
+//@subsubsection Prototypes
+
+
+/* main packing functions */
+/*
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+void PrintPacket(rtsPackBuffer *buffer);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+*/
+/* important auxiliary functions */
+
+//StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty);
+int IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node);
+int IS_THUNK(StgClosure* closure);
+char *display_info_type(StgClosure* closure, char *str);
+
+/*
+OLD CODE -- HWL
+void InitPackBuffer(void);
+P_ AllocateHeap (W_ size);
+P_ PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize);
+P_ PackOneNode (P_ closure, P_ tso, W_ *packbuffersize);
+P_ UnpackGraph (P_ buffer);
+
+void InitClosureQueue (void);
+P_ DeQueueClosure(void);
+void QueueClosure (P_ closure);
+// rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+*/
+
+// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty);
+// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node) ;
+
+//@node Macros, , Prototypes, GranSim
+//@subsubsection Macros
+
+/* These are needed in the packing code to get the size of the packet
+   right. The closures themselves are never built in GrAnSim. */
+# define FETCHME_VHS IND_VHS
+# define FETCHME_HS IND_HS
+
+# define FETCHME_GA_LOCN FETCHME_HS
+
+# define FETCHME_CLOSURE_SIZE(closure) IND_CLOSURE_SIZE(closure)
+# define FETCHME_CLOSURE_NoPTRS(closure) 0L
+# define FETCHME_CLOSURE_NoNONPTRS(closure) (IND_CLOSURE_SIZE(closure)-IND_VHS)
+
+# define MAX_GAS (RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE)
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+# define PACK_HDR_SIZE 4 /* Words of header in a packet */
+
+# define PACK_HEAP_REQUIRED \
+ (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \
+ 2 * sizeofW(StgInt) + sizeofW(StgTSO*))
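+  /* i.e. one word per buffer slot plus room for the size, unpacked_size and
+     tso fields of the rtsPackBuffer defined above */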
+
+# define PACK_FLAG_LOCN 0
+# define PACK_TSO_LOCN 1
+# define PACK_UNPACKED_SIZE_LOCN 2
+# define PACK_SIZE_LOCN 3
+# define MAGIC_PACK_FLAG 0xfabc
+
+#endif /* GRAN */
+
+//@node End of File, , Packing definitions
+//@section End of File
+
+#endif /* defined(PAR) || defined(GRAN) whole file */
+#endif /* Parallel_H */
+
+
--- /dev/null
+/*
+ Time-stamp: <Wed Jan 12 2000 13:29:08 Stardate: [-30]4193.85 hwloidl>
+
+ Unpacking closures which have been exported to remote processors
+
+ This module defines routines for unpacking closures in the parallel
+ runtime system (GUM).
+
+ In the case of GrAnSim, this module defines routines for *simulating* the
+ unpacking of closures as it is done in the parallel runtime system.
+*/
+
+/*
+ Code in this file has been merged with Pack.c
+*/
+
+#if 0
+
+//@node Unpacking closures, , ,
+//@section Unpacking closures
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* GUM code::
+//* GranSim Code::
+//* Index::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Unpacking closures, Unpacking closures
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "ParallelDebug.h"
+#include "FetchMe.h"
+#include "Storage.h"
+
+//@node Prototypes, GUM code, Includes, Unpacking closures
+//@subsection Prototypes
+
+void InitPacking(void);
+# if defined(PAR)
+void InitPackBuffer(void);
+# endif
+/* Interface for ADT of closure queues */
+void AllocClosureQueue(nat size);
+void InitClosureQueue(void);
+rtsBool QueueEmpty(void);
+void QueueClosure(StgClosure *closure);
+StgClosure *DeQueueClosure(void);
+
+StgPtr AllocateHeap(nat size);
+
+//@node GUM code, GranSim Code, Prototypes, Unpacking closures
+//@subsection GUM code
+
+#if defined(PAR)
+
+//@node Local Definitions, , GUM code, GUM code
+//@subsubsection Local Definitions
+
+//@cindex PendingGABuffer
+static globalAddr *PendingGABuffer;
+/* is initialised in main; */
+
+//@cindex InitPendingGABuffer
+void
+InitPendingGABuffer(size)
+nat size;
+{
+ PendingGABuffer = (globalAddr *)
+ stgMallocBytes((size-PACK_HDR_SIZE)*2*sizeof(globalAddr),
+ "InitPendingGABuffer");
+}
+
+/*
+ @CommonUp@ commons up two closures which we have discovered to be
+ variants of the same object. One is made an indirection to the other. */
+
+//@cindex CommonUp
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ StgBlockingQueueElement *bqe;
+
+ ASSERT(src != dst);
+ switch (get_itbl(src)->type) {
+ case BLACKHOLE_BQ:
+ bqe = ((StgBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case FETCH_ME_BQ:
+ bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case RBH:
+ bqe = ((StgRBH *)src)->blocking_queue;
+ break;
+
+ case BLACKHOLE:
+ case FETCH_ME:
+ bqe = END_BQ_QUEUE;
+ break;
+
+ default:
+ /* Don't common up anything else */
+ return;
+ }
+ /* We do not use UPD_IND because that would awaken the bq, too */
+ // UPD_IND(src, dst);
+ updateWithIndirection(get_itbl(src), src, dst);
+ //ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst)));
+ if (bqe != END_BQ_QUEUE)
+ awaken_blocked_queue(bqe, src);
+}
+
+/*
+ @UnpackGraph@ unpacks the graph contained in a message buffer. It
+ returns a pointer to the new graph. The @gamap@ parameter is set to
+ point to an array of (oldGA,newGA) pairs which were created as a result
+ of unpacking the buffer; @nGAs@ is set to the number of GA pairs which
+ were created.
+
+ The format of graph in the pack buffer is as defined in @Pack.lc@. */
+
+//@cindex UnpackGraph
+StgClosure *
+UnpackGraph(packBuffer, gamap, nGAs)
+rtsPackBuffer *packBuffer;
+globalAddr **gamap;
+nat *nGAs;
+{
+ nat size, ptrs, nonptrs, vhs;
+ StgWord **buffer, **bufptr, **slotptr;
+ globalAddr ga, *gaga;
+ StgClosure *closure, *existing,
+ *graphroot, *graph, *parent;
+ StgInfoTable *ip, *oldip;
+ nat bufsize, i,
+ pptr = 0, pptrs = 0, pvhs;
+ char str[80];
+
+ InitPackBuffer(); /* in case it isn't already init'd */
+ graphroot = (StgClosure *)NULL;
+
+ gaga = PendingGABuffer;
+
+ InitClosureQueue();
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ buffer = packBuffer->buffer;
+ bufptr = buffer;
+
+ /* allocate heap */
+ if (bufsize > 0) {
+ graph = allocate(bufsize);
+ ASSERT(graph != NULL);
+ }
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight > 0) {
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ /* No more to unpack; just set closure to local address */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacked PLC at %x", ga.payload.plc));
+ closure = ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ /* No more to unpack; just set closure to cached address */
+ ASSERT(parent != (StgClosure *)NULL);
+ closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Now we have to build something. */
+
+ ASSERT(bufsize > 0);
+
+ /*
+ * Close your eyes. You don't want to see where we're looking. You
+ * can't get closure info until you've unpacked the variable header,
+ * but you don't know how big it is until you've got closure info.
+ * So...we trust that the closure in the buffer is organized the
+ * same way as they will be in the heap...at least up through the
+ * end of the variable header.
+ */
+ ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+ /*
+ Remember, the generic closure layout is as follows:
+	 +--------------------------------------------------+
+	 | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PTRS |
+	 +--------------------------------------------------+
+ */
+ /* Fill in the fixed header */
+ for (i = 0; i < FIXED_HS; i++)
+ ((StgPtr)graph)[i] = *bufptr++;
+
+ if (ip->type == FETCH_ME)
+ size = ptrs = nonptrs = vhs = 0;
+
+ /* Fill in the packed variable header */
+ for (i = 0; i < vhs; i++)
+ ((StgPtr)graph)[FIXED_HS + i] = *bufptr++;
+
+ /* Pointers will be filled in later */
+
+ /* Fill in the packed non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ ((StgPtr)graph)[FIXED_HS + i + vhs + ptrs] = *bufptr++;
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing */
+ QueueClosure(graph);
+
+ /*
+ * Common up the new closure with any existing closure having the same
+ * GA
+ */
+
+ if ((existing = GALAlookup(&ga)) == NULL) {
+ globalAddr *newGA;
+ /* Just keep the new object */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacking new (%x, %d, %x)\n",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight));
+
+ closure = graph;
+ newGA = setRemoteGA(graph, &ga, rtsTrue);
+ if (ip->type == FETCH_ME)
+ // FETCHME_GA(closure) = newGA;
+ ((StgFetchMe *)closure)->ga = newGA;
+ } else {
+ /* Two closures, one global name. Someone loses */
+ oldip = get_itbl(existing);
+
+ if ((oldip->type == FETCH_ME || IS_BLACK_HOLE(existing)) &&
+ ip->type != FETCH_ME) {
+
+ /* What we had wasn't worth keeping */
+ closure = graph;
+ CommonUp(existing, graph);
+ } else {
+
+ /*
+ * Either we already had something worthwhile by this name or
+ * the new thing is just another FetchMe. However, the thing we
+ * just unpacked has to be left as-is, or the child unpacking
+ * code will fail. Remember that the way pointer words are
+ * filled in depends on the info pointers of the parents being
+ * the same as when they were packed.
+ */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacking old (%x, %d, %x), keeping %#lx",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight,
+ existing));
+
+ closure = existing;
+ }
+ /* Pool the total weight in the stored ga */
+ (void) addWeight(&ga);
+ }
+
+ /* Sort out the global address mapping */
+ if ((ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||
+ (ip_MUTABLE(ip) && ip->type != FETCH_ME)) {
+ /* Make up new GAs for single-copy closures */
+ globalAddr *newGA = makeGlobal(closure, rtsTrue);
+
+ ASSERT(closure == graph);
+
+ /* Create an old GA to new GA mapping */
+ *gaga++ = ga;
+ splitWeight(gaga, newGA);
+ ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1));
+ gaga++;
+ }
+ graph += FIXED_HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size);
+ }
+
+ /*
+ * Set parent pointer to point to chosen closure. If we're at the top of
+ * the graph (our parent is NULL), then we want to arrange to return the
+ * chosen closure to our caller (possibly in place of the allocated graph
+ * root.)
+ */
+ if (parent == NULL)
+ graphroot = closure;
+ else
+ ((StgPtr)parent)[FIXED_HS + pvhs + pptr] = (StgWord) closure;
+
+ /* Save closure pointer for resolving offsets */
+ *slotptr = (StgWord) closure;
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+
+ ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp);
+
+ *gamap = PendingGABuffer;
+ *nGAs = (gaga - PendingGABuffer) / 2;
+
+ /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+ ASSERT(graphroot!=NULL);
+ return (graphroot);
+}
+#endif /* PAR */
+
+//@node GranSim Code, Index, GUM code, Unpacking closures
+//@subsection GranSim Code
+
+/*
+ For GrAnSim: In general no actual unpacking should be necessary. We just
+ have to walk over the graph and set the bitmasks appropriately. -- HWL */
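+
+/* Concretely, the bitmask is the per-closure PROCS field, with PE_NUMBER(p)
+   being the bit for processor p.  As the code below shows, closures that
+   are already in normal form (and black holes) have the current PE's bit
+   OR-ed in ("copy node"), whereas RBHs and other non-normal-form closures
+   have the mask replaced by the current PE's bit alone ("move node"). */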
+
+//@node Unpacking, , GranSim Code, GranSim Code
+//@subsubsection Unpacking
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+/* This code fakes the unpacking of a somewhat virtual buffer */
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+ nat size, ptrs, nonptrs, vhs,
+ bufptr = 0;
+ StgClosure *closure, *graphroot, *graph;
+ StgInfoTable *ip;
+ StgWord bufsize, unpackedsize,
+ pptr = 0, pptrs = 0, pvhs;
+ StgTSO* tso;
+ char str[240], str1[80];
+ int i;
+
+ bufptr = 0;
+ graphroot = buffer->buffer[0];
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpackedsize = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ IF_GRAN_DEBUG(pack,
+ belch("<<< Unpacking <<%d>> (buffer @ %p):\n (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+ buffer->id, buffer, graphroot, where_is(graphroot),
+ bufsize, tso->id, tso,
+ where_is((StgClosure *)tso)));
+
+ do {
+ closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+ /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+ ip = get_closure_info(closure,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ",
+ closure, (closure_HNF(closure) ? "NF" : "__"),
+ PROCS(closure)));
+
+ if (ip->type == RBH) {
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+
+ IF_GRAN_DEBUG(pack,
+ strcat(str, " (converting RBH) "));
+
+ convertFromRBH(closure); /* In GUM that's done by convertToFetchMe */
+ } else if (IS_BLACK_HOLE(closure)) {
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+    } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+ if (closure_HNF(closure))
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ else
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+ }
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+ IF_GRAN_DEBUG(pack, belch(str));
+
+ } while (bufptr<buffer->size) ; /* (parent != NULL); */
+
+ /* In GrAnSim we allocate pack buffers dynamically! -- HWL */
+ free(buffer->buffer);
+ free(buffer);
+
+ IF_GRAN_DEBUG(pack,
+ belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0));
+
+ return (graphroot);
+}
+#endif /* GRAN */
+#endif
+
+//@node Index, , GranSim Code, Unpacking closures
+//@subsection Index
+
+//@index
+//* CommonUp:: @cindex\s-+CommonUp
+//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer
+//* PendingGABuffer:: @cindex\s-+PendingGABuffer
+//* UnpackGraph:: @cindex\s-+UnpackGraph
+//@end index
--- /dev/null
+/* -----------------------------------------------------------------------------
+ * $Id: FetchMe.h,v 1.2 2000/01/13 14:34:06 hwloidl Exp $
+ *
+ * Closure types for the parallel system.
+ *
+ * ---------------------------------------------------------------------------*/
+
+EI_(FETCH_ME_info);
+EF_(FETCH_ME_entry);
+
+EI_(FETCH_ME_BQ_info);
+EF_(FETCH_ME_BQ_entry);
+
+EI_(BLOCKED_FETCH_info);
+EF_(BLOCKED_FETCH_entry);
+
+EI_(RBH_Save_0_info);
+EF_(RBH_Save_0_entry);
+EI_(RBH_Save_1_info);
+EF_(RBH_Save_1_entry);
+EI_(RBH_Save_2_info);
+EF_(RBH_Save_2_entry);
--- /dev/null
+/* ----------------------------------------------------------------------------
+ Time-stamp: <Wed Jan 12 2000 13:39:33 Stardate: [-30]4193.88 hwloidl>
+ $Id: FetchMe.hc,v 1.2 2000/01/13 14:34:06 hwloidl Exp $
+
+ Entry code for a FETCH_ME closure
+
+ This module defines routines for handling remote pointers (@FetchMe@s)
+ in GUM. It is threaded (@.hc@) because @FetchMe_entry@ will be
+ called during evaluation.
+
+ * --------------------------------------------------------------------------*/
+
+#ifdef PAR /* all of it */
+
+//@menu
+//* Includes::
+//* Info tables::
+//* Index::
+//@end menu
+
+//@node Includes, Info tables
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "GranSim.h"
+#include "GranSimRts.h"
+#include "Parallel.h"
+#include "ParallelRts.h"
+#include "FetchMe.h"
+#include "HLC.h"
+#include "StgRun.h" /* for StgReturn and register saving */
+
+/* --------------------------------------------------------------------------
+ FETCH_ME closures.
+
+ A FETCH_ME closure represents data that currently resides on
+ another PE. We issue a fetch message, and wait for the data to be
+ retrieved.
+
+ About the difference between std and PAR in returning to the RTS:
+ in PAR we call RTS functions from within the entry code (see also
+ BLACKHOLE_entry and friends in StgMiscClosures.hc); therefore, we
+ have to save the thread state before calling these functions ---
+ this is done via SAVE_THREAD_STATE; we then just load the return
+ code into R1 before jumping into the RTS --- this is done via
+ THREAD_RETURN; so, in short we have something like
+ SAVE_THREAD_STATE + THREAD_RETURN = BLOCK_NP
+
+ ------------------------------------------------------------------------ */
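+
+/* Schematically (only a restatement of the equation above, not a definition
+   of the macros):
+
+      sequential/SMP entry code:       PAR entry code:
+        ...                              SAVE_THREAD_STATE(1);
+        BLOCK_NP(1);                     ... STGCALLs into the RTS ...
+                                         THREAD_RETURN(1);
+*/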
+
+//@node Info tables, Index, Includes
+//@subsection Info tables
+
+//@cindex FETCH_ME_info
+INFO_TABLE(FETCH_ME_info, FETCH_ME_entry, 0,2, FETCH_ME, const, EF_,0,0);
+//@cindex FETCH_ME_entry
+STGFUN(FETCH_ME_entry)
+{
+ extern globalAddr *rga_GLOBAL;
+ extern globalAddr *lga_GLOBAL;
+ extern globalAddr fmbqga_GLOBAL;
+ extern StgClosure *p_GLOBAL;
+ /*
+ globalAddr *rga;
+ globalAddr *lga;
+ globalAddr fmbqga;
+ StgClosure *p;
+ */
+
+ rga_GLOBAL = ((StgFetchMe *)R1.p)->ga;
+  ASSERT(rga_GLOBAL->payload.gc.gtid != mytid);
+
+ /* Turn the FETCH_ME into a FETCH_ME_BQ, and place the current thread
+ * on the blocking queue.
+ */
+ // R1.cl->header.info = FETCH_ME_BQ_info;
+ SET_INFO((StgClosure *)R1.cl, &FETCH_ME_BQ_info);
+
+ CurrentTSO->link = END_BQ_QUEUE;
+ ((StgFetchMeBlockingQueue *)R1.cl)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+
+  /* record on which closure the current thread is blocking */
+ CurrentTSO->block_info.closure = R1.cl;
+ //recordMutable((StgMutClosure *)R1.cl);
+ p_GLOBAL = R1.cl;
+
+ /* Save the Thread State here, before calling RTS routines below! */
+ //BLOCK_NP_NO_JUMP(1);
+ SAVE_THREAD_STATE(1);
+
+ /* unknown junk... needed? --SDM yes, want to see what's happening -- HWL */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ //rtsTime now = CURRENT_TIME; /* Now */
+ CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+ CurrentTSO->par.fetchcount++;
+ /* TSO_QUEUE(CurrentTSO) = Q_FETCHING; */
+ CurrentTSO->par.blockedat = CURRENT_TIME;
+ /* we are about to send off a FETCH message, so dump a FETCH event */
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga_GLOBAL->payload.gc.gtid),
+ GR_FETCH, CurrentTSO, (StgClosure *)R1.p, 0);
+ }
+
+ /* Phil T. claims that this was a workaround for a hard-to-find
+ * bug, hence I'm leaving it out for now --SDM
+ */
+ /* Assign a brand-new global address to the newly created FMBQ */
+ lga_GLOBAL = makeGlobal(p_GLOBAL, rtsFalse);
+ splitWeight(&fmbqga_GLOBAL, lga_GLOBAL);
+ ASSERT(fmbqga_GLOBAL.weight == 1L << (BITS_IN(unsigned) - 1));
+
+ /* I *hope* it's ok to call this from STG land. --SDM */
+ STGCALL3(sendFetch, rga_GLOBAL, &fmbqga_GLOBAL, 0/*load*/);
+
+ // sendFetch now called from processTheRealFetch, to make SDM happy
+ //theGlobalFromGA.payload.gc.gtid = rga->payload.gc.gtid;
+ //theGlobalFromGA.payload.gc.slot = rga->payload.gc.slot;
+ //theGlobalFromGA.weight = rga->weight;
+ //theGlobalToGA.payload.gc.gtid = fmbqga.payload.gc.gtid;
+ //theGlobalToGA.payload.gc.slot = fmbqga.payload.gc.slot;
+ //theGlobalToGA.weight = fmbqga.weight;
+
+ // STGCALL6(fprintf,stderr,"%% Fetching %p from remote PE ((%x,%d,%x))\n",R1.p,rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight);
+
+ THREAD_RETURN(1); /* back to the scheduler */
+ // was: BLOCK_NP(1);
+ FE_
+}
+
+/* ---------------------------------------------------------------------------
+ FETCH_ME_BQ
+
+ On the first entry of a FETCH_ME closure, we turn the closure into
+ a FETCH_ME_BQ, which behaves just like a BLACKHOLE_BQ. Any thread
+ entering the FETCH_ME_BQ will be placed in the blocking queue.
+ When the data arrives from the remote PE, all waiting threads are
+ woken up and the FETCH_ME_BQ is overwritten with the fetched data.
+
+ FETCH_ME_BQ_entry is a copy of BLACKHOLE_BQ_entry -- HWL
+ ------------------------------------------------------------------------ */
+
+INFO_TABLE(FETCH_ME_BQ_info, FETCH_ME_BQ_entry,0,2,FETCH_ME_BQ,const,EF_,0,0);
+//@cindex FETCH_ME_BQ_info
+STGFUN(FETCH_ME_BQ_entry)
+{
+ FB_
+ TICK_ENT_BH();
+
+ /* Put ourselves on the blocking queue for this black hole */
+ CurrentTSO->block_info.closure = R1.cl;
+ CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue;
+ ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO;
+
+#if defined(PAR)
+ /* Save the Thread State here, before calling RTS routines below! */
+ SAVE_THREAD_STATE(1);
+
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ /* Note that CURRENT_TIME may perform an unsafe call */
+ //rtsTime now = CURRENT_TIME; /* Now */
+ CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+ CurrentTSO->par.blockcount++;
+ CurrentTSO->par.blockedat = CURRENT_TIME;
+ DumpRawGranEvent(CURRENT_PROC, thisPE,
+ GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0);
+ }
+
+ THREAD_RETURN(1); /* back to the scheduler */
+#else
+ /* stg_gen_block is too heavyweight, use a specialised one */
+ BLOCK_NP(1);
+#endif
+ FE_
+}
+
+/* ---------------------------------------------------------------------------
+ BLOCKED_FETCH_BQ
+
+ A BLOCKED_FETCH closure only ever exists in the blocking queue of a
+ globally visible closure i.e. one with a GA. A BLOCKED_FETCH closure
+ indicates that a TSO on another PE is waiting for the result of this
+ computation. Thus, when updating the closure, the result has to be sent
+ to that PE. The relevant routines handling that are awaken_blocked_queue
+ and blockFetch (for putting BLOCKED_FETCH closure into a BQ).
+*/
+
+//@cindex BLOCKED_FETCH_info
+INFO_TABLE(BLOCKED_FETCH_info, BLOCKED_FETCH_entry,0,2,BLOCKED_FETCH,const,EF_,0,0);
+//@cindex BLOCKED_FETCH_entry
+STGFUN(BLOCKED_FETCH_entry)
+{
+ FB_
+ /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */
+ fprintf(stderr,"Qagh: BLOCKED_FETCH entered!\n");
+ STGCALL1(raiseError, errorHandler);
+ stg_exit(EXIT_FAILURE); /* not executed */
+ FE_
+}
+
+#endif /* PAR */
+
+//@node Index, , Info tables
+//@subsection Index
+
+//@index
+//* BLOCKED_FETCH_entry:: @cindex\s-+BLOCKED_FETCH_entry
+//* BLOCKED_FETCH_info:: @cindex\s-+BLOCKED_FETCH_info
+//* FETCH_ME_BQ_info:: @cindex\s-+FETCH_ME_BQ_info
+//* FETCH_ME_entry:: @cindex\s-+FETCH_ME_entry
+//* FETCH_ME_info:: @cindex\s-+FETCH_ME_info
+//@end index
--- /dev/null
+/* ---------------------------------------------------------------------------
+ Time-stamp: <Sat Dec 04 1999 21:28:56 Stardate: [-30]3999.47 hwloidl>
+ $Id: Global.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $
+
+ (c) The AQUA/Parade Projects, Glasgow University, 1995
+ The GdH/APART 624 Projects, Heriot-Watt University, Edinburgh, 1999
+
+ Global Address Manipulation.
+
+ The GALA and LAGA tables for mapping global addresses to local addresses
+ (i.e. heap pointers) are defined here. We use the generic hash tables
+ defined in Hash.c.
+ ------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@menu
+//* Includes::
+//* Global tables and lists::
+//* Fcts on GALA tables::
+//* Interface to taskId-PE table::
+//* Interface to LAGA table::
+//* Interface to GALA table::
+//* GC functions for GALA tables::
+//* Debugging routines::
+//* Index::
+//@end menu
+
+//@node Includes, Global tables and lists, Global Address Manipulation, Global Address Manipulation
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "Hash.h"
+#include "ParallelRts.h"
+
+/*
+ @globalAddr@ structures are allocated in chunks to reduce malloc overhead.
+*/
+
+//@node Global tables and lists, Fcts on GALA tables, Includes, Global Address Manipulation
+//@subsection Global tables and lists
+
+//@cindex thisPE
+int thisPE;
+
+//@menu
+//* Free lists::
+//* Hash tables::
+//@end menu
+
+//@node Free lists, Hash tables, Global tables and lists, Global tables and lists
+//@subsubsection Free lists
+
+/* Free list of GALA entries */
+GALA *freeGALAList = NULL;
+
+/* Number of globalAddr cells to allocate in one go */
+#define GCHUNK (1024 * sizeof(StgWord) / sizeof(GALA))
+
+/* Free list of indirections */
+
+//@cindex nextIndirection
+static StgInt nextIndirection = 0;
+//@cindex freeIndirections
+GALA *freeIndirections = NULL;
+
+/* The list of live indirections has to be marked for GC (see makeGlobal) */
+//@cindex liveIndirections
+GALA *liveIndirections = NULL;
+
+/* The list of remote indirections has to be marked for GC (see setRemoteGA) */
+//@cindex liveRemoteGAs
+GALA *liveRemoteGAs = NULL;
+
+//@node Hash tables, , Free lists, Global tables and lists
+//@subsubsection Hash tables
+
+/* Mapping global task ids PEs */
+//@cindex taskIDtoPEtable
+HashTable *taskIDtoPEtable = NULL;
+
+static int nextPE = 0;
+
+/* LAGA table: StgClosure* -> globalAddr*
+ (Remember: globalAddr = (GlobalTaskId, Slot, Weight))
+ Mapping local to global addresses (see interface below)
+*/
+
+//@cindex LAtoGALAtable
+HashTable *LAtoGALAtable = NULL;
+
+/* GALA table: globalAddr* -> StgClosure*
+ (Remember: globalAddr = (GlobalTaskId, Slot, Weight))
+ Mapping global to local addresses (see interface below)
+*/
+
+//@cindex pGAtoGALAtable
+HashTable *pGAtoGALAtable = NULL;
+
+//@node Fcts on GALA tables, Interface to taskId-PE table, Global tables and lists, Global Address Manipulation
+//@subsection Fcts on GALA tables
+
+//@cindex allocGALA
+static GALA *
+allocGALA(void)
+{
+ GALA *gl, *p;
+
+ if ((gl = freeGALAList) != NULL) {
+ freeGALAList = gl->next;
+ } else {
+ gl = (GALA *) stgMallocBytes(GCHUNK * sizeof(GALA), "allocGALA");
+
+ freeGALAList = gl + 1;
+ for (p = freeGALAList; p < gl + GCHUNK - 1; p++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ return gl;
+}
+
+//@node Interface to taskId-PE table, Interface to LAGA table, Fcts on GALA tables, Global Address Manipulation
+//@subsection Interface to taskId-PE table
+
+/*
+ We don't really like GLOBAL_TASK_ID, so we keep a table of TASK_ID to
+ PE mappings. The idea is that a PE identifier will fit in 16 bits, whereas
+ a TASK_ID may not.
+*/
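+
+/* Illustrative usage sketch only (the task ids below are hypothetical):
+   after
+       registerTask(0x40001); registerTask(0x40002);
+   the first task is mapped to PE 0 and the second to PE 1, so
+       taskIDtoPE(0x40002) == 1;
+   thisPE is set as a side effect when mytid itself is registered. */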
+
+//@cindex taskIDtoPE
+PEs
+taskIDtoPE(GlobalTaskId gtid)
+{
+ return (PEs) lookupHashTable(taskIDtoPEtable, gtid);
+}
+
+//@cindex registerTask
+void
+registerTask(gtid)
+GlobalTaskId gtid;
+{
+ if (gtid == mytid)
+ thisPE = nextPE;
+
+ insertHashTable(taskIDtoPEtable, gtid, (void *) (StgWord) nextPE++);
+}
+
+//@node Interface to LAGA table, Interface to GALA table, Interface to taskId-PE table, Global Address Manipulation
+//@subsection Interface to LAGA table
+
+/*
+ The local address to global address mapping returns a globalAddr structure
+ (pe task id, slot, weight) for any closure in the local heap which has a
+ global identity. Such closures may be copies of normal form objects with
+ a remote `master' location, @FetchMe@ nodes referencing remote objects, or
+ globally visible objects in the local heap (for which we are the master).
+*/
+
+//@cindex LAGAlookup
+globalAddr *
+LAGAlookup(addr)
+StgClosure *addr;
+{
+ GALA *gala;
+
+ /* We never look for GA's on indirections */
+ ASSERT(IS_INDIRECTION(addr) == NULL);
+ if ((gala = lookupHashTable(LAtoGALAtable, (StgWord) addr)) == NULL)
+ return NULL;
+ else
+ return &(gala->ga);
+}
+
+//@node Interface to GALA table, GC functions for GALA tables, Interface to LAGA table, Global Address Manipulation
+//@subsection Interface to GALA table
+
+/*
+ We also manage a mapping of global addresses to local addresses, so that
+ we can ``common up'' multiple references to the same object as they arrive
+ in data packets from remote PEs.
+
+ The global address to local address mapping is actually managed via a
+ ``packed global address'' to GALA hash table. The packed global
+ address takes the interesting part of the @globalAddr@ structure
+ (i.e. the pe and slot fields) and packs them into a single word
+ suitable for hashing.
+*/
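+
+/* A minimal sketch of the lookup path (illustrative only; GALAlookup below
+   is the real thing):
+
+       StgWord  pga  = PackGA(taskIDtoPE(ga->payload.gc.gtid),
+                              ga->payload.gc.slot);
+       GALA    *gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga);
+
+   i.e. only the (pe, slot) pair is packed into the hash key; the weight
+   plays no part in the lookup. */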
+
+//@cindex GALAlookup
+StgClosure *
+GALAlookup(ga)
+globalAddr *ga;
+{
+ StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+ GALA *gala;
+
+ if ((gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga)) == NULL)
+ return NULL;
+ else {
+ /*
+ * Bypass any indirections when returning a local closure to
+ * the caller. Note that we do not short-circuit the entry in
+ * the GALA tables right now, because we would have to do a
+ * hash table delete and insert in the LAtoGALAtable to keep
+ * that table up-to-date for preferred GALA pairs. That's
+ * probably a bit expensive.
+ */
+ return UNWIND_IND((StgClosure *)(gala->la));
+ }
+}
+
+/*
+ External references to our globally-visible closures are managed through an
+ indirection table. The idea is that the closure may move about as the result
+ of local garbage collections, but its global identity is determined by its
+ slot in the indirection table, which never changes.
+
+ The indirection table is maintained implicitly as part of the global
+ address to local address table. We need only keep track of the
+ highest numbered indirection index allocated so far, along with a free
+ list of lower numbered indices no longer in use.
+*/
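+
+/* For example (illustrative): the first closure made global on this PE gets
+   slot 0, the next slot 1, and so on (see nextIndirection below); a GALA
+   entry returned to freeIndirections keeps its slot number and is reused,
+   with its weight reset to MAX_GA_WEIGHT, before a fresh slot is allocated. */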
+
+/*
+ Allocate an indirection slot for the closure currently at address @addr@.
+*/
+
+//@cindex allocIndirection
+static GALA *
+allocIndirection(StgPtr addr)
+{
+ GALA *gala;
+
+ if ((gala = freeIndirections) != NULL) {
+ freeIndirections = gala->next;
+ } else {
+ gala = allocGALA();
+ gala->ga.payload.gc.gtid = mytid;
+ gala->ga.payload.gc.slot = nextIndirection++;
+ }
+ gala->ga.weight = MAX_GA_WEIGHT;
+ gala->la = addr;
+ return gala;
+}
+
+/*
+ Make a local closure at @addr@ globally visible. We have to allocate an
+ indirection slot for it, and update both the local address to global address
+ and global address to local address maps.
+*/
+
+//@cindex makeGlobal
+globalAddr *
+makeGlobal(addr, preferred)
+StgClosure *addr;
+rtsBool preferred;
+{
+ GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) addr);
+ GALA *newGALA = allocIndirection((StgPtr)addr);
+ StgWord pga = PackGA(thisPE, newGALA->ga.payload.gc.slot);
+
+ ASSERT(HEAP_ALLOCED(addr)); // check that addr might point into the heap
+ ASSERT(GALAlookup(&(newGALA->ga)) == NULL);
+
+ newGALA->la = addr;
+ newGALA->preferred = preferred;
+
+ if (preferred) {
+ /* The new GA is now the preferred GA for the LA */
+ if (oldGALA != NULL) {
+ oldGALA->preferred = rtsFalse;
+ (void) removeHashTable(LAtoGALAtable, (StgWord) addr, (void *) oldGALA);
+ }
+ insertHashTable(LAtoGALAtable, (StgWord) addr, (void *) newGALA);
+ }
+
+ /* put the new GALA entry on the list of live indirections */
+ newGALA->next = liveIndirections;
+ liveIndirections = newGALA;
+
+ insertHashTable(pGAtoGALAtable, pga, (void *) newGALA);
+
+ return &(newGALA->ga);
+}
+
+/*
+ Assign an existing remote global address to an existing closure.
+ We do not retain the @globalAddr@ structure that's passed in as an argument,
+ so it can be a static in the calling routine.
+*/
+
+//@cindex setRemoteGA
+globalAddr *
+setRemoteGA(addr, ga, preferred)
+StgClosure *addr;
+globalAddr *ga;
+rtsBool preferred;
+{
+ GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) addr);
+ GALA *newGALA = allocGALA();
+ StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+
+ ASSERT(ga->payload.gc.gtid != mytid);
+ ASSERT(ga->weight > 0);
+ ASSERT(GALAlookup(ga) == NULL);
+
+ newGALA->ga = *ga;
+ newGALA->la = addr;
+ newGALA->preferred = preferred;
+
+ if (preferred) {
+ /* The new GA is now the preferred GA for the LA */
+ if (oldGALA != NULL) {
+ oldGALA->preferred = rtsFalse;
+ (void) removeHashTable(LAtoGALAtable, (StgWord) addr, (void *) oldGALA);
+ }
+ insertHashTable(LAtoGALAtable, (StgWord) addr, (void *) newGALA);
+ }
+ newGALA->next = liveRemoteGAs;
+ liveRemoteGAs = newGALA;
+
+ insertHashTable(pGAtoGALAtable, pga, (void *) newGALA);
+
+ ga->weight = 0;
+
+ return &(newGALA->ga);
+}
+
+/*
+ Give me a bit of weight to give away on a new reference to a particular
+ global address. If we run down to nothing, we have to assign a new GA.
+*/
+
+//@cindex splitWeight
+void
+splitWeight(to, from)
+globalAddr *to, *from;
+{
+ /* Make sure we have enough weight to split */
+ if (from->weight == 1)
+ from = makeGlobal(GALAlookup(from), rtsTrue);
+
+ to->payload = from->payload;
+
+ if (from->weight == 0)
+ to->weight = 1L << (BITS_IN(unsigned) - 1);
+ else
+ to->weight = from->weight / 2;
+
+ from->weight -= to->weight;
+}
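+
+/* Worked example (illustrative): splitting from a GA of weight 8 gives the
+   recipient weight 4 and leaves the donor with weight 4.  If the donor's
+   weight has run down to 1, a brand-new GA is first assigned via makeGlobal
+   above; a donor weight of 0 is treated as the full weight, so the recipient
+   then receives 1L << (BITS_IN(unsigned) - 1). */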
+
+/*
+ Here, I am returning a bit of weight that a remote PE no longer needs.
+*/
+
+//@cindex addWeight
+globalAddr *
+addWeight(ga)
+globalAddr *ga;
+{
+ StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+ GALA *gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga);
+
+ IF_PAR_DEBUG(weight,
+ fprintf(stderr, "@* Adding weight %x to ", ga->weight);
+ printGA(&(gala->ga));
+ fputc('\n', stderr));
+
+ gala->ga.weight += ga->weight;
+ ga->weight = 0;
+
+ return &(gala->ga);
+}
+
+/*
+ Initialize all of the global address structures: the task ID to PE id
+ map, the local address to global address map, the global address to
+ local address map, and the indirection table.
+*/
+
+//@cindex initGAtables
+void
+initGAtables(void)
+{
+ taskIDtoPEtable = allocHashTable();
+ LAtoGALAtable = allocHashTable();
+ pGAtoGALAtable = allocHashTable();
+}
+
+//@cindex PackGA
+StgWord
+PackGA (pe, slot)
+StgWord pe;
+int slot;
+{
+ int pe_shift = (BITS_IN(StgWord)*3)/4;
+ int pe_bits = BITS_IN(StgWord) - pe_shift;
+
+ if ( pe_bits < 8 || slot >= (1L << pe_shift) ) { /* big trouble */
+ fflush(stdout);
+ fprintf(stderr, "PackGA: slot# too big (%d) or not enough pe_bits (%d)\n",
+ slot,pe_bits);
+ stg_exit(EXIT_FAILURE);
+ }
+
+ return((((StgWord)(pe)) << pe_shift) | ((StgWord)(slot)));
+
+ /* the idea is to use 3/4 of the bits (e.g., 24) for indirection-
+ table "slot", and 1/4 for the pe# (e.g., 8).
+
+ We check for too many bits in "slot", and double-check (at
+ compile-time?) that we have enough bits for "pe". We *don't*
+ check for too many bits in "pe", because SysMan enforces a
+ MAX_PEs limit at the very very beginning.
+
+ Phil & Will 95/08
+ */
+}
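+
+/* Worked example (illustrative): with a 32-bit StgWord, pe_shift is 24 and
+   pe_bits is 8, so PackGA(3, 17) yields (3 << 24) | 17 == 0x03000011, and
+   slot numbers up to 2^24 - 1 can be represented. */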
+
+//@node GC functions for GALA tables, Debugging routines, Interface to GALA table, Global Address Manipulation
+//@subsection GC functions for GALA tables
+
+/*
+ When we do a copying collection, we want to evacuate all of the local
+ entries in the GALA table for which there are outstanding remote
+ pointers (i.e. for which the weight is not MAX_GA_WEIGHT.)
+*/
+//@cindex markLocalGAs
+void
+markLocalGAs(rtsBool full)
+{
+ GALA *gala;
+ GALA *next;
+ GALA *prev = NULL;
+ StgPtr old_la, new_la;
+ nat n=0, m=0; // debugging only
+
+ IF_DEBUG(gc,
+ belch("@@ markLocalGAs: Marking LIVE INDIRECTIONS in GALA table starting with GALA at %p\n",
+ liveIndirections);
+ printLAGAtable());
+
+ for (gala = liveIndirections, m=0; gala != NULL; gala = next, m++) {
+ IF_DEBUG(gc,
+ printGA(&(gala->ga));
+ fprintf(stderr, ";@ %d: LA: %p (%s) ",
+ m, gala->la, info_type(gala->la)));
+ next = gala->next;
+ old_la = gala->la;
+ ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */
+ if (get_itbl((StgClosure *)old_la)->type == EVACUATED) {
+ /* somebody else already evacuated this closure */
+ new_la = ((StgEvacuated *)old_la)->evacuee;
+ IF_DEBUG(gc,
+ belch(" already evacuated to %p\n", new_la));
+ } else {
+ StgClosure *foo ; // debugging only
+ n++;
+ IF_PAR_DEBUG(verbose,
+ if (IS_INDIRECTION((StgClosure *)old_la))
+ belch("{markLocalGAs}Daq ghuH: trying to mark an indirection %p (%s) -> %p (%s); [closure=%p]",
+ old_la, info_type(old_la),
+ (foo = UNWIND_IND((StgClosure *)old_la)), info_type(foo),
+ old_la));
+ new_la = MarkRoot(UNWIND_IND((StgClosure *)old_la)); // or just evacuate(old_ga)
+ IF_DEBUG(gc,
+ belch(" evacuated %p to %p\n", old_la, new_la));
+ }
+
+ gala->la = new_la;
+ /* remove old LA and replace with new LA */
+ //(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala);
+ //insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala);
+
+ gala->next = prev;
+ prev = gala;
+ }
+ liveIndirections = prev; /* list has been reversed during the marking */
+
+ IF_PAR_DEBUG(verbose,
+ belch("@@ markLocalGAs: %d of %d GALAs marked on PE %x",
+ n, m, mytid));
+
+ /* -------------------------------------------------------------------- */
+
+ n=0; m=0; // debugging only
+
+ IF_DEBUG(gc,
+ belch("@@ markLocalGAs: Marking LIVE REMOTE GAs in GALA table starting with GALA at %p\n",
+ liveRemoteGAs));
+
+ for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) {
+ IF_DEBUG(gc,
+ printGA(&(gala->ga)));
+ next = gala->next;
+ old_la = gala->la;
+ ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */
+ if (get_itbl((StgClosure *)old_la)->type == EVACUATED) {
+ /* somebody else already evacuated this closure */
+ new_la = ((StgEvacuated *)old_la)->evacuee;
+ } else {
+ n++;
+ new_la = MarkRoot((StgClosure *)old_la); // or just evacuate(old_ga)
+ }
+
+ gala->la = new_la;
+ /* remove old LA and replace with new LA */
+ //(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala);
+ //insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala);
+
+ gala->next = prev;
+ prev = gala;
+ }
+ liveRemoteGAs = prev; /* list is reversed during marking */
+
+ /* If we have any remaining FREE messages to send off, do so now */
+ // sendFreeMessages();
+
+ IF_DEBUG(gc,
+ belch("@@ markLocalGAs: GALA after marking");
+ printLAGAtable();
+ belch("--------------------------------------"));
+
+}
+
+void
+OLDmarkLocalGAs(rtsBool full)
+{
+ extern StgClosure *MarkRootHWL(StgClosure *root);
+
+ GALA *gala;
+ GALA *next;
+ GALA *prev = NULL;
+ StgPtr new_la;
+ nat n=0, m=0; // debugging only
+
+ IF_DEBUG(gc,
+ belch("@@ markLocalGAs: Marking entries in GALA table starting with GALA at %p",
+ liveIndirections);
+ printLAGAtable());
+
+ for (gala = liveIndirections; gala != NULL; gala = next) {
+ IF_DEBUG(gc,
+ printGA(&(gala->ga));
+ fprintf(stderr, " LA: %p (%s) ",
+ gala->la, info_type(gala->la)));
+ next = gala->next;
+ ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */
+ if (gala->ga.weight != MAX_GA_WEIGHT) {
+ /* Remote references exist, so we must evacuate the local closure */
+ StgPtr old_la = gala->la;
+
+ if (get_itbl((StgClosure *)old_la)->type != EVACUATED) { // track evacuee!??
+ n++;
+ IF_DEBUG(gc,
+ fprintf(stderr, " marking as root\n"));
+ new_la = MarkRoot((StgClosure *)old_la); // or just evacuate(old_ga)
+ //IF_DEBUG(gc,
+ // fprintf(stderr, " new LA is %p ", new_la));
+ if (!full && gala->preferred && new_la != old_la) {
+ IF_DEBUG(gc,
+ fprintf(stderr, " replacing %p with %p in LAGA table\n",
+ old_la, new_la));
+ (void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala);
+ insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala);
+ }
+ } else {
+ IF_DEBUG(gc,
+ fprintf(stderr, " EVAC "));
+ new_la = ((StgEvacuated *)old_la)->evacuee;
+ IF_DEBUG(gc,
+ fprintf(stderr, " replacing %p with %p in LAGA table\n",
+ old_la, new_la));
+ (void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala);
+ insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala);
+ }
+ gala->next = prev;
+ prev = gala;
+ } else {
+ /* Since we have all of the weight, this GA is no longer needed */
+ StgWord pga = PackGA(thisPE, gala->ga.payload.gc.slot);
+
+ m++;
+ IF_DEBUG(gc,
+ fprintf(stderr, " freeing slot %d",
+ gala->ga.payload.gc.slot));
+
+ /* put the now redundant GALA onto the free list */
+ gala->next = freeIndirections;
+ freeIndirections = gala;
+ /* remove the GALA from the GALA table; now it's just local */
+ (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala);
+ if (!full && gala->preferred)
+ (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+
+#ifdef DEBUG
+ gala->ga.weight = 0x0d0d0d0d;
+      gala->la = (StgPtr) 0x0bad0bad;
+#endif
+ }
+ }
+ liveIndirections = prev; /* list has been reversed during the marking */
+
+ IF_PAR_DEBUG(verbose,
+ belch("@@ markLocalGAs: %d GALAs marked, %d GALAs nuked on PE %x",
+ n, m, mytid));
+
+}
+
+//@cindex RebuildGAtables
+void
+RebuildGAtables(rtsBool full)
+{
+ GALA *gala;
+ GALA *next;
+ GALA *prev;
+ StgClosure *closure, *last, *new_closure;
+
+ //prepareFreeMsgBuffers();
+
+ if (full)
+ RebuildLAGAtable();
+
+ IF_DEBUG(gc,
+ belch("@@ RebuildGAtables: After ReBuilding GALA table starting with GALA at %p",
+ liveRemoteGAs);
+ printLAGAtable());
+}
+
+void
+OLDRebuildGAtables(rtsBool full)
+{
+ GALA *gala;
+ GALA *next;
+ GALA *prev;
+ StgClosure *closure, *last, *new_closure;
+
+ prepareFreeMsgBuffers();
+
+ for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) {
+ IF_DEBUG(gc,
+ printGA(&(gala->ga)));
+ next = gala->next;
+ ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */
+
+ closure = (StgClosure *) (gala->la);
+
+ /*
+ * If the old closure has not been forwarded, we let go. Note that this
+ * approach also drops global aliases for PLCs.
+ */
+
+ if (!full && gala->preferred)
+ (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+
+ /* Follow indirection chains to the end, just in case */
+ closure = UNWIND_IND(closure);
+
+ /*
+ if (get_itbl(closure)->type != EVACUATED) { // (new_closure = isAlive(closure)) == NULL) { // (W_) Forward_Ref_info)
+ // closure is not alive any more, thus remove GA
+ int pe = taskIDtoPE(gala->ga.payload.gc.gtid);
+ StgWord pga = PackGA(pe, gala->ga.payload.gc.slot);
+
+ IF_DEBUG(gc,
+ fprintf(stderr, " (LA: %p (%s)) is unused on this PE -> sending free\n",
+ closure, info_type(closure)));
+
+ (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala);
+ freeRemoteGA(pe, &(gala->ga));
+ gala->next = freeGALAList;
+ freeGALAList = gala;
+ } else {
+ */
+ if (get_itbl(closure)->type == EVACUATED) {
+ IF_DEBUG(gc,
+ fprintf(stderr, " EVAC %p (%s)\n",
+ closure, info_type(closure)));
+ closure = ((StgEvacuated *)closure)->evacuee;
+ } else {
+ IF_DEBUG(gc,
+ fprintf(stderr, " !EVAC %p (%s)\n",
+ closure, info_type(closure)));
+ }
+ gala->la = closure;
+ if (!full && gala->preferred)
+ insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+ gala->next = prev;
+ prev = gala;
+ }
+ //}
+ liveRemoteGAs = prev; /* list is reversed during marking */
+
+ /* If we have any remaining FREE messages to send off, do so now */
+ sendFreeMessages();
+
+ if (full)
+ RebuildLAGAtable();
+
+ IF_DEBUG(gc,
+ belch("@@ RebuildGAtables: After ReBuilding GALA table starting with GALA at %p",
+ liveRemoteGAs);
+ printLAGAtable());
+}
+
+/*
+ Rebuild the LA->GA table, assuming that the addresses in the GALAs are
+ correct.
+*/
+
+//@cindex RebuildLAGAtable
+void
+RebuildLAGAtable(void)
+{
+ GALA *gala;
+ nat n=0, m=0; // debugging
+
+ /* The old LA->GA table is worthless */
+ freeHashTable(LAtoGALAtable, NULL);
+ LAtoGALAtable = allocHashTable();
+
+ IF_DEBUG(gc,
+ belch("@@ RebuildLAGAtable: new LAGA table at %p",
+ LAtoGALAtable));
+
+ for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+ n++;
+ if (gala->preferred)
+ insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+ }
+
+ for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+ m++;
+ if (gala->preferred)
+ insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+ }
+
+ IF_DEBUG(gc,
+ belch("@@ RebuildLAGAtable: inserted %d entries from liveIndirections and %d entries from liveRemoteGAs",
+ n,m));
+
+}
+
+//@node Debugging routines, Index, GC functions for GALA tables, Global Address Manipulation
+//@subsection Debugging routines
+
+//@cindex printGA
+void
+printGA (globalAddr *ga)
+{
+ fprintf(stderr, "((%x, %d, %x))",
+ ga->payload.gc.gtid,
+ ga->payload.gc.slot,
+ ga->weight);
+}
+
+//@cindex printGALA
+void
+printGALA (GALA *gala)
+{
+ printGA(&(gala->ga));
+ fprintf(stderr, " -> %p (%s)", (StgPtr)gala->la, info_type(gala->la));
+ fprintf(stderr, " %s", (gala->preferred) ? "PREF" : "____");
+}
+
+/*
+ Printing the LA->GA table.
+*/
+
+//@cindex DebugPrintLAGAtable
+void
+printLAGAtable(void)
+{
+ GALA *gala;
+ nat n=0, m=0; // debugging
+
+ belch("@@ LAGAtable (%p) with liveIndirections=%p, liveRemoteGAs=%p:",
+ LAtoGALAtable, liveIndirections, liveRemoteGAs);
+
+ for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+ n++;
+ printGALA(gala);
+ fputc('\n', stderr);
+ }
+
+ for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+ m++;
+ printGALA(gala);
+ fputc('\n', stderr);
+ }
+ belch("@@ LAGAtable has %d liveIndirections entries and %d liveRemoteGAs entries",
+ n, m);
+}
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Debugging routines, Global Address Manipulation
+//@subsection Index
+
+//@index
+//* GALAlookup:: @cindex\s-+GALAlookup
+//* LAGAlookup:: @cindex\s-+LAGAlookup
+//* LAtoGALAtable:: @cindex\s-+LAtoGALAtable
+//* PackGA:: @cindex\s-+PackGA
+//* RebuildGAtables:: @cindex\s-+RebuildGAtables
+//* RebuildLAGAtable:: @cindex\s-+RebuildLAGAtable
+//* addWeight:: @cindex\s-+addWeight
+//* allocGALA:: @cindex\s-+allocGALA
+//* allocIndirection:: @cindex\s-+allocIndirection
+//* freeIndirections:: @cindex\s-+freeIndirections
+//* initGAtables:: @cindex\s-+initGAtables
+//* liveIndirections:: @cindex\s-+liveIndirections
+//* liveRemoteGAs:: @cindex\s-+liveRemoteGAs
+//* makeGlobal:: @cindex\s-+makeGlobal
+//* markLocalGAs:: @cindex\s-+markLocalGAs
+//* nextIndirection:: @cindex\s-+nextIndirection
+//* pGAtoGALAtable:: @cindex\s-+pGAtoGALAtable
+//* registerTask:: @cindex\s-+registerTask
+//* setRemoteGA:: @cindex\s-+setRemoteGA
+//* splitWeight:: @cindex\s-+splitWeight
+//* taskIDtoPE:: @cindex\s-+taskIDtoPE
+//* taskIDtoPEtable:: @cindex\s-+taskIDtoPEtable
+//* thisPE:: @cindex\s-+thisPE
+//@end index
--- /dev/null
+/*
+ Time-stamp: <Sat Dec 11 1999 17:25:27 Stardate: [-30]4033.42 software>
+ $Id: GranSim.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $
+
+ Variables and functions specific to GranSim the parallelism simulator
+ for GPH.
+*/
+
+//@node GranSim specific code, , ,
+//@section GranSim specific code
+
+/*
+ Macros for dealing with the new and improved GA field for simulating
+ parallel execution. Based on @CONCURRENT@ package. The GA field now
+ contains a mask, where the n-th bit stands for the n-th processor, where
+ this data can be found. In case of multiple copies, several bits are
+ set. The total number of processors is bounded by @MAX_PROC@, which
+ should be <= the length of a word in bits. -- HWL
+*/
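+
+/* Illustrative example only: with 4 PEs, a closure whose copies live on
+   PEs 0 and 2 carries the mask 0x5 (binary 0101), so IS_LOCAL_TO(mask, 0)
+   and IS_LOCAL_TO(mask, 2) hold while IS_LOCAL_TO(mask, 1) does not;
+   ga_to_proc (defined below) returns 0, the lowest bit set.  For PLCs all
+   bits are set. */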
+
+//@menu
+//* Includes::
+//* Prototypes and externs::
+//* Constants and Variables::
+//* Initialisation::
+//* Global Address Operations::
+//* Global Event Queue::
+//* Spark queue functions::
+//* Scheduling functions::
+//* Thread Queue routines::
+//* GranSim functions::
+//* GranSimLight routines::
+//* Code for Fetching Nodes::
+//* Idle PEs::
+//* Routines directly called from Haskell world::
+//* Emiting profiling info for GrAnSim::
+//* Dumping routines::
+//* Index::
+//@end menu
+
+//@node Includes, Prototypes and externs, GranSim specific code, GranSim specific code
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "StgMiscClosures.h"
+#include "StgTypes.h"
+#include "Schedule.h"
+#include "SchedAPI.h" // for pushClosure
+#include "GC.h"
+#include "GranSimRts.h"
+#include "GranSim.h"
+#include "ParallelRts.h"
+#include "ParallelDebug.h"
+#include "Storage.h" // for recordMutable
+
+
+//@node Prototypes and externs, Constants and Variables, Includes, GranSim specific code
+//@subsection Prototypes and externs
+
+#if defined(GRAN)
+
+/* Prototypes */
+static inline PEs ga_to_proc(StgWord);
+static inline rtsBool any_idle(void);
+static inline nat idlers(void);
+ PEs where_is(StgClosure *node);
+
+static rtsBool stealSomething(PEs proc, rtsBool steal_spark, rtsBool steal_thread);
+static inline rtsBool stealSpark(PEs proc);
+static inline rtsBool stealThread(PEs proc);
+static rtsBool stealSparkMagic(PEs proc);
+static rtsBool stealThreadMagic(PEs proc);
+/* subsumed by stealSomething
+static void stealThread(PEs proc);
+static void stealSpark(PEs proc);
+*/
+static rtsTime sparkStealTime(void);
+static nat natRandom(nat from, nat to);
+static PEs findRandomPE(PEs proc);
+static void sortPEsByTime (PEs proc, PEs *pes_by_time,
+ nat *firstp, nat *np);
+
+void GetRoots(void);
+
+#endif /* GRAN */
+
+//@node Constants and Variables, Initialisation, Prototypes and externs, GranSim specific code
+//@subsection Constants and Variables
+
+#if defined(GRAN) || defined(PAR)
+/* See GranSim.h for the definition of the enum gran_event_types */
+char *gran_event_names[] = {
+ "START", "START(Q)",
+ "STEALING", "STOLEN", "STOLEN(Q)",
+ "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
+ "SCHEDULE", "DESCHEDULE",
+ "END",
+ "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
+ "ALLOC",
+ "TERMINATE",
+ "SYSTEM_START", "SYSTEM_END", /* only for debugging */
+ "??"
+};
+#endif
+
+#if defined(GRAN) /* whole file */
+char *proc_status_names[] = {
+ "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
+ "UnknownProcStatus"
+};
+
+/* For internal use (event statistics) only */
+char *event_names[] =
+ { "ContinueThread", "StartThread", "ResumeThread",
+ "MoveSpark", "MoveThread", "FindWork",
+ "FetchNode", "FetchReply",
+ "GlobalBlock", "UnblockThread"
+ };
+
+//@cindex CurrentProc
+PEs CurrentProc = 0;
+
+/*
+ ToDo: Create a structure for the processor status and put all the
+ arrays below into it.
+ -- HWL */
+
+//@cindex CurrentTime
+/* One clock for each PE */
+rtsTime CurrentTime[MAX_PROC];
+
+/* Useful to restrict communication; cf fishing model in GUM */
+nat OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
+
+/* Status of each PE (new since but independent of GranSim Light) */
+rtsProcStatus procStatus[MAX_PROC];
+
+# if defined(GRAN) && defined(GRAN_CHECK)
+/* To check if the RTS ever tries to run a thread that should be blocked
+ because of fetching remote data */
+StgTSO *BlockedOnFetch[MAX_PROC];
+# define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
+# endif
+
+nat SparksAvail = 0; /* How many sparks are available */
+nat SurplusThreads = 0; /* How many excess threads are there */
+
+/* Do we need to reschedule following a fetch? */
+rtsBool NeedToReSchedule = rtsFalse, IgnoreEvents = rtsFalse, IgnoreYields = rtsFalse;
+rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; /* checked from the threaded world! */
+
+//@cindex spark queue
+/* GranSim: a globally visible array of spark queues */
+rtsSparkQ pending_sparks_hds[MAX_PROC];
+rtsSparkQ pending_sparks_tls[MAX_PROC];
+
+nat sparksIgnored = 0, sparksCreated = 0;
+
+GlobalGranStats globalGranStats;
+
+nat gran_arith_cost, gran_branch_cost, gran_load_cost,
+ gran_store_cost, gran_float_cost;
+
+/*
+Old comment from 0.29. ToDo: Check and update -- HWL
+
+The following variables control the behaviour of GrAnSim. In general, there
+is one RTS option for enabling each of these features. To get the desired
+GranSim setup, the following questions have to be answered:
+\begin{itemize}
+\item {\em Which scheduling algorithm} to use (@RtsFlags.GranFlags.DoFairSchedule@)?
+ Currently only unfair scheduling is supported.
+\item What to do when remote data is fetched (@RtsFlags.GranFlags.DoAsyncFetch@)?
+ Either block and wait for the
+ data or reschedule and do some other work.
+ Thus, if this variable is true, asynchronous communication is
+ modelled. Block on fetch mainly makes sense for incremental fetching.
+
+ There is also a simplified fetch variant available
+ (@RtsFlags.GranFlags.SimplifiedFetch@). This variant does not use events to model
+ communication. It is faster but the results will be less accurate.
+\item How aggressive to be in getting work after a reschedule on fetch
+ (@RtsFlags.GranFlags.FetchStrategy@)?
+ This is determined by the so-called {\em fetching
+ strategy\/}. Currently, there are four possibilities:
+ \begin{enumerate}
+ \item Only run a runnable thread.
+ \item Turn a spark into a thread, if necessary.
+ \item Steal a remote spark, if necessary.
+ \item Steal a runnable thread from another processor, if necessary.
+  \end{enumerate}
+ The variable @RtsFlags.GranFlags.FetchStrategy@ determines how far to go in this list
+ when rescheduling on a fetch.
+\item Should sparks or threads be stolen first when looking for work
+ (@RtsFlags.GranFlags.DoStealThreadsFirst@)?
+ The default is to steal sparks first (much cheaper).
+\item Should the RTS use a lazy thread creation scheme
+ (@RtsFlags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
+ turned into threads when work is needed. Also note, that sparks
+ can be discarded by the RTS (this is done in the case of an overflow
+ of the spark pool). Setting @RtsFlags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
+ the creation of threads at the next possibility (i.e.\ when new work
+ is demanded the next time).
+\item Should data be fetched closure-by-closure or in packets
+ (@RtsFlags.GranFlags.DoBulkFetching@)? The default strategy is a GRIP-like incremental
+ (i.e.\ closure-by-closure) strategy. This makes sense in a
+ low-latency setting but is bad in a high-latency system. Setting
+ @RtsFlags.GranFlags.DoBulkFetching@ to @True@ enables bulk (packet) fetching. Other
+ parameters determine the size of the packets (@pack_buffer_size@) and the number of
+ thunks that should be put into one packet (@RtsFlags.GranFlags.ThunksToPack@).
+\item If there is no other possibility to find work, should runnable threads
+ be moved to an idle processor (@RtsFlags.GranFlags.DoThreadMigration@)? In any case, the
+  RTS tries to get sparks (either local or remote ones) first. Thread
+ migration is very expensive, since a whole TSO has to be transferred
+ and probably data locality becomes worse in the process. Note, that
+ the closure, which will be evaluated next by that TSO is not
+ transferred together with the TSO (that might block another thread).
+\item Should the RTS distinguish between sparks created by local nodes and
+ stolen sparks (@RtsFlags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
+ data locality by preferring sparks of local nodes (it is more likely
+ that the data for those sparks is already on the local processor).
+ However, such a distinction also imposes an overhead on the spark
+ queue management, and typically a large number of sparks are
+ generated during execution. By default this variable is set to @False@.
+\item Should the RTS use granularity control mechanisms? The idea of a
+ granularity control mechanism is to make use of granularity
+ information provided via annotation of the @par@ construct in order
+ to prefer bigger threads when either turning a spark into a thread or
+ when choosing the next thread to schedule. Currently, three such
+ mechanisms are implemented:
+ \begin{itemize}
+ \item Cut-off: The granularity information is interpreted as a
+ priority. If a threshold priority is given to the RTS, then
+ only those sparks with a higher priority than the threshold
+ are actually created. Other sparks are immediately discarded.
+ This is similar to a usual cut-off mechanism often used in
+ parallel programs, where parallelism is only created if the
+        input data is large enough. With this option, the choice is
+ hidden in the RTS and only the threshold value has to be
+ provided as a parameter to the runtime system.
+ \item Priority Sparking: This mechanism keeps priorities for sparks
+ and chooses the spark with the highest priority when turning
+ a spark into a thread. After that the priority information is
+ discarded. The overhead of this mechanism comes from
+ maintaining a sorted spark queue.
+ \item Priority Scheduling: This mechanism keeps the granularity
+        information for threads, too. Thus, on each reschedule the
+ largest thread is chosen. This mechanism has a higher
+ overhead, as the thread queue is sorted, too.
+ \end{itemize}
+\end{itemize}
+*/
+
+//@node Initialisation, Global Address Operations, Constants and Variables, GranSim specific code
+//@subsection Initialisation
+
+void
+init_gr_stats (void) {
+ memset(&globalGranStats, '\0', sizeof(GlobalGranStats));
+#if 0
+ /* event stats */
+ globalGranStats.noOfEvents = 0;
+ for (i=0; i<MAX_EVENT; i++) globalGranStats.event_counts[i]=0;
+
+ /* communication stats */
+ globalGranStats.fetch_misses = 0;
+ globalGranStats.tot_low_pri_sparks = 0;
+
+ /* obscure stats */
+ globalGranStats.rs_sp_count = 0;
+ globalGranStats.rs_t_count = 0;
+  globalGranStats.ntimes_total = 0;
+ globalGranStats.fl_total = 0;
+ globalGranStats.no_of_steals = 0;
+
+ /* spark queue stats */
+  globalGranStats.tot_sq_len = 0;
+ globalGranStats.tot_sq_probes = 0;
+ globalGranStats.tot_sparks = 0;
+ globalGranStats.withered_sparks = 0;
+ globalGranStats.tot_add_threads = 0;
+ globalGranStats.tot_tq_len = 0;
+ globalGranStats.non_end_add_threads = 0;
+
+ /* thread stats */
+ globalGranStats.tot_threads_created = 0;
+ for (i=0; i<MAX_PROC; i++) globalGranStats.threads_created_on_PE[i]=0;
+#endif /* 0 */
+}
+
+//@node Global Address Operations, Global Event Queue, Initialisation, GranSim specific code
+//@subsection Global Address Operations
+/*
+ ----------------------------------------------------------------------
+ Global Address Operations
+
+ These functions perform operations on the global-address (ga) part of a
+ closure. The ga is the only new field (1 word) in a closure introduced by
+ GrAnSim. It serves as a bitmask, indicating on which processor the
+ closure is residing. Since threads are described by Thread State Object
+  (TSO), which is nothing but another kind of closure, this scheme also
+  gives placement information about threads.
+
+ A ga is just a bitmask, so the operations on them are mainly bitmask
+ manipulating functions. Note, that there are important macros like PROCS,
+ IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
+
+ NOTE: In GrAnSim-light we don't maintain placement information. This
+  allows us to simulate an arbitrary number of processors. The price we
+  pay is that communication cannot be costed properly. In short,
+ GrAnSim-light is meant to reveal the maximal parallelism in a program.
+ From an implementation point of view the important thing is: {\em
+ GrAnSim-light does not maintain global-addresses}. */
+
+/* ga_to_proc returns the first processor marked in the bitmask ga.
+ Normally only one bit in ga should be set. But for PLCs all bits
+ are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
+
+//@cindex ga_to_proc
+
+static inline PEs
+ga_to_proc(StgWord ga)
+{
+ PEs i;
+ for (i = 0; i < RtsFlags.GranFlags.proc && !IS_LOCAL_TO(ga, i); i++);
+ ASSERT(0<=i && i<RtsFlags.GranFlags.proc);
+ return (i);
+}
+
+/* NB: This takes a *node* rather than just a ga as input */
+//@cindex where_is
+PEs
+where_is(StgClosure *node)
+{ return (ga_to_proc(PROCS(node))); }
+
+// debugging only
+//@cindex is_unique
+rtsBool
+is_unique(StgClosure *node)
+{
+ PEs i;
+ rtsBool unique = rtsFalse;
+
+ for (i = 0; i < RtsFlags.GranFlags.proc ; i++)
+ if (IS_LOCAL_TO(PROCS(node), i))
+ if (unique) // exactly 1 instance found so far
+ return rtsFalse; // found a 2nd instance => not unique
+ else
+ unique = rtsTrue; // found 1st instance
+ ASSERT(unique); // otherwise returned from within loop
+ return (unique);
+}
+
+//@cindex any_idle
+static inline rtsBool
+any_idle(void) { /* any (map (\ i -> procStatus[i] == Idle)) [0,..,MAX_PROC] */
+ PEs i;
+ rtsBool any_idle;
+ for(i=0, any_idle=rtsFalse;
+ !any_idle && i<RtsFlags.GranFlags.proc;
+ any_idle = any_idle || procStatus[i] == Idle, i++)
+    {} ;
+  return any_idle;
+}
+
+//@cindex idlers
+static inline nat
+idlers(void) { /* number of idle PEs */
+ PEs i, j;
+ for(i=0, j=0;
+ i<RtsFlags.GranFlags.proc;
+ j += (procStatus[i] == Idle) ? 1 : 0, i++)
+ {} ;
+ return j;
+}
+
+//@node Global Event Queue, Spark queue functions, Global Address Operations, GranSim specific code
+//@subsection Global Event Queue
+/*
+The following routines implement an ADT of an event-queue (FIFO).
+ToDo: Put that in an own file(?)
+*/
+
+/* Pointer to the global event queue; events are currently malloc'ed */
+rtsEventQ EventHd = NULL;
+
+//@cindex get_next_event
+rtsEvent *
+get_next_event(void)
+{
+ static rtsEventQ entry = NULL;
+
+ if (EventHd == NULL) {
+ barf("No next event. This may be caused by a circular data dependency in the program.");
+ }
+
+ if (entry != NULL)
+ free((char *)entry);
+
+ if (RtsFlags.GranFlags.GranSimStats.Global) { /* count events */
+ globalGranStats.noOfEvents++;
+ globalGranStats.event_counts[EventHd->evttype]++;
+ }
+
+ entry = EventHd;
+
+ IF_GRAN_DEBUG(event_trace,
+ print_event(entry));
+
+ EventHd = EventHd->next;
+ return(entry);
+}
+
+/* When getting the time of the next event we ignore CONTINUETHREAD events:
+ we don't want to be interrupted before the end of the current time slice
+ unless there is something important to handle.
+*/
+//@cindex get_time_of_next_event
+rtsTime
+get_time_of_next_event(void)
+{
+ rtsEventQ event = EventHd;
+
+ while (event != NULL && event->evttype==ContinueThread) {
+ event = event->next;
+ }
+ if(event == NULL)
+ return ((rtsTime) 0);
+ else
+ return (event->time);
+}
+
+/* ToDo: replace malloc/free with a free list */
+//@cindex insert_event
+void
+insert_event(newentry)
+rtsEvent *newentry;
+{
+ rtsEventType evttype = newentry->evttype;
+ rtsEvent *event, **prev;
+
+ /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
+
+ /* Search the queue and insert at the right point:
+ FINDWORK before everything, CONTINUETHREAD after everything.
+
+ This ensures that we find any available work after all threads have
+ executed the current cycle. This level of detail would normally be
+ irrelevant, but matters for ridiculously low latencies...
+ */
+
+ /* Changed the ordering: Now FINDWORK comes after everything but
+ CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
+ FINDWORK. This is important when a GranSimSparkAt happens and
+ DoAlwaysCreateThreads is turned on. Also important if a GC occurs
+ when trying to build a new thread (see much_spark) -- HWL 02/96 */
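+
+  /* Concretely (illustrative): if a MoveSpark, a FindWork and a
+     ContinueThread are all inserted for the same time t, they end up in
+     the queue in exactly that order, regardless of the insertion order. */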
+
+ if(EventHd == NULL)
+ EventHd = newentry;
+ else {
+ for (event = EventHd, prev=(rtsEvent**)&EventHd;
+ event != NULL;
+ prev = (rtsEvent**)&(event->next), event = event->next) {
+ switch (evttype) {
+ case FindWork: if ( event->time < newentry->time ||
+ ( (event->time == newentry->time) &&
+ (event->evttype != ContinueThread) ) )
+ continue;
+ else
+ break;
+ case ContinueThread: if ( event->time <= newentry->time )
+ continue;
+ else
+ break;
+ default: if ( event->time < newentry->time ||
+ ((event->time == newentry->time) &&
+ (event->evttype == newentry->evttype)) )
+ continue;
+ else
+ break;
+ }
+ /* Insert newentry here (i.e. before event) */
+ *prev = newentry;
+ newentry->next = event;
+ break;
+ }
+ if (event == NULL)
+ *prev = newentry;
+ }
+}
+
+//@cindex new_event
+void
+new_event(proc,creator,time,evttype,tso,node,spark)
+PEs proc, creator;
+rtsTime time;
+rtsEventType evttype;
+StgTSO *tso;
+StgClosure *node;
+rtsSpark *spark;
+{
+ rtsEvent *newentry = (rtsEvent *) stgMallocBytes(sizeof(rtsEvent), "new_event");
+
+ newentry->proc = proc;
+ newentry->creator = creator;
+ newentry->time = time;
+ newentry->evttype = evttype;
+ newentry->tso = tso;
+ newentry->node = node;
+ newentry->spark = spark;
+ newentry->gc_info = 0;
+ newentry->next = NULL;
+
+ insert_event(newentry);
+
+ IF_DEBUG(gran,
+ fprintf(stderr, "GRAN: new_event: \n");
+ print_event(newentry))
+}
+
+//@cindex prepend_event
+void
+prepend_event(event) /* put event at beginning of EventQueue */
+rtsEvent *event;
+{ /* only used for GC! */
+ event->next = EventHd;
+ EventHd = event;
+}
+
+//@cindex grab_event
+rtsEventQ
+grab_event(void) /* undo prepend_event i.e. get the event */
+{ /* at the head of EventQ but don't free anything */
+ rtsEventQ event = EventHd;
+
+ if (EventHd == NULL) {
+ barf("No next event (in grab_event). This may be caused by a circular data dependency in the program.");
+ }
+
+ EventHd = EventHd->next;
+ return (event);
+}
+
+//@cindex traverse_eventq_for_gc
+void
+traverse_eventq_for_gc(void)
+{
+ rtsEventQ event = EventHd;
+ StgWord bufsize;
+ StgClosure *closurep;
+ StgTSO *tsop;
+ StgPtr buffer, bufptr;
+ PEs proc, creator;
+
+ /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
+ orig closure (root of packed graph). This means that a graph, which is
+ between processors at the time of GC is fetched again at the time when
+ it would have arrived, had there been no GC. Slightly inaccurate but
+ safe for GC.
+     This is only needed for GUM style fetching. -- HWL */
+ if (!RtsFlags.GranFlags.DoBulkFetching)
+ return;
+
+ for(event = EventHd; event!=NULL; event=event->next) {
+ if (event->evttype==FetchReply) {
+ buffer = stgCast(StgPtr,event->node);
+ ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG); /* It's a pack buffer */
+ bufsize = buffer[PACK_SIZE_LOCN];
+ closurep = stgCast(StgClosure*,buffer[PACK_HDR_SIZE]);
+ tsop = stgCast(StgTSO*,buffer[PACK_TSO_LOCN]);
+ proc = event->proc;
+ creator = event->creator; /* similar to unpacking */
+ for (bufptr=buffer+PACK_HDR_SIZE;
+ bufptr<(buffer+bufsize);
+ bufptr++) {
+ // if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
+ // (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
+ if ( GET_INFO(stgCast(StgClosure*,bufptr)) ) {
+ convertFromRBH(stgCast(StgClosure *,bufptr));
+ }
+ }
+ free(buffer);
+ event->evttype = FetchNode;
+ event->proc = creator;
+ event->creator = proc;
+ event->node = closurep;
+ event->tso = tsop;
+ event->gc_info = 0;
+ }
+ }
+}
+
+void
+markEventQueue(void)
+{
+ StgClosure *MarkRoot(StgClosure *root); // prototype
+
+ rtsEventQ event = EventHd;
+ nat len;
+
+ /* iterate over eventq and register relevant fields in event as roots */
+ for(event = EventHd, len = 0; event!=NULL; event=event->next, len++) {
+ switch (event->evttype) {
+ case ContinueThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ break;
+ case StartThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case ResumeThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case MoveSpark:
+ event->spark->node = (StgClosure *)MarkRoot((StgClosure *)event->spark->node);
+ break;
+ case MoveThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ break;
+ case FindWork:
+ break;
+ case FetchNode:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case FetchReply:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ if (RtsFlags.GranFlags.DoBulkFetching)
+	  // ToDo: traverse_eventq_for_gc if GUM-Fetching!!! HWL
+	belch("ghuH: packets in BulkFetching not marked as roots; may be fatal");
+ else
+	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case GlobalBlock:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ case UnblockThread:
+ event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+ event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+ break;
+ default:
+ barf("markEventQueue: trying to mark unknown event @ %p", event);
+ }}
+ IF_DEBUG(gc,
+ belch("GC: markEventQueue: %d events in queue", len));
+}
+
+/*
+ Prune all ContinueThread events related to tso or node in the eventq.
+ Currently used if a thread leaves STG land with ThreadBlocked status,
+ i.e. it blocked on a closure and has been put on its blocking queue. It
+  will be reawakened via a call to awaken_blocked_queue. Until then no
+  event affecting this tso should appear in the eventq. A bit of a hack,
+ because ideally we shouldn't generate such spurious ContinueThread events
+ in the first place.
+*/
+//@cindex prune_eventq
+void
+prune_eventq(tso, node)
+StgTSO *tso;
+StgClosure *node;
+{ rtsEventQ prev = (rtsEventQ)NULL, event = EventHd;
+
+ /* node unused for now */
+ ASSERT(node==NULL);
+ /* tso must be valid, then */
+ ASSERT(tso!=END_TSO_QUEUE);
+ while (event != NULL) {
+ if (event->evttype==ContinueThread &&
+ (event->tso==tso)) {
+ IF_GRAN_DEBUG(event_trace, // ToDo: use another debug flag
+ belch("prune_eventq: pruning ContinueThread event for TSO %d (%p) on PE %d @ %lx (%p)",
+ event->tso->id, event->tso, event->proc, event->time, event));
+ if (prev==(rtsEventQ)NULL) { // beginning of eventq
+ EventHd = event->next;
+ free(event);
+ event = EventHd;
+ } else {
+ prev->next = event->next;
+ free(event);
+ event = prev->next;
+ }
+ } else { // no pruning necessary; go to next event
+ prev = event;
+ event = event->next;
+ }
+ }
+}
+
+//@cindex print_event
+void
+print_event(event)
+rtsEvent *event;
+{
+ char str_tso[16], str_node[16];
+ StgThreadID tso_id;
+
+ if (event->tso==END_TSO_QUEUE) {
+ strcpy(str_tso, "______");
+ tso_id = 0;
+ } else {
+ sprintf(str_tso, "%p", event->tso);
+ tso_id = (event->tso==NULL) ? 0 : event->tso->id;
+ }
+ if (event->node==(StgClosure*)NULL) {
+ strcpy(str_node, "______");
+ } else {
+ sprintf(str_node, "%p", event->node);
+ }
+ // HWL: shouldn't be necessary; ToDo: nuke
+ //str_tso[6]='\0';
+ //str_node[6]='\0';
+
+ if (event==NULL)
+ fprintf(stderr,"Evt: NIL\n");
+ else
+ fprintf(stderr, "Evt: %s (%u), PE %u [%u], Time %lu, TSO %d (%s), Node %s\n", //"Evt: %s (%u), PE %u [%u], Time %u, TSO %s (%#l), Node %s\n",
+ event_names[event->evttype], event->evttype,
+ event->proc, event->creator, event->time,
+ tso_id, str_tso, str_node
+ /*, event->spark, event->next */ );
+
+}
+
+//@cindex print_eventq
+void
+print_eventq(hd)
+rtsEvent *hd;
+{
+ rtsEvent *x;
+
+ fprintf(stderr,"Event Queue with root at %p:\n", hd);
+ for (x=hd; x!=NULL; x=x->next) {
+ print_event(x);
+ }
+}
+
+//@node Spark queue functions, Scheduling functions, Global Event Queue, GranSim specific code
+//@subsection Spark queue functions
+/*
+  Spark queue functions are now all in Sparks.c!!
+*/
+//@node Scheduling functions, Thread Queue routines, Spark queue functions, GranSim specific code
+//@subsection Scheduling functions
+
+/*
+ These functions are variants of thread initialisation and therefore
+ related to initThread and friends in Schedule.c. However, they are
+ specific to a GranSim setup in storing more info in the TSO's statistics
+ buffer and sorting the thread queues etc.
+*/
+
+/*
+  A large portion of insertThread deals with maintaining a sorted thread
+ queue, which is needed for the Priority Sparking option. Without that
+ complication the code boils down to FIFO handling.
+*/
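+
+/* Illustrative example: with priority scheduling enabled and a run queue
+   holding threads of priorities [9, 7, 3] (the head, 9, being the thread
+   currently running), a new TSO of priority 5 is linked in between the 7
+   and the 3, giving [9, 7, 5, 3]; it is never placed in front of the
+   running head. */
+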
+//@cindex insertThread
+void
+insertThread(tso, proc)
+StgTSO* tso;
+PEs proc;
+{
+ StgTSO *prev = NULL, *next = NULL;
+ nat count = 0;
+ rtsBool found = rtsFalse;
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(!is_on_queue(tso,proc));
+ /* Idle proc: put the thread on the run queue
+ same for pri spark and basic version */
+ if (run_queue_hds[proc] == END_TSO_QUEUE)
+ {
+ /* too strong!
+ ASSERT((CurrentProc==MainProc &&
+ CurrentTime[MainProc]==0 &&
+ procStatus[MainProc]==Idle) ||
+ procStatus[proc]==Starting);
+ */
+ run_queue_hds[proc] = run_queue_tls[proc] = tso;
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime;
+
+ /* new_event of ContinueThread has been moved to do_the_startthread */
+
+ /* too strong!
+ ASSERT(procStatus[proc]==Idle ||
+ procStatus[proc]==Fishing ||
+ procStatus[proc]==Starting);
+ procStatus[proc] = Busy;
+ */
+ return;
+ }
+
+ if (RtsFlags.GranFlags.Light)
+ GranSimLight_insertThread(tso, proc);
+
+ /* Only for Pri Scheduling: find place where to insert tso into queue */
+ if (RtsFlags.GranFlags.DoPriorityScheduling && tso->gran.pri!=0)
+    /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo'
+       (lifted from add_to_spark_queue; skip the first element) */
+ for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count=0;
+ (next != END_TSO_QUEUE) &&
+ !(found = tso->gran.pri >= next->gran.pri);
+ prev = next, next = next->link, count++)
+ {
+ ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+ (prev==(StgTSO*)NULL || prev->link==next));
+ }
+
+ ASSERT(!found || next != END_TSO_QUEUE);
+ ASSERT(procStatus[proc]!=Idle);
+
+ if (found) {
+ /* found can only be rtsTrue if pri scheduling enabled */
+ ASSERT(RtsFlags.GranFlags.DoPriorityScheduling);
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.non_end_add_threads++;
+ /* Add tso to ThreadQueue between prev and next */
+ tso->link = next;
+ if ( next == (StgTSO*)END_TSO_QUEUE ) {
+ run_queue_tl = tso;
+ } else {
+ /* no back link for TSO chain */
+ }
+
+ if ( prev == (StgTSO*)END_TSO_QUEUE ) {
+ /* Never add TSO as first elem of thread queue; the first */
+ /* element should be the one that is currently running -- HWL */
+ IF_DEBUG(gran,
+ belch("GRAN: Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %p (PRI=%d) as first elem of threadQ (%p) on proc %u (@ %u)\n",
+ tso, tso->gran.pri, run_queue_hd, proc,
+ CurrentTime[proc]));
+ } else {
+ prev->link = tso;
+ }
+ } else { /* !found */ /* or not pri sparking! */
+ /* Add TSO to the end of the thread queue on that processor */
+ run_queue_tls[proc]->link = tso;
+ run_queue_tls[proc] = tso;
+ }
+ ASSERT(RtsFlags.GranFlags.DoPriorityScheduling || count==0);
+ CurrentTime[proc] += count * RtsFlags.GranFlags.Costs.pri_sched_overhead +
+ RtsFlags.GranFlags.Costs.threadqueuetime;
+
+ /* ToDo: check if this is still needed -- HWL
+ if (RtsFlags.GranFlags.DoThreadMigration)
+ ++SurplusThreads;
+
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ !(( event_type == GR_START || event_type == GR_STARTQ) &&
+ RtsFlags.GranFlags.labelling) )
+ DumpRawGranEvent(proc, creator, event_type+1, tso, node,
+ tso->gran.sparkname, spark_queue_len(proc));
+ */
+
+# if defined(GRAN_CHECK)
+ /* Check if thread queue is sorted. Only for testing, really! HWL */
+ if ( RtsFlags.GranFlags.DoPriorityScheduling &&
+ (RtsFlags.GranFlags.Debug.sortedQ) ) {
+ rtsBool sorted = rtsTrue;
+ StgTSO *prev, *next;
+
+ if (run_queue_hds[proc]==END_TSO_QUEUE ||
+ run_queue_hds[proc]->link==END_TSO_QUEUE) {
+ /* just 1 elem => ok */
+ } else {
+ /* Qu' wa'DIch yIleghQo' (ignore first elem)! */
+ for (prev = run_queue_hds[proc]->link, next = prev->link;
+ (next != END_TSO_QUEUE) ;
+ prev = next, next = prev->link) {
+ ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+ (prev==(StgTSO*)NULL || prev->link==next));
+ sorted = sorted &&
+ (prev->gran.pri >= next->gran.pri);
+ }
+ }
+ if (!sorted) {
+ fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
+ CurrentProc);
+ G_THREADQ(run_queue_hd,0x1);
+ }
+ }
+# endif
+}
+
+/*
+  GranSimLight_insertThread, which is only used for GranSim Light, is similar
+  to insertThread in that it adds a TSO to a thread queue. However, it assumes
+ that the thread queue is sorted by local clocks and it inserts the TSO at
+ the right place in the queue. Don't create any event, just insert.
+*/
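+
+/* Illustrative example: with queued local clocks [10, 20, 30], a TSO whose
+   clock is 25 is linked in between the 20 and the 30; one whose clock is 40
+   is appended at the end of the queue. */
+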
+//@cindex GranSimLight_insertThread
+rtsBool
+GranSimLight_insertThread(tso, proc)
+StgTSO* tso;
+PEs proc;
+{
+ StgTSO *prev, *next;
+ nat count = 0;
+ rtsBool found = rtsFalse;
+
+ ASSERT(RtsFlags.GranFlags.Light);
+
+ /* In GrAnSim-Light we always have an idle `virtual' proc.
+ The semantics of the one-and-only thread queue is different here:
+ all threads in the queue are running (each on its own virtual processor);
+ the queue is only needed internally in the simulator to interleave the
+ reductions of the different processors.
+ The one-and-only thread queue is sorted by the local clocks of the TSOs.
+ */
+ ASSERT(run_queue_hds[proc] != END_TSO_QUEUE);
+ ASSERT(tso->link == END_TSO_QUEUE);
+
+ /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ (RtsFlags.GranFlags.Debug.checkLight) &&
+ (run_queue_hd->link == END_TSO_QUEUE)) {
+ DumpRawGranEvent(proc, proc, GR_DESCHEDULE,
+ run_queue_hds[proc], (StgClosure*)NULL,
+		     tso->gran.sparkname, spark_queue_len(proc)); // ToDo: check spark_queue_len
+ // resched = rtsTrue;
+ }
+
+ /* this routine should only be used in a GrAnSim Light setup */
+ /* && CurrentProc must be 0 in GrAnSim Light setup */
+ ASSERT(RtsFlags.GranFlags.Light && CurrentProc==0);
+
+ /* Idle proc; same for pri spark and basic version */
+ if (run_queue_hd==END_TSO_QUEUE)
+ {
+ run_queue_hd = run_queue_tl = tso;
+ /* MAKE_BUSY(CurrentProc); */
+ return rtsTrue;
+ }
+
+ for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count = 0;
+ (next != END_TSO_QUEUE) &&
+ !(found = (tso->gran.clock < next->gran.clock));
+ prev = next, next = next->link, count++)
+ {
+ ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+ (prev==(StgTSO*)NULL || prev->link==next));
+ }
+
+ /* found can only be rtsTrue if pri sparking enabled */
+ if (found) {
+ /* Add tso to ThreadQueue between prev and next */
+ tso->link = next;
+ if ( next == END_TSO_QUEUE ) {
+ run_queue_tls[proc] = tso;
+ } else {
+ /* no back link for TSO chain */
+ }
+
+ if ( prev == END_TSO_QUEUE ) {
+ run_queue_hds[proc] = tso;
+ } else {
+ prev->link = tso;
+ }
+ } else { /* !found */ /* or not pri sparking! */
+ /* Add TSO to the end of the thread queue on that processor */
+ run_queue_tls[proc]->link = tso;
+ run_queue_tls[proc] = tso;
+ }
+
+ if ( prev == END_TSO_QUEUE ) { /* new head of queue */
+ new_event(proc, proc, CurrentTime[proc],
+ ContinueThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+ }
+ /*
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ !(( event_type == GR_START || event_type == GR_STARTQ) &&
+ RtsFlags.GranFlags.labelling) )
+ DumpRawGranEvent(proc, creator, gr_evttype, tso, node,
+ tso->gran.sparkname, spark_queue_len(proc));
+ */
+ return rtsTrue;
+}
+
+/*
+ endThread is responsible for general clean-up after the thread tso has
+ finished. This includes emitting statistics into the profile etc.
+*/
+void
+endThread(StgTSO *tso, PEs proc)
+{
+ ASSERT(procStatus[proc]==Busy); // coming straight out of STG land
+ ASSERT(tso->whatNext==ThreadComplete);
+ // ToDo: prune ContinueThreads for this TSO from event queue
+ DumpEndEvent(proc, tso, rtsFalse /* not mandatory */);
+
+ /* if this was the last thread on this PE then make it Idle */
+ if (run_queue_hds[proc]==END_TSO_QUEUE) {
+ procStatus[CurrentProc] = Idle;
+ }
+}
+
+//@node Thread Queue routines, GranSim functions, Scheduling functions, GranSim specific code
+//@subsection Thread Queue routines
+
+/*
+ Check whether given tso resides on the run queue of the current processor.
+ Only used for debugging.
+*/
+
+//@cindex is_on_queue
+rtsBool
+is_on_queue (StgTSO *tso, PEs proc)
+{
+ StgTSO *t;
+ rtsBool found;
+
+ for (t=run_queue_hds[proc], found=rtsFalse;
+ t!=END_TSO_QUEUE && !(found = t==tso);
+ t=t->link)
+ /* nothing */ ;
+
+ return found;
+}
+
+/* This routine is only used for keeping statistics of thread queue
+ lengths to evaluate the impact of priority scheduling. -- HWL
+ {spark_queue_len}vo' jInIHta'
+*/
+//@cindex thread_queue_len
+nat
+thread_queue_len(PEs proc)
+{
+ StgTSO *prev, *next;
+ nat len;
+
+ for (len = 0, prev = END_TSO_QUEUE, next = run_queue_hds[proc];
+ next != END_TSO_QUEUE;
+ len++, prev = next, next = prev->link)
+ {}
+
+ return (len);
+}
+
+//@node GranSim functions, GranSimLight routines, Thread Queue routines, GranSim specific code
+//@subsection GranSim functions
+
+/* ----------------------------------------------------------------- */
+/* The main event handling functions; called from Schedule.c (schedule) */
+/* ----------------------------------------------------------------- */
+
+//@cindex do_the_globalblock
+
+void
+do_the_globalblock(rtsEvent* event)
+{
+ PEs proc = event->proc; /* proc that requested node */
+ StgTSO *tso = event->tso; /* tso that requested node */
+ StgClosure *node = event->node; /* requested, remote node */
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the GlobalBlock\n"));
+ /* There should be no GLOBALBLOCKs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* GlobalBlock events only valid with GUM fetching */
+ ASSERT(RtsFlags.GranFlags.DoBulkFetching);
+
+ IF_GRAN_DEBUG(bq, // globalBlock,
+ if (IS_LOCAL_TO(PROCS(node),proc)) {
+ belch("## Qagh: GlobalBlock: Blocking TSO %d (%p) on LOCAL node %p (PE %d).\n",
+ tso->id, tso, node, proc);
+ });
+
+ /* CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.munpacktime; */
+ if ( blockFetch(tso,proc,node) != 0 )
+ return; /* node has become local by now */
+
+#if 0
+ ToDo: check whether anything has to be done at all after blockFetch -- HWL
+
+ if (!RtsFlags.GranFlags.DoAsyncFetch) { /* head of queue is next thread */
+ StgTSO* tso = run_queue_hds[proc]; /* awaken next thread */
+ if (tso != (StgTSO*)NULL) {
+ new_event(proc, proc, CurrentTime[proc],
+ ContinueThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime;
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, CurrentProc, GR_SCHEDULE, tso,
+ (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc)); // ToDo: check sparkname and spar_queue_len
+ procStatus[proc] = Busy; /* might have been fetching */
+ } else {
+ procStatus[proc] = Idle; /* no work on proc now */
+ }
+ } else { /* RtsFlags.GranFlags.DoAsyncFetch i.e. block-on-fetch */
+ /* other thread is already running */
+ /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
+ new_event(proc,proc,CurrentTime[proc],
+ CONTINUETHREAD,EVENT_TSO(event),
+ (RtsFlags.GranFlags.DoBulkFetching ? closure :
+ EVENT_NODE(event)),NULL);
+ */
+ }
+#endif
+}
+
+//@cindex do_the_unblock
+
+void
+do_the_unblock(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc that requested node */
+ creator = event->creator; /* proc that requested node */
+ StgTSO* tso = event->tso; /* tso that requested node */
+ StgClosure* node = event->node; /* requested, remote node */
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the UnBlock\n"))
+ /* There should be no UNBLOCKs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* UnblockThread means either FetchReply has arrived or
+ a blocking queue has been awakened;
+ ToDo: check with assertions
+ ASSERT(procStatus[proc]==Fetching || IS_BLACK_HOLE(event->node));
+ */
+ if (!RtsFlags.GranFlags.DoAsyncFetch) { /* block-on-fetch */
+ /* We count block-on-fetch as normal block time */
+ tso->gran.blocktime += CurrentTime[proc] - tso->gran.blockedat;
+ /* Dumping now done when processing the event
+ No costs for contextswitch or thread queueing in this case
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, CurrentProc, GR_RESUME, tso,
+ (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc));
+ */
+ /* Maybe do this in FetchReply already
+ if (procStatus[proc]==Fetching)
+ procStatus[proc] = Busy;
+ */
+ /*
+ new_event(proc, proc, CurrentTime[proc],
+ ContinueThread,
+ tso, node, (rtsSpark*)NULL);
+ */
+ } else {
+ /* Asynchr comm causes additional costs here: */
+ /* Bring the TSO from the blocked queue into the threadq */
+ }
+ /* In all cases, the UnblockThread causes a ResumeThread to be scheduled */
+ new_event(proc, proc,
+ CurrentTime[proc]+RtsFlags.GranFlags.Costs.threadqueuetime,
+ ResumeThread,
+ tso, node, (rtsSpark*)NULL);
+}
+
+//@cindex do_the_fetchnode
+
+void
+do_the_fetchnode(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc that holds the requested node */
+ creator = event->creator; /* proc that requested node */
+ StgTSO* tso = event->tso;
+ StgClosure* node = event->node; /* requested, remote node */
+ rtsFetchReturnCode rc;
+
+ ASSERT(CurrentProc==proc);
+ /* There should be no FETCHNODEs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchNode\n"));
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(creator, proc, GR_FETCH, tso, node, (StgInt)0, 0);
+
+ do {
+ rc = handleFetchRequest(node, proc, creator, tso);
+ if (rc == OutOfHeap) { /* trigger GC */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ if (RtsFlags.GcFlags.giveStats)
+ fprintf(RtsFlags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %p, tso %p (%d))\n",
+ node, tso, tso->id);
+# endif
+ prepend_event(event);
+ GarbageCollect(GetRoots);
+ // HWL: ToDo: check whether a ContinueThread has to be issued
+ // HWL old: ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
+# if defined(GRAN_CHECK) && defined(GRAN)
+ if (RtsFlags.GcFlags.giveStats) {
+ fprintf(RtsFlags.GcFlags.statsFile,"***** SAVE_Hp=%p, SAVE_HpLim=%p, PACK_HEAP_REQUIRED=%d\n",
+ Hp, HpLim, 0) ; // PACK_HEAP_REQUIRED); ???
+ fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
+ globalGranStats.tot_packets, globalGranStats.tot_packet_size);
+ }
+# endif
+ event = grab_event();
+ // Hp -= PACK_HEAP_REQUIRED; // ???
+
+ /* GC knows that events are special and follows the pointer i.e. */
+ /* events are valid even if they moved. An EXIT is triggered */
+ /* if there is not enough heap after GC. */
+ }
+ } while (rc == OutOfHeap);
+}
+
+//@cindex do_the_fetchreply
+void
+do_the_fetchreply(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc that requested node */
+ creator = event->creator; /* proc that holds the requested node */
+ StgTSO* tso = event->tso;
+ StgClosure* node = event->node; /* requested, remote node */
+ StgClosure* closure=(StgClosure*)NULL;
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(RtsFlags.GranFlags.DoAsyncFetch || procStatus[proc]==Fetching);
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchReply\n"));
+ /* There should be no FETCHREPLYs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ /* assign message unpack costs *before* dumping the event */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, creator, GR_REPLY, tso, node,
+ tso->gran.sparkname, spark_queue_len(proc));
+
+ /* THIS SHOULD NEVER HAPPEN
+ If tso is in the BQ of node this means that it actually entered the
+ remote closure, due to a missing GranSimFetch at the beginning of the
+ entry code; therefore, this is actually a faked fetch, triggered from
+ within GranSimBlock;
+ since tso is both in the EVQ and the BQ for node, we have to take it out
+ of the BQ first before we can handle the FetchReply;
+ ToDo: special cases in awaken_blocked_queue, since the BQ magically moved.
+ */
+ if (tso->blocked_on!=(StgClosure*)NULL) {
+ IF_GRAN_DEBUG(bq,
+ belch("## ghuH: TSO %d (%p) in FetchReply is blocked on node %p (shouldn't happen AFAIK)",
+ tso->id, tso, node));
+ // unlink_from_bq(tso, node);
+ }
+
+ if (RtsFlags.GranFlags.DoBulkFetching) { /* bulk (packet) fetching */
+ rtsPackBuffer *buffer = (rtsPackBuffer*)node;
+ nat size = buffer->size;
+
+ /* NB: Fetch misses can't occur with GUM fetching, as */
+    /* updatable closures are turned into RBHs and therefore locked         */
+ /* for other processors that try to grab them. */
+
+ closure = UnpackGraph(buffer);
+ CurrentTime[proc] += size * RtsFlags.GranFlags.Costs.munpacktime;
+ } else // incremental fetching
+ /* Copy or move node to CurrentProc */
+ if (fetchNode(node, creator, proc)) {
+ /* Fetch has failed i.e. node has been grabbed by another PE */
+ PEs p = where_is(node);
+ rtsTime fetchtime;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.fetch_misses++;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== Qu'vatlh! fetch miss @ %u: node %p is at proc %u (rather than proc %u)\n",
+ CurrentTime[proc],node,p,creator));
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ /* Count fetch again !? */
+ ++(tso->gran.fetchcount);
+ tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime;
+
+ fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ /* Chase the grabbed node */
+ new_event(p, proc, fetchtime,
+ FetchNode,
+ tso, node, (rtsSpark*)NULL);
+
+# if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+ IF_GRAN_DEBUG(blockOnFetch,
+ BlockedOnFetch[CurrentProc] = tso;) /*-rtsTrue;-*/
+
+ IF_GRAN_DEBUG(blockOnFetch_sanity,
+ tso->type |= FETCH_MASK_TSO;)
+# endif
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ return; /* NB: no REPLy has been processed; tso still sleeping */
+ }
+
+ /* -- Qapla'! Fetch has been successful; node is here, now */
+ ++(event->tso->gran.fetchcount);
+ event->tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime;
+
+ /* this is now done at the beginning of this routine
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc,event->creator, GR_REPLY, event->tso,
+ (RtsFlags.GranFlags.DoBulkFetching ?
+ closure :
+ event->node),
+ tso->gran.sparkname, spark_queue_len(proc));
+ */
+
+ --OutstandingFetches[proc];
+ ASSERT(OutstandingFetches[proc] >= 0);
+ new_event(proc, proc, CurrentTime[proc],
+ ResumeThread,
+ event->tso, (RtsFlags.GranFlags.DoBulkFetching ?
+ closure :
+ event->node),
+ (rtsSpark*)NULL);
+}
+
+//@cindex do_the_movethread
+
+void
+do_the_movethread(rtsEvent* event) {
+ PEs proc = event->proc, /* proc that requested node */
+ creator = event->creator; /* proc that holds the requested node */
+ StgTSO* tso = event->tso;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveThread\n"));
+
+ ASSERT(CurrentProc==proc);
+ /* There should be no MOVETHREADs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* MOVETHREAD events should never occur without -bM */
+ ASSERT(RtsFlags.GranFlags.DoThreadMigration);
+ /* Bitmask of moved thread should be 0 */
+ ASSERT(PROCS(tso)==0);
+ ASSERT(procStatus[proc] == Fishing ||
+ RtsFlags.GranFlags.DoAsyncFetch);
+ ASSERT(OutstandingFishes[proc]>0);
+
+ /* ToDo: exact costs for unpacking the whole TSO */
+ CurrentTime[proc] += 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, creator,
+ GR_STOLEN, tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+ // ToDo: check cost functions
+ --OutstandingFishes[proc];
+ SET_GRAN_HDR(tso, ThisPE); // adjust the bitmask for the TSO
+ insertThread(tso, proc);
+
+ if (procStatus[proc]==Fishing)
+ procStatus[proc] = Idle;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.tot_TSOs_migrated++;
+}
+
+//@cindex do_the_movespark
+
+void
+do_the_movespark(rtsEvent* event) {
+ PEs proc = event->proc, /* proc that requested spark */
+ creator = event->creator; /* proc that holds the requested spark */
+ StgTSO* tso = event->tso;
+ rtsSparkQ spark = event->spark;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveSpark\n"))
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(spark!=NULL);
+ ASSERT(procStatus[proc] == Fishing ||
+ RtsFlags.GranFlags.DoAsyncFetch);
+ ASSERT(OutstandingFishes[proc]>0);
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* record movement of spark only if spark profiling is turned on */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc, creator,
+ SP_ACQUIRED,
+ tso, spark->node, spark->name, spark_queue_len(proc));
+
+ /* global statistics */
+ if ( RtsFlags.GranFlags.GranSimStats.Global &&
+ !closure_SHOULD_SPARK(spark->node))
+ globalGranStats.withered_sparks++;
+ /* Not adding the spark to the spark queue would be the right */
+ /* thing here, but it also would be cheating, as this info can't be */
+ /* available in a real system. -- HWL */
+
+ --OutstandingFishes[proc];
+
+ add_to_spark_queue(spark);
+
+ IF_GRAN_DEBUG(randomSteal, // ToDo: spark-distribution flag
+ print_sparkq_stats());
+
+ /* Should we treat stolen sparks specially? Currently, we don't. */
+
+ if (procStatus[proc]==Fishing)
+ procStatus[proc] = Idle;
+
+ /* add_to_spark_queue will increase the time of the current proc. */
+ /*
+ If proc was fishing, it is Idle now with the new spark in its spark
+ pool. This means that the next time handleIdlePEs is called, a local
+ FindWork will be created on this PE to turn the spark into a thread. Of
+ course another PE might steal the spark in the meantime (that's why we
+ are using events rather than inlining all the operations in the first
+ place). */
+}
+
+/*
+  In the Constellation class version of GranSim the semantics of StartThread
+  events have changed. Now, StartThread has to perform 3 basic operations:
+  - create a new thread (previously this was done in ActivateSpark);
+  - insert the thread into the run queue of the current processor;
+  - generate a new event for actually running the new thread.
+  Note that insertThread is called via createThread.
+*/
+
+//@cindex do_the_startthread
+
+void
+do_the_startthread(rtsEvent *event)
+{
+ PEs proc = event->proc; /* proc that requested node */
+ StgTSO *tso = event->tso; /* tso that requested node */
+ StgClosure *node = event->node; /* requested, remote node */
+ rtsSpark *spark = event->spark;
+ GranEventType gr_evttype;
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
+ ASSERT(event->evttype == ResumeThread || event->evttype == StartThread);
+ /* if this was called via StartThread: */
+ ASSERT(event->evttype!=StartThread || tso == END_TSO_QUEUE); // not yet created
+ // ToDo: check: ASSERT(event->evttype!=StartThread || procStatus[proc]==Starting);
+ /* if this was called via ResumeThread: */
+ ASSERT(event->evttype!=ResumeThread ||
+ RtsFlags.GranFlags.DoAsyncFetch ||!is_on_queue(tso,proc));
+
+ /* startThread may have been called from the main event handler upon
+ finding either a ResumeThread or a StartThread event; set the
+ gr_evttype (needed for writing to .gr file) accordingly */
+ // gr_evttype = (event->evttype == ResumeThread) ? GR_RESUME : GR_START;
+
+ if ( event->evttype == StartThread ) {
+ GranEventType gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+ GR_START : GR_STARTQ;
+
+ tso = createThread(BLOCK_SIZE_W, spark->gran_info);// implicit insertThread!
+ pushClosure(tso, node);
+
+ // ToDo: fwd info on local/global spark to thread -- HWL
+ // tso->gran.exported = spark->exported;
+ // tso->gran.locked = !spark->global;
+ tso->gran.sparkname = spark->name;
+
+ ASSERT(CurrentProc==proc);
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpGranEvent(gr_evttype,tso);
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcreatetime;
+ } else { // event->evttype == ResumeThread
+ GranEventType gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+ GR_RESUME : GR_RESUMEQ;
+
+ insertThread(tso, proc);
+
+ ASSERT(CurrentProc==proc);
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpGranEvent(gr_evttype,tso);
+ }
+
+ ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); // non-empty run queue
+ procStatus[proc] = Busy;
+ /* make sure that this thread is actually run */
+ new_event(proc, proc,
+ CurrentTime[proc],
+ ContinueThread,
+ tso, node, (rtsSpark*)NULL);
+
+ /* A wee bit of statistics gathering */
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_add_threads++;
+ globalGranStats.tot_tq_len += thread_queue_len(CurrentProc);
+ }
+
+}
+
+//@cindex do_the_findwork
+void
+do_the_findwork(rtsEvent* event)
+{
+ PEs proc = event->proc, /* proc to search for work */
+ creator = event->creator; /* proc that requested work */
+ rtsSparkQ spark = event->spark;
+ /* ToDo: check that this size is safe -- HWL */
+ nat req_heap = sizeofW(StgTSO) + MIN_STACK_WORDS;
+ // add this? -- HWL:RtsFlags.ConcFlags.stkChunkSize;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the Findwork\n"));
+
+ /* If GUM style fishing is enabled, the contents of the spark field says
+ what to steal (spark(1) or thread(2)); */
+ ASSERT(!(RtsFlags.GranFlags.Fishing && event->spark==(rtsSpark*)0));
+
+ /* Make sure that we have enough heap for creating a new
+ thread. This is a conservative estimate of the required heap.
+ This eliminates special checks for GC around NewThread within
+ ActivateSpark. */
+
+ if (Hp + req_heap > HpLim ) {
+ IF_DEBUG(gc,
+ belch("GC: Doing GC from within Findwork handling (that's bloody dangerous if you ask me)");)
+ GarbageCollect(GetRoots);
+ // ReallyPerformThreadGC(req_heap, rtsFalse); old -- HWL
+ Hp -= req_heap;
+ if (procStatus[CurrentProc]==Sparking)
+ procStatus[CurrentProc]=Idle;
+ return;
+ }
+
+ if ( RtsFlags.GranFlags.DoAlwaysCreateThreads ||
+ RtsFlags.GranFlags.Fishing ||
+ ((procStatus[proc]==Idle || procStatus[proc]==Sparking) &&
+ (RtsFlags.GranFlags.FetchStrategy >= 2 ||
+ OutstandingFetches[proc] == 0)) )
+ {
+ rtsBool found;
+ rtsSparkQ prev, spark;
+
+ /* ToDo: check */
+ ASSERT(procStatus[proc]==Sparking ||
+ RtsFlags.GranFlags.DoAlwaysCreateThreads ||
+ RtsFlags.GranFlags.Fishing);
+
+ /* SImmoHwI' yInej! Search spark queue! */
+ /* gimme_spark (event, &found, &spark); */
+ findLocalSpark(event, &found, &spark);
+
+ if (!found) { /* pagh vumwI' */
+ /*
+ If no spark has been found this can mean 2 things:
+ 1/ The FindWork was a fish (i.e. a message sent by another PE) and
+ the spark pool of the receiver is empty
+ --> the fish has to be forwarded to another PE
+ 2/ The FindWork was local to this PE (i.e. no communication; in this
+         case creator==proc) and the spark pool of the PE is not empty but
+         contains only sparks of closures that should not be sparked
+ (note: if the spark pool were empty, handleIdlePEs wouldn't have
+ generated a FindWork in the first place)
+ --> the PE has to be made idle to trigger stealing sparks the next
+ time handleIdlePEs is performed
+ */
+
+ ASSERT(pending_sparks_hds[proc]==(rtsSpark*)NULL);
+ if (creator==proc) {
+ /* local FindWork */
+ if (procStatus[proc]==Busy) {
+ belch("ghuH: PE %d in Busy state while processing local FindWork (spark pool is empty!) @ %lx",
+ proc, CurrentTime[proc]);
+ procStatus[proc] = Idle;
+ }
+ } else {
+ /* global FindWork i.e. a Fish */
+ ASSERT(RtsFlags.GranFlags.Fishing);
+ /* actually this generates another request from the originating PE */
+ ASSERT(OutstandingFishes[creator]>0);
+ OutstandingFishes[creator]--;
+ /* ToDo: assign costs for sending fish to proc not to creator */
+ stealSpark(creator); /* might steal from same PE; ToDo: fix */
+ ASSERT(RtsFlags.GranFlags.maxFishes!=1 || procStatus[creator] == Fishing);
+ /* any assertions on state of proc possible here? */
+ }
+ } else {
+ /* DaH chu' Qu' yIchen! Now create new work! */
+ IF_GRAN_DEBUG(findWork,
+ belch("+- munching spark %p; creating thread for node %p",
+ spark, spark->node));
+ activateSpark (event, spark);
+ ASSERT(spark != (rtsSpark*)NULL);
+ spark = delete_from_sparkq (spark, proc, rtsTrue);
+ }
+
+ IF_GRAN_DEBUG(findWork,
+ belch("+- Contents of spark queues at the end of FindWork @ %lx",
+ CurrentTime[proc]);
+ print_sparkq_stats());
+
+ /* ToDo: check ; not valid if GC occurs in ActivateSpark */
+ ASSERT(!found ||
+ /* forward fish or */
+ (proc!=creator ||
+ /* local spark or */
+ (proc==creator && procStatus[proc]==Starting)) ||
+ //(!found && procStatus[proc]==Idle) ||
+ RtsFlags.GranFlags.DoAlwaysCreateThreads);
+ } else {
+ IF_GRAN_DEBUG(findWork,
+ belch("+- RTS refuses to findWork on PE %d @ %lx",
+ proc, CurrentTime[proc]);
+ belch(" procStatus[%d]=%s, fetch strategy=%d, outstanding fetches[%d]=%d",
+ proc, proc_status_names[procStatus[proc]],
+ RtsFlags.GranFlags.FetchStrategy,
+ proc, OutstandingFetches[proc]));
+ }
+}
+
+//@node GranSimLight routines, Code for Fetching Nodes, GranSim functions, GranSim specific code
+//@subsection GranSimLight routines
+
+/*
+  This code is called from the central scheduler after having grabbed a
+ new event and is only needed for GranSim-Light. It mainly adjusts the
+ ActiveTSO so that all costs that have to be assigned from within the
+ scheduler are assigned to the right TSO. The choice of ActiveTSO depends
+ on the type of event that has been found.
+*/
+
+void
+GranSimLight_enter_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+ StgTSO *ActiveTSO = *ActiveTSOp;
+
+ ASSERT (RtsFlags.GranFlags.Light);
+
+ /* Restore local clock of the virtual processor attached to CurrentTSO.
+ All costs will be associated to the `virt. proc' on which the tso
+ is living. */
+ if (ActiveTSO != NULL) { /* already in system area */
+ ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+ if (RtsFlags.GranFlags.DoFairSchedule)
+ {
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight)
+ DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+ }
+ }
+ switch (event->evttype)
+ {
+ case ContinueThread:
+ case FindWork: /* inaccurate this way */
+ ActiveTSO = run_queue_hd;
+ break;
+ case ResumeThread:
+ case StartThread:
+ case MoveSpark: /* has tso of virt proc in tso field of event */
+ ActiveTSO = event->tso;
+ break;
+ default: barf("Illegal event type %s (%d) in GrAnSim Light setup\n",
+ event_names[event->evttype],event->evttype);
+ }
+ CurrentTime[CurrentProc] = ActiveTSO->gran.clock;
+ if (RtsFlags.GranFlags.DoFairSchedule) {
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight)
+ DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
+  }
+  *ActiveTSOp = ActiveTSO;   /* write the chosen TSO back to the caller */
+}
+
+void
+GranSimLight_leave_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+ StgTSO *ActiveTSO = *ActiveTSOp;
+
+ ASSERT(RtsFlags.GranFlags.Light);
+
+ /* Save time of `virt. proc' which was active since last getevent and
+     restore time of `virt. proc' on which CurrentTSO is living. */
+ if(RtsFlags.GranFlags.DoFairSchedule) {
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight) // ToDo: clean up flags
+ DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+ }
+ ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+  ActiveTSO = (StgTSO*)NULL;
+  *ActiveTSOp = ActiveTSO;   /* clear the caller's ActiveTSO as well */
+ CurrentTime[CurrentProc] = CurrentTSO->gran.clock;
+ if (RtsFlags.GranFlags.DoFairSchedule /* && resched */ ) {
+ // resched = rtsFalse;
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ RtsFlags.GranFlags.Debug.checkLight)
+ DumpGranEvent(GR_SCHEDULE,run_queue_hd);
+ }
+ /*
+ if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
+ (TimeOfNextEvent == 0 ||
+ TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
+ new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
+ CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL);
+ TimeOfNextEvent = get_time_of_next_event();
+ }
+ */
+}
+
+//@node Code for Fetching Nodes, Idle PEs, GranSimLight routines, GranSim specific code
+//@subsection Code for Fetching Nodes
+
+/*
+ The following GrAnSim routines simulate the fetching of nodes from a
+ remote processor. We use a 1 word bitmask to indicate on which processor
+ a node is lying. Thus, moving or copying a node from one processor to
+ another just requires an appropriate change in this bitmask (using
+ @SET_GA@). Additionally, the clocks have to be updated.
+
+ A special case arises when the node that is needed by processor A has
+ been moved from a processor B to a processor C between sending out a
+ @FETCH@ (from A) and its arrival at B. In that case the @FETCH@ has to
+ be forwarded to C. This is simulated by issuing another FetchNode event
+ on processor C with A as creator.
+*/
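+
+/*
+  Illustration of the bitmask encoding (assuming PE_NUMBER(p) behaves like
+  1<<p, as the code in fetchNode below suggests): a node whose procs field
+  is 0x5 resides on PEs 0 and 2; copying it to PE 1 ORs in PE_NUMBER(1),
+  giving 0x7 (PEs 0, 1 and 2), whereas moving it to PE 1 overwrites the
+  field, leaving just 0x2.
+*/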
+
+/* ngoqvam che' {GrAnSim}! */
+
+/* Fetch node "node" to processor "p" */
+
+//@cindex fetchNode
+
+rtsFetchReturnCode
+fetchNode(node,from,to)
+StgClosure* node;
+PEs from, to;
+{
+ /* In case of RtsFlags.GranFlags.DoBulkFetching this fct should never be
+ entered! Instead, UnpackGraph is used in ReSchedule */
+ StgClosure* closure;
+
+ ASSERT(to==CurrentProc);
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* fetchNode should never be entered with DoBulkFetching */
+ ASSERT(!RtsFlags.GranFlags.DoBulkFetching);
+
+ /* Now fetch the node */
+ if (!IS_LOCAL_TO(PROCS(node),from) &&
+ !IS_LOCAL_TO(PROCS(node),to) )
+ return NodeHasMoved;
+
+ if (closure_HNF(node)) /* node already in head normal form? */
+ node->header.gran.procs |= PE_NUMBER(to); /* Copy node */
+ else
+ node->header.gran.procs = PE_NUMBER(to); /* Move node */
+
+ return Ok;
+}
+
+/*
+ Process a fetch request.
+
+ Cost of sending a packet of size n = C + P*n
+ where C = packet construction constant,
+ P = cost of packing one word into a packet
+ [Should also account for multiple packets].
+*/
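+
+/*
+  Rough sketch of how this cost model maps onto the Costs fields used below
+  (an illustration, not a separate code path): for a bulk fetch of a packed
+  graph of size n the sender is charged roughly n*mpacktime plus mtidytime,
+  the FetchReply is scheduled latency ticks later, and the receiver pays
+  n*munpacktime on arrival (see do_the_fetchreply); the single-closure case
+  is essentially the n==1 instance of the same scheme.
+*/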
+
+//@cindex handleFetchRequest
+
+rtsFetchReturnCode
+handleFetchRequest(node,to,from,tso)
+StgClosure* node; // the node which is requested
+PEs to, from; // fetch request: from -> to
+StgTSO* tso; // the tso which needs the node
+{
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* ToDo: check assertion */
+ ASSERT(OutstandingFetches[from]>0);
+
+ /* probably wrong place; */
+ ASSERT(CurrentProc==to);
+
+ if (IS_LOCAL_TO(PROCS(node), from)) /* Somebody else moved node already => */
+ { /* start tso */
+ IF_GRAN_DEBUG(thunkStealing,
+ fprintf(stderr,"ghuH: handleFetchRequest entered with local node %p (%s) (PE %d)\n",
+ node, info_type(node), from));
+
+ if (RtsFlags.GranFlags.DoBulkFetching) {
+ nat size;
+ rtsPackBuffer *graph;
+
+ /* Create a 1-node-buffer and schedule a FETCHREPLY now */
+ graph = PackOneNode(node, tso, &size);
+ new_event(from, to, CurrentTime[to],
+ FetchReply,
+ tso, graph, (rtsSpark*)NULL);
+ } else {
+ new_event(from, to, CurrentTime[to],
+ FetchReply,
+ tso, node, (rtsSpark*)NULL);
+ }
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== majQa'! closure %p is local on PE %d already (this is a good thing)", node, from));
+ return (NodeIsLocal);
+ }
+ else if (IS_LOCAL_TO(PROCS(node), to) ) /* Is node still here? */
+ {
+ if (RtsFlags.GranFlags.DoBulkFetching) { /* {GUM}vo' ngoqvam vInIHta' */
+ nat size; /* (code from GUM) */
+ StgClosure* graph;
+
+ if (IS_BLACK_HOLE(node)) { /* block on BH or RBH */
+ new_event(from, to, CurrentTime[to],
+ GlobalBlock,
+ tso, node, (rtsSpark*)NULL);
+ /* Note: blockFetch is done when handling GLOBALBLOCK event;
+ make sure the TSO stays out of the run queue */
+ /* When this thread is reawoken it does the usual: it tries to
+ enter the updated node and issues a fetch if it's remote.
+ It has forgotten that it has sent a fetch already (i.e. a
+ FETCHNODE is swallowed by a BH, leaving the thread in a BQ) */
+ --OutstandingFetches[from];
+
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== majQa'! closure %p on PE %d is a BH (demander=PE %d); faking a FMBQ",
+ node, to, from));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_FMBQs++;
+ }
+ return (NodeIsBH);
+ }
+
+ /* The tso requesting the node is blocked and cannot be on a run queue */
+ ASSERT(!is_on_queue(tso, from));
+
+ if ((graph = PackNearbyGraph(node, tso, &size)) == NULL)
+ return (OutOfHeap); /* out of heap */
+
+ /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+ /* Send a reply to the originator */
+ /* ToDo: Replace that by software costs for doing graph packing! */
+ CurrentTime[to] += size * RtsFlags.GranFlags.Costs.mpacktime;
+
+ new_event(from, to,
+ CurrentTime[to]+RtsFlags.GranFlags.Costs.latency,
+ FetchReply,
+ tso, (StgClosure *)graph, (rtsSpark*)NULL);
+
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+ return (Ok);
+ } else { /* incremental (single closure) fetching */
+ /* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+ /* Send a reply to the originator */
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ new_event(from, to,
+ CurrentTime[to]+RtsFlags.GranFlags.Costs.latency,
+ FetchReply,
+ tso, node, (rtsSpark*)NULL);
+
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+ return (Ok);
+ }
+ }
+ else /* Qu'vatlh! node has been grabbed by another proc => forward */
+ {
+ PEs node_loc = where_is(node);
+ rtsTime fetchtime;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ belch("== Qu'vatlh! node %p has been grabbed by PE %d from PE %d (demander=%d) @ %d\n",
+ node,node_loc,to,from,CurrentTime[to]));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.fetch_misses++;
+ }
+
+ /* Prepare FORWARD message to proc p_new */
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ fetchtime = stg_max(CurrentTime[to], CurrentTime[node_loc]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ new_event(node_loc, from, fetchtime,
+ FetchNode,
+ tso, node, (rtsSpark*)NULL);
+
+ CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ return (NodeHasMoved);
+ }
+}
+
+/*
+ blockFetch blocks a BlockedFetch node on some kind of black hole.
+
+ Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
+
+ {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
+ create @FMBQ@'s (FetchMe blocking queues) to cope with global
+  blocking. Instead, non-local TSOs are put into the BQ in the same way as
+ local TSOs. However, we have to check if a TSO is local or global in
+ order to account for the latencies involved and for keeping track of the
+ number of fetches that are really going on.
+*/
+
+//@cindex blockFetch
+
+rtsFetchReturnCode
+blockFetch(tso, proc, bh)
+StgTSO* tso; /* TSO which gets blocked */
+PEs proc; /* PE where that tso was running */
+StgClosure* bh; /* closure to block on (BH, RBH, BQ) */
+{
+ StgInfoTable *info;
+
+ IF_GRAN_DEBUG(bq,
+ fprintf(stderr,"## blockFetch: blocking TSO %p (%d)[PE %d] on node %p (%s) [PE %d]. No graph is packed!\n",
+ tso, tso->id, proc, bh, info_type(bh), where_is(bh)));
+
+ if (!IS_BLACK_HOLE(bh)) { /* catches BHs and RBHs */
+ IF_GRAN_DEBUG(bq,
+ fprintf(stderr,"## blockFetch: node %p (%s) is not a BH => awakening TSO %p (%d) [PE %u]\n",
+ bh, info_type(bh), tso, tso->id, proc));
+
+ /* No BH anymore => immediately unblock tso */
+ new_event(proc, proc, CurrentTime[proc],
+ UnblockThread,
+ tso, bh, (rtsSpark*)NULL);
+
+ /* Is this always a REPLY to a FETCH in the profile ? */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, proc, GR_REPLY, tso, bh, (StgInt)0, 0);
+ return (NodeIsNoBH);
+ }
+
+ /* DaH {BQ}Daq Qu' Suq 'e' wISov!
+ Now we know that we have to put the tso into the BQ.
+     2 cases: If block-on-fetch, tso is at head of threadq
+                     => take it out of threadq and into BQ
+              If reschedule-on-fetch, tso is only pointed to by the event
+ => just put it into BQ
+
+ ngoq ngo'!!
+ if (!RtsFlags.GranFlags.DoAsyncFetch) {
+ GranSimBlock(tso, proc, bh);
+ } else {
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc, where_is(bh), GR_BLOCK, tso, bh, (StgInt)0, 0);
+ ++(tso->gran.blockcount);
+ tso->gran.blockedat = CurrentTime[proc];
+ }
+ */
+
+ /* after scheduling the GlobalBlock event the TSO is not put into the
+ run queue again; it is only pointed to via the event we are
+ processing now; in GranSim 4.xx there is no difference between
+ synchr and asynchr comm here */
+ ASSERT(!is_on_queue(tso, proc));
+ ASSERT(tso->link == END_TSO_QUEUE);
+
+ GranSimBlock(tso, proc, bh); /* GranSim statistics gathering */
+
+ /* Now, put tso into BQ (similar to blocking entry codes) */
+ info = get_itbl(bh);
+ switch (info -> type) {
+ case RBH:
+ case BLACKHOLE:
+  case CAF_BLACKHOLE:     // ToDo: check whether this is a possible ITBL here
+  case SE_BLACKHOLE:      // ToDo: check whether this is a possible ITBL here
+  case SE_CAF_BLACKHOLE:  // ToDo: check whether this is a possible ITBL here
+ /* basically an inlined version of BLACKHOLE_entry -- HWL */
+ /* Change the BLACKHOLE into a BLACKHOLE_BQ */
+ ((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info;
+ /* Put ourselves on the blocking queue for this black hole */
+ // tso->link=END_TSO_QUEUE; not necessary; see assertion above
+ ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)tso;
+ tso->blocked_on = bh;
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case BLACKHOLE_BQ:
+ /* basically an inlined version of BLACKHOLE_BQ_entry -- HWL */
+ tso->link = (StgTSO *) (((StgBlockingQueue*)bh)->blocking_queue);
+ ((StgBlockingQueue*)bh)->blocking_queue = (StgBlockingQueueElement *)tso;
+ recordMutable((StgMutClosure *)bh);
+
+# if 0 && defined(GC_MUT_REQUIRED)
+ ToDo: check whether recordMutable is necessary -- HWL
+ /*
+ * If we modify a black hole in the old generation, we have to make
+ * sure it goes on the mutables list
+ */
+
+ if (bh <= StorageMgrInfo.OldLim) {
+ MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
+ StorageMgrInfo.OldMutables = bh;
+ } else
+ MUT_LINK(bh) = MUT_NOT_LINKED;
+# endif
+ break;
+
+ case FETCH_ME_BQ:
+ barf("Qagh: FMBQ closure (%p) found in GrAnSim (TSO=%p (%d))\n",
+ bh, tso, tso->id);
+
+ default:
+ {
+ G_PRINT_NODE(bh);
+ barf("Qagh: thought %p was a black hole (IP %p (%s))",
+ bh, info, info_type(get_itbl(bh)));
+ }
+ }
+ return (Ok);
+}
+
+
+//@node Idle PEs, Routines directly called from Haskell world, Code for Fetching Nodes, GranSim specific code
+//@subsection Idle PEs
+
+/*
+ Export work to idle PEs. This function is called from @ReSchedule@
+ before dispatching on the current event. @HandleIdlePEs@ iterates over
+  all PEs, trying to get work for idle PEs. Note that this is a
+ simplification compared to GUM's fishing model. We try to compensate for
+ that by making the cost for stealing work dependent on the number of
+ idle processors and thereby on the probability with which a randomly
+ sent fish would find work.
+*/
+
+//@cindex handleIdlePEs
+
+void
+handleIdlePEs(void)
+{
+ PEs p;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: handling Idle PEs\n"))
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ /* Could check whether there are idle PEs if it's a cheap check */
+ for (p = 0; p < RtsFlags.GranFlags.proc; p++)
+ if (procStatus[p]==Idle) /* && IS_SPARKING(p) && IS_STARTING(p) */
+ /* First look for local work i.e. examine local spark pool! */
+ if (pending_sparks_hds[p]!=(rtsSpark *)NULL) {
+ new_event(p, p, CurrentTime[p],
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ procStatus[p] = Sparking;
+ } else if ((RtsFlags.GranFlags.maxFishes==0 ||
+ OutstandingFishes[p]<RtsFlags.GranFlags.maxFishes) ) {
+
+ /* If no local work then try to get remote work!
+ Qu' Hopbe' pagh tu'lu'pu'chugh Qu' Hop yISuq ! */
+ if (RtsFlags.GranFlags.DoStealThreadsFirst &&
+ (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0))
+ {
+ if (SurplusThreads > 0l) /* Steal a thread */
+ stealThread(p);
+
+ if (procStatus[p]!=Idle)
+ break;
+ }
+
+ if (SparksAvail > 0 &&
+ (RtsFlags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[p] == 0)) /* Steal a spark */
+ stealSpark(p);
+
+ if (SurplusThreads > 0 &&
+ (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0)) /* Steal a thread */
+ stealThread(p);
+ }
+}
+
+/*
+ Steal a spark and schedule moving it to proc. We want to look at PEs in
+ clock order -- most retarded first. Currently sparks are only stolen
+  from the @ADVISORY_POOL@, never from the @REQUIRED_POOL@. Eventually,
+ this should be changed to first steal from the former then from the
+ latter.
+
+ We model a sort of fishing mechanism by counting the number of sparks
+ and threads we are currently stealing. */
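+
+/*
+  Bookkeeping sketch: OutstandingFishes[p] is incremented whenever p sends
+  out a fish or a magic steal request (stealSomething, stealSparkMagic,
+  stealThreadMagic) and decremented when the corresponding MoveSpark or
+  MoveThread event is processed, or when a fish returns empty-handed (see
+  do_the_findwork); it therefore bounds the number of steals p has in
+  flight at any one time.
+*/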
+
+/*
+  Return a random nat value in the interval [from, to)
+*/
+static nat
+natRandom(from, to)
+nat from, to;
+{
+ nat r, d;
+
+ ASSERT(from<=to);
+ d = to - from;
+ /* random returns a value in [0, RAND_MAX] */
+ r = (nat) ((float)from + ((float)random()*(float)d)/(float)RAND_MAX);
+ r = (r==to) ? from : r;
+ ASSERT(from<=r && (r<to || from==to));
+ return r;
+}
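+
+/* For example (illustration only): natRandom(0, RtsFlags.GranFlags.proc)
+   yields a PE number in [0, proc), which is how findRandomPE below uses it;
+   natRandom(3, 3) degenerates to 3 because from==to. */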
+
+/*
+ Find any PE other than proc. Used for GUM style fishing only.
+*/
+static PEs
+findRandomPE (proc)
+PEs proc;
+{
+ nat p;
+
+ ASSERT(RtsFlags.GranFlags.Fishing);
+ if (RtsFlags.GranFlags.RandomSteal) {
+ p = natRandom(0,RtsFlags.GranFlags.proc); /* full range of PEs */
+ } else {
+ p = 0;
+ }
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ RANDOM_STEAL (fishing): stealing from PE %d (current proc is %d)",
+ p, proc);)
+
+ return (PEs)p;
+}
+
+/*
+ Magic code for stealing sparks/threads makes use of global knowledge on
+ spark queues.
+*/
+static void
+sortPEsByTime (proc, pes_by_time, firstp, np)
+PEs proc;
+PEs *pes_by_time;
+nat *firstp, *np;
+{
+ PEs p, temp, n, i, j;
+ nat first, upb, r=0, q=0;
+
+ ASSERT(!RtsFlags.GranFlags.Fishing);
+
+#if 0
+ upb = RtsFlags.GranFlags.proc; /* full range of PEs */
+
+ if (RtsFlags.GranFlags.RandomSteal) {
+ r = natRandom(0,RtsFlags.GranFlags.proc); /* full range of PEs */
+ } else {
+ r = 0;
+ }
+#endif
+
+ /* pes_by_time shall contain processors from which we may steal sparks */
+ for(n=0, p=0; p < RtsFlags.GranFlags.proc; ++p)
+ if ((proc != p) && // not the current proc
+ (pending_sparks_hds[p] != (rtsSpark *)NULL) && // non-empty spark pool
+ (CurrentTime[p] <= CurrentTime[CurrentProc]))
+ pes_by_time[n++] = p;
+
+ /* sort pes_by_time */
+ for(i=0; i < n; ++i)
+ for(j=i+1; j < n; ++j)
+ if (CurrentTime[pes_by_time[i]] > CurrentTime[pes_by_time[j]]) {
+      PEs temp = pes_by_time[i];   /* swapping PE ids, not times */
+ pes_by_time[i] = pes_by_time[j];
+ pes_by_time[j] = temp;
+ }
+
+ /* Choose random processor to steal spark from; first look at processors */
+ /* that are earlier than the current one (i.e. proc) */
+ for(first=0;
+ (first < n) && (CurrentTime[pes_by_time[first]] <= CurrentTime[proc]);
+ ++first)
+ /* nothing */ ;
+
+ /* if the assertion below is true we can get rid of first */
+ /* ASSERT(first==n); */
+ /* ToDo: check if first is really needed; find cleaner solution */
+
+ *firstp = first;
+ *np = n;
+}
+
+/*
+ Steal a spark (piece of work) from any processor and bring it to proc.
+*/
+//@cindex stealSpark
+static inline rtsBool
+stealSpark(PEs proc) { return stealSomething(proc, rtsTrue, rtsFalse); }
+
+/*
+ Steal a thread from any processor and bring it to proc i.e. thread migration
+*/
+//@cindex stealThread
+static inline rtsBool
+stealThread(PEs proc) { return stealSomething(proc, rtsFalse, rtsTrue); }
+
+/*
+ Steal a spark or a thread and schedule moving it to proc.
+*/
+//@cindex stealSomething
+static rtsBool
+stealSomething(proc, steal_spark, steal_thread)
+PEs proc; // PE that needs work (stealer)
+rtsBool steal_spark, steal_thread; // should a spark and/or thread be stolen
+{
+ PEs p;
+ rtsTime fish_arrival_time;
+ rtsSpark *spark, *prev, *next;
+ rtsBool stolen = rtsFalse;
+
+ ASSERT(steal_spark || steal_thread);
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ ASSERT(!steal_thread || RtsFlags.GranFlags.DoThreadMigration);
+
+ if (!RtsFlags.GranFlags.Fishing) {
+    // ToDo: check if stealing threads is preferred over stealing sparks
+ if (steal_spark) {
+ if (stealSparkMagic(proc))
+ return rtsTrue;
+ else // no spark found
+ if (steal_thread)
+ return stealThreadMagic(proc);
+ else // no thread found
+ return rtsFalse;
+ } else { // ASSERT(steal_thread);
+ return stealThreadMagic(proc);
+ }
+ barf("stealSomething: never reached");
+ }
+
+ /* The rest of this function does GUM style fishing */
+
+ p = findRandomPE(proc); /* find a random PE other than proc */
+
+ /* Message packing costs for sending a Fish; qeq jabbI'ID */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ /* use another GranEvent for requesting a thread? */
+ if (steal_spark && RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(p, proc, SP_REQUESTED,
+ (StgTSO*)NULL, (StgClosure *)NULL, (StgInt)0, 0);
+
+ /* time of the fish arrival on the remote PE */
+ fish_arrival_time = CurrentTime[proc] + RtsFlags.GranFlags.Costs.latency;
+
+  /* Perhaps use a separate Fish event for that? */
+ /* The contents of the spark component is a HACK:
+ 1 means give me a spark;
+ 2 means give me a thread
+ 0 means give me nothing (this should never happen)
+ */
+ new_event(p, proc, fish_arrival_time,
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL,
+ (steal_spark ? (rtsSpark*)1 : steal_thread ? (rtsSpark*)2 : (rtsSpark*)0));
+
+ ++OutstandingFishes[proc];
+ /* only with Async fetching? */
+ if (procStatus[proc]==Idle)
+ procStatus[proc]=Fishing;
+
+ /* time needed to clean up buffers etc after sending a message */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+  /* With GUM style fishing, stealing always succeeds because it only consists
+     of sending out a fish; of course, the fish may return
+ empty-handed! */
+ return rtsTrue;
+}
+
+/*
+ This version of stealing a spark makes use of the global info on all
+ spark pools etc which is not available in a real parallel system.
+ This could be extended to test e.g. the impact of perfect load information.
+*/
+//@cindex stealSparkMagic
+static rtsBool
+stealSparkMagic(proc)
+PEs proc;
+{
+ PEs p, i, j, n, first, upb;
+ rtsSpark *spark, *next;
+ PEs pes_by_time[MAX_PROC];
+ rtsBool stolen = rtsFalse;
+ rtsTime stealtime;
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ sortPEsByTime(proc, pes_by_time, &first, &n);
+
+ while (!stolen && n>0) {
+ upb = (first==0) ? n : first;
+ i = natRandom(0,upb); /* choose a random eligible PE */
+ p = pes_by_time[i];
+
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealSparkMagic (random_steal, not fishing): stealing spark from PE %d (current proc is %d)",
+ p, proc));
+
+ ASSERT(pending_sparks_hds[p]!=(rtsSpark *)NULL); /* non-empty spark pool */
+
+ /* Now go through rtsSparkQ and steal the first eligible spark */
+
+ spark = pending_sparks_hds[p];
+ while (!stolen && spark != (rtsSpark*)NULL)
+ {
+ /* NB: no prev pointer is needed here because all sparks that are not
+ chosen are pruned
+ */
+ if ((procStatus[p]==Idle || procStatus[p]==Sparking || procStatus[p] == Fishing) &&
+ spark->next==(rtsSpark*)NULL)
+ {
+ /* Be social! Don't steal the only spark of an idle processor
+ not {spark} neH yInIH !! */
+ break; /* next PE */
+ }
+ else if (closure_SHOULD_SPARK(spark->node))
+ {
+ /* Don't Steal local sparks;
+ ToDo: optionally prefer local over global sparks
+ if (!spark->global) {
+ prev=spark;
+ continue; next spark
+ }
+ */
+ /* found a spark! */
+
+ /* Prepare message for sending spark */
+ CurrentTime[p] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(p, (PEs)0, SP_EXPORTED,
+ (StgTSO*)NULL, spark->node,
+ spark->name, spark_queue_len(p));
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+ CurrentTime[p] :
+ CurrentTime[proc])
+ + sparkStealTime();
+
+ new_event(proc, p /* CurrentProc */, stealtime,
+ MoveSpark,
+ (StgTSO*)NULL, spark->node, spark);
+
+ stolen = rtsTrue;
+ ++OutstandingFishes[proc]; /* no. of sparks currently on the fly */
+ if (procStatus[proc]==Idle)
+ procStatus[proc] = Fishing;
+ ++(spark->global); /* record that this is a global spark */
+ ASSERT(SparksAvail>0);
+ --SparksAvail; /* on-the-fly sparks are not available */
+ next = delete_from_sparkq(spark, p, rtsFalse); // don't dispose!
+ CurrentTime[p] += RtsFlags.GranFlags.Costs.mtidytime;
+ }
+ else /* !(closure_SHOULD_SPARK(SPARK_NODE(spark))) */
+ {
+ IF_GRAN_DEBUG(checkSparkQ,
+ belch("^^ pruning spark %p (node %p) in stealSparkMagic",
+ spark, spark->node));
+
+ /* if the spark points to a node that should not be sparked,
+ prune the spark queue at this point */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(p, (PEs)0, SP_PRUNED,
+ (StgTSO*)NULL, spark->node,
+ spark->name, spark_queue_len(p));
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ globalGranStats.pruned_sparks++;
+
+ ASSERT(SparksAvail>0);
+ --SparksAvail;
+ spark = delete_from_sparkq(spark, p, rtsTrue);
+ }
+ /* unlink spark (may have been freed!) from sparkq;
+ if (prev == NULL) // spark was head of spark queue
+ pending_sparks_hds[p] = spark->next;
+ else
+ prev->next = spark->next;
+ if (spark->next == NULL)
+ pending_sparks_tls[p] = prev;
+ else
+ next->prev = prev;
+ */
+ } /* while ... iterating over sparkq */
+
+ /* ToDo: assert that PE p still has work left after stealing the spark */
+
+ if (!stolen && (n>0)) { /* nothing stealable from proc p :( */
+ ASSERT(pes_by_time[i]==p);
+
+ /* remove p from the list (at pos i) */
+ for (j=i; j+1<n; j++)
+ pes_by_time[j] = pes_by_time[j+1];
+ n--;
+
+ /* update index to first proc which is later (or equal) than proc */
+ for ( ;
+ (first>0) &&
+ (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+ first--)
+ /* nothing */ ;
+ }
+ } /* while ... iterating over PEs in pes_by_time */
+
+ IF_GRAN_DEBUG(randomSteal,
+ if (stolen)
+ belch("^^ stealSparkMagic: spark %p (node=%p) stolen by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+ spark, spark->node, proc, p,
+ SparksAvail, idlers());
+ else
+ belch("^^ stealSparkMagic: nothing stolen by PE %d (sparkq len after pruning=%d)(SparksAvail=%d; idlers=%d)",
+ proc, SparksAvail, idlers()));
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ stolen && (i!=0)) { /* only for statistics */
+ globalGranStats.rs_sp_count++;
+ globalGranStats.ntimes_total += n;
+ globalGranStats.fl_total += first;
+ globalGranStats.no_of_steals++;
+ }
+
+ return stolen;
+}
+
+/*
+ The old stealThread code, which makes use of global info and does not
+ send out fishes.
+ NB: most of this is the same as in stealSparkMagic;
+ only the pieces specific to processing thread queues are different;
+ long live polymorphism!
+*/
+
+//@cindex stealThreadMagic
+static rtsBool
+stealThreadMagic(proc)
+PEs proc;
+{
+ PEs p, i, j, n, first, upb;
+ StgTSO *tso;
+ PEs pes_by_time[MAX_PROC];
+ rtsBool stolen = rtsFalse;
+ rtsTime stealtime;
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ sortPEsByTime(proc, pes_by_time, &first, &n);
+
+ while (!stolen && n>0) {
+ upb = (first==0) ? n : first;
+ i = natRandom(0,upb); /* choose a random eligible PE */
+ p = pes_by_time[i];
+
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealThreadMagic (random_steal, not fishing): stealing thread from PE %d (current proc is %d)",
+ p, proc));
+
+ /* Steal the first exportable thread in the runnable queue but
+ never steal the first in the queue for social reasons;
+ not Qu' wa'DIch yInIH !!
+ */
+ /* Would be better to search through queue and have options which of
+ the threads to pick when stealing */
+ if (run_queue_hds[p] == END_TSO_QUEUE) {
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealThreadMagic: No thread to steal from PE %d (stealer=PE %d)",
+ p, proc));
+ } else {
+ tso = run_queue_hds[p]->link; /* tso is *2nd* thread in thread queue */
+ /* Found one */
+ stolen = rtsTrue;
+
+ /* update links in queue */
+ run_queue_hds[p]->link = tso->link;
+ if (run_queue_tls[p] == tso)
+ run_queue_tls[p] = run_queue_hds[p];
+
+ /* ToDo: Turn magic constants into params */
+
+ CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mpacktime;
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+ CurrentTime[p] :
+ CurrentTime[proc])
+ + sparkStealTime()
+ + 4l * RtsFlags.GranFlags.Costs.additional_latency
+ + 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
+ SET_GRAN_HDR(tso,Nowhere /* PE_NUMBER(proc) */);
+
+ /* Move from one queue to another */
+ new_event(proc, p, stealtime,
+ MoveThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+
+ /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
+ ++OutstandingFishes[proc];
+      if (procStatus[proc]==Idle)   /* as in stealSparkMagic */
+ procStatus[proc] = Fishing;
+ --SurplusThreads;
+
+ if(RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(p, proc,
+ GR_STEALING,
+ tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+ /* costs for tidying up buffer after having sent it */
+ CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mtidytime;
+ }
+
+    /* ToDo: assert that PE p still has work left after stealing the thread */
+
+ if (!stolen && (n>0)) { /* nothing stealable from proc p :( */
+ ASSERT(pes_by_time[i]==p);
+
+ /* remove p from the list (at pos i) */
+ for (j=i; j+1<n; j++)
+ pes_by_time[j] = pes_by_time[j+1];
+ n--;
+
+ /* update index to first proc which is later (or equal) than proc */
+ for ( ;
+ (first>0) &&
+ (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+ first--)
+ /* nothing */ ;
+ }
+ } /* while ... iterating over PEs in pes_by_time */
+
+ IF_GRAN_DEBUG(randomSteal,
+ if (stolen)
+ belch("^^ stealThreadMagic: stolen TSO %d (%p) by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+ tso->id, tso, proc, p,
+ SparksAvail, idlers());
+ else
+ belch("stealThreadMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)",
+ proc, SparksAvail, idlers()));
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ stolen && (i!=0)) { /* only for statistics */
+    /* ToDo: more statistics on avg thread queue length etc */
+ globalGranStats.rs_t_count++;
+ globalGranStats.no_of_migrates++;
+ }
+
+ return stolen;
+}
+
+//@cindex sparkStealTime
+static rtsTime
+sparkStealTime(void)
+{
+ double fishdelay, sparkdelay, latencydelay;
+ fishdelay = (double)RtsFlags.GranFlags.proc/2;
+ sparkdelay = fishdelay -
+ ((fishdelay-1)/(double)(RtsFlags.GranFlags.proc-1))*(double)idlers();
+ latencydelay = sparkdelay*((double)RtsFlags.GranFlags.Costs.latency);
+
+ return((rtsTime)latencydelay);
+}
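+
+/*
+  Worked example (illustration only): with 4 PEs and 1 idle PE,
+  fishdelay = 4/2 = 2 and sparkdelay = 2 - ((2-1)/3)*1 = 5/3, so the steal
+  is charged 5/3 of a latency; with all 3 other PEs idle sparkdelay drops
+  to 1, i.e. a single latency, reflecting the higher chance that a randomly
+  sent fish would have found work immediately.
+*/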
+
+//@node Routines directly called from Haskell world, Emiting profiling info for GrAnSim, Idle PEs, GranSim specific code
+//@subsection Routines directly called from Haskell world
+/*
+The @GranSim...@ routines in here are directly called via macros from the
+threaded world.
+
+First some auxiliary routines.
+*/
+
+/* Take the current thread off the thread queue and thereby activate the
+ next thread. It's assumed that the next ReSchedule after this uses
+ NEW_THREAD as param.
+ This fct is called from GranSimBlock and GranSimFetch
+*/
+
+//@cindex ActivateNextThread
+
+void
+ActivateNextThread (proc)
+PEs proc;
+{
+ StgTSO *t;
+ /*
+ This routine is entered either via GranSimFetch or via GranSimBlock.
+ It has to prepare the CurrentTSO for being blocked and update the
+ run queue and other statistics on PE proc. The actual enqueuing to the
+ blocking queue (if coming from GranSimBlock) is done in the entry code
+ of the BLACKHOLE and BLACKHOLE_BQ closures (see StgMiscClosures.hc).
+ */
+ /* ToDo: add assertions here!! */
+ //ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE);
+
+ // Only necessary if the running thread is at front of the queue
+ // run_queue_hds[proc] = run_queue_hds[proc]->link;
+ ASSERT(CurrentProc==proc);
+ ASSERT(!is_on_queue(CurrentTSO,proc));
+ if (run_queue_hds[proc]==END_TSO_QUEUE) {
+ /* NB: this routine is only entered with asynchr comm (see assertion) */
+ procStatus[proc] = Idle;
+ } else {
+ /* ToDo: check cost assignment */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime;
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ (!RtsFlags.GranFlags.Light || RtsFlags.GranFlags.Debug.checkLight))
+ /* right flag !?? ^^^ */
+ DumpRawGranEvent(proc, 0, GR_SCHEDULE, run_queue_hds[proc],
+ (StgClosure*)NULL, (StgInt)0, 0);
+ }
+}
+
+/*
+ The following GranSim fcts are stg-called from the threaded world.
+*/
+
+/* Called from HP_CHK and friends (see StgMacros.h) */
+//@cindex GranSimAllocate
+void
+GranSimAllocate(n)
+StgInt n;
+{
+ CurrentTSO->gran.allocs += n;
+ ++(CurrentTSO->gran.basicblocks);
+
+ if (RtsFlags.GranFlags.GranSimStats.Heap) {
+ DumpRawGranEvent(CurrentProc, 0, GR_ALLOC, CurrentTSO,
+ (StgClosure*)NULL, (StgInt)0, n);
+ }
+
+ CurrentTSO->gran.exectime += RtsFlags.GranFlags.Costs.heapalloc_cost;
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.heapalloc_cost;
+}
+
+/*
+  Subtract the values added above if a heap check fails and
+  the allocation has to be redone.
+*/
+//@cindex GranSimUnallocate
+void
+GranSimUnallocate(n)
+StgInt n;
+{
+ CurrentTSO->gran.allocs -= n;
+ --(CurrentTSO->gran.basicblocks);
+
+ CurrentTSO->gran.exectime -= RtsFlags.GranFlags.Costs.heapalloc_cost;
+ CurrentTime[CurrentProc] -= RtsFlags.GranFlags.Costs.heapalloc_cost;
+}
+
+/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
+//@cindex GranSimExec
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+StgWord ariths,branches,loads,stores,floats;
+{
+ StgWord cost = RtsFlags.GranFlags.Costs.arith_cost*ariths +
+ RtsFlags.GranFlags.Costs.branch_cost*branches +
+ RtsFlags.GranFlags.Costs.load_cost * loads +
+ RtsFlags.GranFlags.Costs.store_cost*stores +
+ RtsFlags.GranFlags.Costs.float_cost*floats;
+
+ CurrentTSO->gran.exectime += cost;
+ CurrentTime[CurrentProc] += cost;
+}
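+
+/* For instance (illustration only), GranSimExec(2,1,3,1,0) charges the
+   current TSO and PE 2*arith_cost + branch_cost + 3*load_cost + store_cost
+   time units; the inlined GRAN_EXEC macro mentioned above presumably
+   expands to the same computation. */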
+
+/*
+ Fetch the node if it isn't local
+ -- result indicates whether fetch has been done.
+
+ This is GRIP-style single item fetching.
+*/
+
+//@cindex GranSimFetch
+StgInt
+GranSimFetch(node /* , liveness_mask */ )
+StgClosure *node;
+/* StgInt liveness_mask; */
+{
+ /* reset the return value (to be checked within STG land) */
+ NeedToReSchedule = rtsFalse;
+
+ if (RtsFlags.GranFlags.Light) {
+ /* Always reschedule in GrAnSim-Light to prevent one TSO from
+ running off too far
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ ContinueThread,CurrentTSO,node,NULL);
+ */
+ return(0);
+ }
+
+ /* Faking an RBH closure:
+ If the bitmask of the closure is 0 then this node is a fake RBH;
+ */
+ if (node->header.gran.procs == Nowhere) {
+ IF_GRAN_DEBUG(bq,
+ belch("## Found fake RBH (node %p); delaying TSO %d (%p)",
+ node, CurrentTSO->id, CurrentTSO));
+
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc]+10000,
+ ContinueThread, CurrentTSO, node, (rtsSpark*)NULL);
+
+ /* Rescheduling (GranSim internal) is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+
+ /* Note: once a node has been fetched, this test will be passed */
+ if (!IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ PEs p = where_is(node);
+ rtsTime fetchtime;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ if (p==CurrentProc)
+                  belch("GranSimFetch: Trying to fetch from own processor %u\n", p););
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+ /* NB: Fetch is counted on arrival (FetchReply) */
+
+ fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ new_event(p, CurrentProc, fetchtime,
+ FetchNode, CurrentTSO, node, (rtsSpark*)NULL);
+
+ if (fetchtime<TimeOfNextEvent)
+ TimeOfNextEvent = fetchtime;
+
+ /* About to block */
+ CurrentTSO->gran.blockedat = CurrentTime[CurrentProc];
+
+ ++OutstandingFetches[CurrentProc];
+
+ if (RtsFlags.GranFlags.DoAsyncFetch)
+ /* if asynchr comm is turned on, activate the next thread in the q */
+ ActivateNextThread(CurrentProc);
+ else
+ procStatus[CurrentProc] = Fetching;
+
+#if 0
+ /* ToDo: nuke the entire if (anything special for fair schedule?) */
+ if (RtsFlags.GranFlags.DoAsyncFetch)
+ {
+ /* Remove CurrentTSO from the queue -- assumes head of queue == CurrentTSO */
+ if(!RtsFlags.GranFlags.DoFairSchedule)
+ {
+ /* now done in do_the_fetchnode
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(CurrentProc, p, GR_FETCH, CurrentTSO,
+ node, (StgInt)0, 0);
+ */
+ ActivateNextThread(CurrentProc);
+
+# if 0 && defined(GRAN_CHECK)
+ if (RtsFlags.GranFlags.Debug.blockOnFetch_sanity) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FetchNode: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ stg_exit(EXIT_FAILURE);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+ }
+# endif
+ CurrentTSO->link = END_TSO_QUEUE;
+ /* CurrentTSO = END_TSO_QUEUE; */
+
+ /* CurrentTSO is pointed to by the FetchNode event; it is
+ on no run queue any more */
+ } else { /* fair scheduling currently not supported -- HWL */
+ barf("Asynchr communication is not yet compatible with fair scheduling\n");
+ }
+ } else { /* !RtsFlags.GranFlags.DoAsyncFetch */
+ procStatus[CurrentProc] = Fetching; // ToDo: BlockedOnFetch;
+ /* now done in do_the_fetchnode
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(CurrentProc, p,
+ GR_FETCH, CurrentTSO, node, (StgInt)0, 0);
+ */
+ IF_GRAN_DEBUG(blockOnFetch,
+ BlockedOnFetch[CurrentProc] = CurrentTSO;); /*- rtsTrue; -*/
+ }
+#endif /* 0 */
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ /* Rescheduling (GranSim internal) is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+ return(0);
+}
+
+//@cindex GranSimSpark
+void
+GranSimSpark(local,node)
+StgInt local;
+StgClosure *node;
+{
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(CurrentProc, (PEs)0, SP_SPARK,
+ END_TSO_QUEUE, node, (StgInt)0, spark_queue_len(CurrentProc)-1);
+
+ /* Force the PE to take notice of the spark */
+ if(RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FindWork,
+ END_TSO_QUEUE, (StgClosure*)NULL, (rtsSpark*)NULL);
+ if (CurrentTime[CurrentProc]<TimeOfNextEvent)
+ TimeOfNextEvent = CurrentTime[CurrentProc];
+ }
+
+ if(local)
+ ++CurrentTSO->gran.localsparks;
+ else
+ ++CurrentTSO->gran.globalsparks;
+}
+
+//@cindex GranSimSparkAt
+void
+GranSimSparkAt(spark,where,identifier)
+rtsSpark *spark;
+StgClosure *where; /* This should be a node; alternatively could be a GA */
+StgInt identifier;
+{
+ PEs p = where_is(where);
+ GranSimSparkAtAbs(spark,p,identifier);
+}
+
+//@cindex GranSimSparkAtAbs
+void
+GranSimSparkAtAbs(spark,proc,identifier)
+rtsSpark *spark;
+PEs proc;
+StgInt identifier;
+{
+ rtsTime exporttime;
+
+ if (spark == (rtsSpark *)NULL) /* Note: Granularity control might have */
+ return; /* turned a spark into a NULL. */
+
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if(RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc,0,SP_SPARKAT,
+ END_TSO_QUEUE, spark->node, (StgInt)0, spark_queue_len(proc));
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+ exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
+ CurrentTime[proc]: CurrentTime[CurrentProc])
+ + RtsFlags.GranFlags.Costs.latency;
+ } else {
+ exporttime = CurrentTime[CurrentProc];
+ }
+
+ if ( RtsFlags.GranFlags.Light )
+ /* Need CurrentTSO in event field to associate costs with creating
+ spark even in a GrAnSim Light setup */
+ new_event(proc, CurrentProc, exporttime,
+ MoveSpark,
+ CurrentTSO, spark->node, spark);
+ else
+ new_event(proc, CurrentProc, exporttime,
+ MoveSpark, (StgTSO*)NULL, spark->node, spark);
+ /* Bit of a hack to treat placed sparks the same as stolen sparks */
+ ++OutstandingFishes[proc];
+
+  /* Force the PE to take notice of the spark (the FINDWORK event is
+     scheduled just after the MoveSpark, so the spark is already in the
+     sparkq when it is handled) */
+ if (RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,exporttime+1,
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ }
+
+ if (exporttime<TimeOfNextEvent)
+ TimeOfNextEvent = exporttime;
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+ ++CurrentTSO->gran.globalsparks;
+ } else {
+ ++CurrentTSO->gran.localsparks;
+ }
+}
+
+/*
+ This function handles local and global blocking. It's called either
+ from threaded code (RBH_entry, BH_entry etc) or from blockFetch when
+  trying to fetch a BH or RBH.
+*/
+
+//@cindex GranSimBlock
+void
+GranSimBlock(tso, proc, node)
+StgTSO *tso;
+PEs proc;
+StgClosure *node;
+{
+ PEs node_proc = where_is(node), tso_proc = where_is(tso);
+
+ ASSERT(tso_proc==CurrentProc);
+ // ASSERT(node_proc==CurrentProc);
+ IF_GRAN_DEBUG(bq,
+ if (node_proc!=CurrentProc)
+ belch("## ghuH: TSO %d (%lx) [PE %d] blocks on non-local node %p [PE %d] (no simulation of FETCHMEs)",
+ tso->id, tso, tso_proc, node, node_proc));
+ ASSERT(tso->link==END_TSO_QUEUE);
+ ASSERT(!is_on_queue(tso,proc)); // tso must not be on run queue already!
+ //ASSERT(tso==run_queue_hds[proc]);
+
+ IF_DEBUG(gran,
+ belch("GRAN: TSO %d (%p) [PE %d] blocks on closure %p @ %lx",
+ tso->id, tso, proc, node, CurrentTime[proc]);)
+
+
+ /* THIS SHOULD NEVER HAPPEN!
+ If tso tries to block on a remote node (i.e. node_proc!=CurrentProc)
+ we have missed a GranSimFetch before entering this closure;
+ we hack around it for now, faking a FetchNode;
+ because GranSimBlock is entered via a BLACKHOLE(_BQ) closure,
+ tso will be blocked on this closure until the FetchReply occurs.
+
+ ngoq Dogh!
+
+ if (node_proc!=CurrentProc) {
+ StgInt ret;
+ ret = GranSimFetch(node);
+ IF_GRAN_DEBUG(bq,
+ if (ret)
+ belch(".. GranSimBlock: faking a FetchNode of node %p from %d to %d",
+ node, node_proc, CurrentProc););
+ return;
+ }
+ */
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,(StgInt)0,0);
+
+ ++(tso->gran.blockcount);
+ /* Distinction between local and global block is made in blockFetch */
+ tso->gran.blockedat = CurrentTime[proc];
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime;
+ ActivateNextThread(proc);
+ /* tso->link = END_TSO_QUEUE; not really necessary; only for testing */
+}
+
+#endif /* GRAN */
+
+//@node Index, , Dumping routines, GranSim specific code
+//@subsection Index
+
+//@index
+//* ActivateNextThread:: @cindex\s-+ActivateNextThread
+//* CurrentProc:: @cindex\s-+CurrentProc
+//* CurrentTime:: @cindex\s-+CurrentTime
+//* GranSimAllocate:: @cindex\s-+GranSimAllocate
+//* GranSimBlock:: @cindex\s-+GranSimBlock
+//* GranSimExec:: @cindex\s-+GranSimExec
+//* GranSimFetch:: @cindex\s-+GranSimFetch
+//* GranSimLight_insertThread:: @cindex\s-+GranSimLight_insertThread
+//* GranSimSpark:: @cindex\s-+GranSimSpark
+//* GranSimSparkAt:: @cindex\s-+GranSimSparkAt
+//* GranSimSparkAtAbs:: @cindex\s-+GranSimSparkAtAbs
+//* GranSimUnallocate:: @cindex\s-+GranSimUnallocate
+//* any_idle:: @cindex\s-+any_idle
+//* blockFetch:: @cindex\s-+blockFetch
+//* do_the_fetchnode:: @cindex\s-+do_the_fetchnode
+//* do_the_fetchreply:: @cindex\s-+do_the_fetchreply
+//* do_the_findwork:: @cindex\s-+do_the_findwork
+//* do_the_globalblock:: @cindex\s-+do_the_globalblock
+//* do_the_movespark:: @cindex\s-+do_the_movespark
+//* do_the_movethread:: @cindex\s-+do_the_movethread
+//* do_the_startthread:: @cindex\s-+do_the_startthread
+//* do_the_unblock:: @cindex\s-+do_the_unblock
+//* fetchNode:: @cindex\s-+fetchNode
+//* ga_to_proc:: @cindex\s-+ga_to_proc
+//* get_next_event:: @cindex\s-+get_next_event
+//* get_time_of_next_event:: @cindex\s-+get_time_of_next_event
+//* grab_event:: @cindex\s-+grab_event
+//* handleFetchRequest:: @cindex\s-+handleFetchRequest
+//* handleIdlePEs:: @cindex\s-+handleIdlePEs
+//* idlers:: @cindex\s-+idlers
+//* insertThread:: @cindex\s-+insertThread
+//* insert_event:: @cindex\s-+insert_event
+//* is_on_queue:: @cindex\s-+is_on_queue
+//* is_unique:: @cindex\s-+is_unique
+//* new_event:: @cindex\s-+new_event
+//* prepend_event:: @cindex\s-+prepend_event
+//* print_event:: @cindex\s-+print_event
+//* print_eventq:: @cindex\s-+print_eventq
+//* prune_eventq :: @cindex\s-+prune_eventq
+//* spark queue:: @cindex\s-+spark queue
+//* sparkStealTime:: @cindex\s-+sparkStealTime
+//* stealSomething:: @cindex\s-+stealSomething
+//* stealSpark:: @cindex\s-+stealSpark
+//* stealSparkMagic:: @cindex\s-+stealSparkMagic
+//* stealThread:: @cindex\s-+stealThread
+//* stealThreadMagic:: @cindex\s-+stealThreadMagic
+//* thread_queue_len:: @cindex\s-+thread_queue_len
+//* traverse_eventq_for_gc:: @cindex\s-+traverse_eventq_for_gc
+//* where_is:: @cindex\s-+where_is
+//@end index
--- /dev/null
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sat Dec 04 1999 01:26:45 Stardate: [-30]3995.30 hwloidl>
+ $Id: GranSimRts.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $
+
+ Variables and functions specific to GranSim.
+ ----------------------------------------------------------------------- */
+
+#ifndef GRANSIM_RTS_H
+#define GRANSIM_RTS_H
+
+//@node Headers for GranSim objs used only in the RTS internally, , ,
+//@section Headers for GranSim objs used only in the RTS internally
+
+//@menu
+//* Event queue::
+//* Spark handling routines::
+//* Processor related stuff::
+//* Local types::
+//* Statistics gathering::
+//* Prototypes::
+//@end menu
+
+//@node Event queue, Spark handling routines, Headers for GranSim objs used only in the RTS internally, Headers for GranSim objs used only in the RTS internally
+//@subsection Event queue
+
+#if defined(GRAN) || defined(PAR)
+/* Granularity event types for output (see DumpGranEvent) */
+typedef enum GranEventType_ {
+ GR_START = 0, GR_STARTQ,
+ GR_STEALING, GR_STOLEN, GR_STOLENQ,
+ GR_FETCH, GR_REPLY, GR_BLOCK, GR_RESUME, GR_RESUMEQ,
+ GR_SCHEDULE, GR_DESCHEDULE,
+ GR_END,
+ SP_SPARK, SP_SPARKAT, SP_USED, SP_PRUNED, SP_EXPORTED, SP_ACQUIRED, SP_REQUESTED,
+ GR_ALLOC,
+ GR_TERMINATE,
+ GR_SYSTEM_START, GR_SYSTEM_END, /* only for debugging */
+ GR_EVENT_MAX
+} GranEventType;
+
+extern char *gran_event_names[];
+#endif
+
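+/* Illustrative only: these tags are passed to the dumping routines declared
+   at the end of this file, e.g. to record that a thread starts running:
+     DumpRawGranEvent(CurrentProc, (PEs)0, GR_START, tso, node, 0, 0);       */
+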
+#if defined(GRAN) /* whole file */
+
+/* Event Types (internal use only) */
+typedef enum rtsEventType_ {
+ ContinueThread = 0, /* Continue running the first thread in the queue */
+ StartThread, /* Start a newly created thread */
+ ResumeThread, /* Resume a previously running thread */
+ MoveSpark, /* Move a spark from one PE to another */
+ MoveThread, /* Move a thread from one PE to another */
+ FindWork, /* Search for work */
+ FetchNode, /* Fetch a node */
+ FetchReply, /* Receive a node */
+ GlobalBlock, /* Block a TSO on a remote node */
+ UnblockThread /* Make a TSO runnable */
+} rtsEventType;
+
+/* Number of last event type */
+#define MAX_EVENT 9
+
+typedef struct rtsEvent_ {
+ PEs proc; /* Processor id */
+ PEs creator; /* Processor id of PE that created the event */
+ rtsEventType evttype; /* rtsEvent type */
+ rtsTime time; /* Time at which event happened */
+ StgTSO *tso; /* Associated TSO, if relevant */
+ StgClosure *node; /* Associated node, if relevant */
+ rtsSpark *spark; /* Associated SPARK, if relevant */
+ StgInt gc_info; /* Counter of heap objects to mark (used in GC only)*/
+ struct rtsEvent_ *next;
+ } rtsEvent;
+
+typedef rtsEvent *rtsEventQ;
+
+extern rtsEventQ EventHd;
+
+/* Interface for ADT of Event Queue */
+rtsEvent *get_next_event(void);
+rtsTime get_time_of_next_event(void);
+void insert_event(rtsEvent *newentry);
+void new_event(PEs proc, PEs creator, rtsTime time,
+ rtsEventType evttype, StgTSO *tso,
+ StgClosure *node, rtsSpark *spark);
+void print_event(rtsEvent *event);
+void print_eventq(rtsEvent *hd);
+void prepend_event(rtsEvent *event);
+rtsEventQ grab_event(void);
+void prune_eventq(StgTSO *tso, StgClosure *node);
+
+void traverse_eventq_for_gc(void);
+void markEventQueue(void);
+
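+/* Illustrative sketch (not part of the interface): a typical producer uses
+   new_event to schedule work on a PE, and the main scheduling loop later
+   pulls events off with get_next_event and dispatches on the event type. */
+#if 0
+   new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
+             FindWork, (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+   ...
+   event = get_next_event();
+   switch (event->evttype) {
+     case FindWork:   /* look for a local spark or steal one  */ break;
+     case FetchNode:  /* ship the requested node to the asker */ break;
+     default:         /* other do_the_* handlers              */ break;
+   }
+#endif
+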
+//@node Spark handling routines, Processor related stuff, Event queue, Headers for GranSim objs used only in the RTS internally
+//@subsection Spark handling routines
+
+/* These functions are only used in the RTS internally; see GranSim.h for rest */
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void print_spark(rtsSpark *spark);
+void print_sparkq(PEs proc);
+void print_sparkq_stats(void);
+nat spark_queue_len(PEs proc);
+rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too);
+void markSparkQueue(void);
+
+//@node Processor related stuff, Local types, Spark handling routines, Headers for GranSim objs used only in the RTS internally
+//@subsection Processor related stuff
+
+typedef enum rtsProcStatus_ {
+ Idle = 0, /* empty threadq */
+ Sparking, /* non-empty sparkq; FINDWORK has been issued */
+  Starting,      /* STARTTHREAD has been issued */
+ Fetching, /* waiting for remote data (only if block-on-fetch) */
+ Fishing, /* waiting for remote spark/thread */
+ Busy /* non-empty threadq, with head of queue active */
+} rtsProcStatus;
+
+/*
+#define IS_IDLE(proc) (procStatus[proc] == Idle)
+#define IS_SPARKING(proc) (procStatus[proc] == Sparking)
+#define IS_STARTING(proc) (procStatus[proc] == Starting)
+#define IS_FETCHING(proc) (procStatus[proc] == Fetching)
+#define IS_FISHING(proc) (procStatus[proc] == Fishing)
+#define IS_BUSY(proc) (procStatus[proc] == Busy)
+#define ANY_IDLE (any_idle())
+#define MAKE_IDLE(proc) procStatus[proc] = Idle
+#define MAKE_SPARKING(proc) procStatus[proc] = Sparking
+#define MAKE_STARTING(proc) procStatus[proc] = Starting
+#define MAKE_FETCHING(proc) procStatus[proc] = Fetching
+#define MAKE_FISHING(proc) procStatus[proc] = Fishing
+#define MAKE_BUSY(proc) procStatus[proc] = Busy
+*/
+
+//@node Local types, Statistics gathering, Processor related stuff, Headers for GranSim objs used only in the RTS internally
+//@subsection Local types
+
+/* Return codes of HandleFetchRequest:
+ 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
+ nearby graph has been scheduled)
+ 1 ... node is already local (fetched by somebody else; no event is
+ scheduled in here)
+    2 ... fetch request has been forwarded to the PE that now contains the
+ node
+ 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
+ the current TSO is put into the blocking queue of that node
+ 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
+ function to guarantee that the tso and node inputs are valid
+ (they may be moved during GC).
+ Return codes of blockFetch:
+ 0 ... ok; tso is now at beginning of BQ attached to the bh closure
+    1 ... the bh closure is no longer a BH; tso is immediately unblocked
+*/
+
+typedef enum rtsFetchReturnCode_ {
+ Ok = 0,
+ NodeIsLocal,
+ NodeHasMoved,
+ NodeIsBH,
+ NodeIsNoBH,
+ OutOfHeap,
+} rtsFetchReturnCode;
+
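+/* Sketch (illustrative only) of how a caller is expected to dispatch on
+   these codes; the argument order follows the prototype given below. */
+#if 0
+   switch (handleFetchRequest(node, CurrentProc, p, tso)) {
+     case Ok:           break;  /* FETCHREPLY has been scheduled             */
+     case NodeIsLocal:  break;  /* fetched by somebody else in the meantime  */
+     case NodeHasMoved: break;  /* request forwarded to the new owner        */
+     case NodeIsBH:     break;  /* tso now sits in the node's blocking queue */
+     case OutOfHeap:    break;  /* trigger GC, then retry with valid inputs  */
+   }
+#endif
+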
+//@node Statistics gathering, Prototypes, Local types, Headers for GranSim objs used only in the RTS internally
+//@subsection Statistics gathering
+
+extern unsigned int /* nat */ OutstandingFetches[], OutstandingFishes[];
+extern rtsProcStatus procStatus[];
+extern StgTSO *BlockedOnFetch[];
+
+/* global structure for collecting statistics */
+typedef struct GlobalGranStats_ {
+ /* event stats */
+ nat noOfEvents;
+ nat event_counts[MAX_EVENT];
+
+ /* communication stats */
+ nat fetch_misses;
+ nat tot_fake_fetches; // GranSim internal; faked Fetches are a kludge!!
+ nat tot_low_pri_sparks;
+
+ /* load distribution statistics */
+ nat rs_sp_count, rs_t_count, ntimes_total, fl_total,
+ no_of_steals, no_of_migrates;
+
+ /* spark queue stats */
+ nat tot_sq_len, tot_sq_probes, tot_sparks;
+ nat tot_add_threads, tot_tq_len, non_end_add_threads;
+
+ /* packet statistics */
+ nat tot_packets, tot_packet_size, tot_cuts, tot_thunks;
+
+ /* thread stats */
+ nat tot_threads_created, threads_created_on_PE[MAX_PROC],
+ tot_TSOs_migrated;
+
+ /* spark stats */
+ nat pruned_sparks, withered_sparks;
+ nat tot_sparks_created, sparks_created_on_PE[MAX_PROC];
+
+ /* scheduling stats */
+ nat tot_yields;
+
+ /* blocking queue statistics */
+ rtsTime tot_bq_processing_time;
+ nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs;
+} GlobalGranStats;
+
+extern GlobalGranStats globalGranStats;
+
+//@node Prototypes, , Statistics gathering, Headers for GranSim objs used only in the RTS internally
+//@subsection Prototypes
+
+/* Generally useful fcts */
+PEs where_is(StgClosure *node);
+rtsBool is_unique(StgClosure *node);
+
+/* Prototypes of event handling functions; needed in Schedule.c:ReSchedule() */
+void do_the_globalblock (rtsEvent* event);
+void do_the_unblock (rtsEvent* event);
+void do_the_fetchnode (rtsEvent* event);
+void do_the_fetchreply (rtsEvent* event);
+void do_the_movethread (rtsEvent* event);
+void do_the_movespark (rtsEvent* event);
+void do_the_startthread(rtsEvent *event);
+void do_the_findwork(rtsEvent* event);
+void gimme_spark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res);
+rtsBool munch_spark (rtsEvent *event, rtsSparkQ spark);
+
+/* GranSimLight routines */
+void GranSimLight_enter_system(rtsEvent *event, StgTSO **ActiveTSOp);
+void GranSimLight_leave_system(rtsEvent *event, StgTSO **ActiveTSOp);
+
+/* Communication related routines */
+rtsFetchReturnCode fetchNode(StgClosure* node, PEs from, PEs to);
+rtsFetchReturnCode handleFetchRequest(StgClosure* node, PEs curr_proc, PEs p, StgTSO* tso);
+rtsFetchReturnCode blockFetch(StgTSO* tso, PEs proc, StgClosure* bh);
+void handleIdlePEs(void);
+
+long int random(void); /* used in stealSpark() and stealThread() in GranSim.c */
+
+/* Scheduling fcts defined in GranSim.c */
+void insertThread(StgTSO *tso, PEs proc);
+void endThread(StgTSO *tso, PEs proc);
+rtsBool GranSimLight_insertThread(StgTSO *tso, PEs proc);
+nat thread_queue_len(PEs proc);
+
+/* For debugging */
+rtsBool is_on_queue (StgTSO *tso, PEs proc);
+
+/* Interface for dumping routines (i.e. writing to log file) */
+void DumpGranEvent(GranEventType name, StgTSO *tso);
+void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread);
+void DumpTSO(StgTSO *tso);
+void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len);
+
+#endif
+
+#endif /* GRANSIM_RTS_H */
--- /dev/null
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sun Dec 05 1999 21:02:36 Stardate: [-30]4004.38 hwloidl>
+ $Id: HLC.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $
+
+ High Level Communications Header (HLC.h)
+
+ Contains the high-level definitions (i.e. communication
+ subsystem independent) used by GUM
+ Phil Trinder, Glasgow University, 12 December 1994
+ H-W. Loidl, Heriot-Watt, November 1999
+ ----------------------------------------------------------------------- */
+
+#ifndef __HLC_H
+#define __HLC_H
+
+#ifdef PAR
+
+#include "LLC.h"
+
+#define NEW_FISH_AGE 0
+#define NEW_FISH_HISTORY 0
+#define NEW_FISH_HUNGER 0
+#define FISH_LIFE_EXPECTANCY 10
+
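+/* Illustrative only: a freshly created fish starts out with the constants
+   above and is forwarded from PE to PE until it either finds work or reaches
+   FISH_LIFE_EXPECTANCY, after which it is sent home (see processFish in
+   HLComms.c). */
+#if 0
+   sendFish(choosePE(), mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+#endif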
+
+//@node GUM Message Sending and Unpacking Functions
+//@subsection GUM Message Sending and Unpacking Functions
+
+rtsBool initMoreBuffers(void);
+
+void sendFetch (globalAddr *ga, globalAddr *bqga, int load);
+void sendResume(globalAddr *rga, int nelem, rtsPackBuffer *data);
+void sendAck (GlobalTaskId task, int ngas, globalAddr *gagamap);
+void sendFish (GlobalTaskId destPE, GlobalTaskId origPE, int age, int history, int hunger);
+void sendFree (GlobalTaskId destPE, int nelem, P_ data);
+void sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *data);
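+
+/* The normal fetch protocol, for orientation (sketch only; the details live
+   in HLComms.c):
+     PE A:  sendFetch     -- ask the owner of a remote closure for its value
+     PE B:  processFetch  -- sendResume with the packed graph, or blockFetch
+                             if the closure is still under evaluation
+     PE A:  processResume -- unpack the graph, wake blocked threads, sendAck
+     PE B:  processAck    -- turn shipped RBHs into FetchMes to the new GAs */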
+
+//@node Message-Processing Functions
+//@subsection Message-Processing Functions
+
+void processMessages(void);
+void processFetches(void);
+void processTheRealFetches(void);
+
+//@node Miscellaneous Functions
+//@subsection Miscellaneous Functions
+
+void prepareFreeMsgBuffers(void);
+void freeRemoteGA (int pe, globalAddr *ga);
+void sendFreeMessages(void);
+
+GlobalTaskId choosePE(void);
+StgClosure *createBlockedFetch (globalAddr ga, globalAddr rga);
+void waitForTermination(void);
+
+void DebugPrintGAGAMap (globalAddr *gagamap, int nGAs);
+
+#endif /* PAR */
+#endif /* __HLC_H */
--- /dev/null
+/* ----------------------------------------------------------------------------
+ * Time-stamp: <Wed Jan 12 2000 13:32:25 Stardate: [-30]4193.86 hwloidl>
+ * $Id: HLComms.c,v 1.2 2000/01/13 14:34:07 hwloidl Exp $
+ *
+ * High Level Communications Routines (HLComms.lc)
+ *
+ * Contains the high-level routines (i.e. communication
+ * subsystem independent) used by GUM
+ *
+ * Phil Trinder, Glasgow University, 12 December 1994
+ * Adapted for new RTS
+ * Phil Trinder, Simon Marlow July 1998
+ * H-W. Loidl, Heriot-Watt University, November 1999
+ *
+ * ------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@node High Level Communications Routines, , ,
+//@section High Level Communications Routines
+
+//@menu
+//* Macros etc::
+//* Includes::
+//* GUM Message Sending and Unpacking Functions::
+//* Message-Processing Functions::
+//* GUM Message Processor::
+//* Miscellaneous Functions::
+//* Index::
+//@end menu
+
+//@node Macros etc, Includes, High Level Communications Routines, High Level Communications Routines
+//@subsection Macros etc
+
+# ifndef _AIX
+# define NON_POSIX_SOURCE /* so says Solaris */
+# endif
+
+//@node Includes, GUM Message Sending and Unpacking Functions, Macros etc, High Level Communications Routines
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Storage.h" // for recordMutable
+#include "HLC.h"
+#include "Parallel.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "FetchMe.h" // for BLOCKED_FETCH_info etc
+#if defined(DEBUG)
+# include "ParallelDebug.h"
+#endif
+#include "StgMacros.h" // inlined IS_... fcts
+
+//@node GUM Message Sending and Unpacking Functions, Message-Processing Functions, Includes, High Level Communications Routines
+//@subsection GUM Message Sending and Unpacking Functions
+
+/*
+ * GUM Message Sending and Unpacking Functions
+ */
+
+/*
+ * Allocate space for message processing
+ */
+
+//@cindex gumPackBuffer
+static rtsPackBuffer *gumPackBuffer;
+
+//@cindex initMoreBuffers
+rtsBool
+initMoreBuffers(void)
+{
+ if ((gumPackBuffer = (rtsPackBuffer *)stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+ "initMoreBuffers")) == NULL)
+ return rtsFalse;
+ return rtsTrue;
+}
+
+/*
+ * SendFetch packs the two global addresses and a load into a message and
+ * sends it.
+
+//@cindex FETCH
+
+ Structure of a FETCH message:
+
+    |         GA 1         |     GA 2    |      |
+    +----------------------+-------------+------+
+    | gtid | slot | weight | gtid | slot | load |
+    +----------------------+-------------+------+
+ */
+
+//@cindex sendFetch
+void
+sendFetch(globalAddr *rga, globalAddr *lga, int load)
+{
+ ASSERT(rga->weight > 0 && lga->weight > 0);
+ IF_PAR_DEBUG(fetch,
+ belch("** [%x] Sending Fetch for ((%x, %d, 0)); locally ((%x, %d, %x)), load = %d",
+ mytid,
+ rga->payload.gc.gtid, rga->payload.gc.slot,
+ lga->payload.gc.gtid, lga->payload.gc.slot, lga->weight,
+ load));
+
+
+ /* ToDo: Dump event
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga->payload.gc.gtid),
+ GR_FETCH, CurrentTSO, (StgClosure *)(lga->payload.gc.slot),
+ 0, spark_queue_len(ADVISORY_POOL));
+ */
+
+ sendOpV(PP_FETCH, rga->payload.gc.gtid, 6,
+ (StgWord) rga->payload.gc.gtid, (StgWord) rga->payload.gc.slot,
+ (StgWord) lga->weight, (StgWord) lga->payload.gc.gtid,
+ (StgWord) lga->payload.gc.slot, (StgWord) load);
+}
+
+/*
+ * unpackFetch unpacks a FETCH message into two Global addresses and a load
+ * figure.
+*/
+
+//@cindex unpackFetch
+static void
+unpackFetch(globalAddr *lga, globalAddr *rga, int *load)
+{
+ long buf[6];
+
+ GetArgs(buf, 6);
+
+ IF_PAR_DEBUG(fetch,
+ belch("** [%x] Unpacking Fetch for ((%x, %d, 0)) to ((%x, %d, %x)), load = %d",
+ mytid,
+ (GlobalTaskId) buf[0], (int) buf[1],
+ (GlobalTaskId) buf[3], (int) buf[4], buf[2], buf[5]));
+
+ lga->weight = 1;
+ lga->payload.gc.gtid = (GlobalTaskId) buf[0];
+ lga->payload.gc.slot = (int) buf[1];
+
+ rga->weight = (unsigned) buf[2];
+ rga->payload.gc.gtid = (GlobalTaskId) buf[3];
+ rga->payload.gc.slot = (int) buf[4];
+
+ *load = (int) buf[5];
+
+ ASSERT(rga->weight > 0);
+}
+
+/*
+ * SendResume packs the remote blocking queue's GA and data into a message
+ * and sends it.
+
+//@cindex RESUME
+
+ Structure of a RESUME message:
+
+ -------------------------------
+ | weight | slot | n | data ...
+ -------------------------------
+
+ data is a packed graph represented as an rtsPackBuffer
+ n is the size of the graph (as returned by PackNearbyGraph) + packet hdr size
+ */
+
+//@cindex sendResume
+void
+sendResume(globalAddr *rga, int nelem, rtsPackBuffer *data) // StgPtr data)
+{
+ IF_PAR_DEBUG(resume,
+ PrintPacket(data);
+ belch("[] [%x] Sending Resume for ((%x, %d, %x))",
+ mytid,
+ rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight));
+
+ sendOpNV(PP_RESUME, rga->payload.gc.gtid,
+ nelem + PACK_BUFFER_HDR_SIZE, (StgPtr)data,
+ 2, (rtsWeight) rga->weight, (StgWord) rga->payload.gc.slot);
+}
+
+/*
+ * unpackResume unpacks a Resume message into two Global addresses and
+ * a data array.
+ */
+
+//@cindex unpackResume
+static void
+unpackResume(globalAddr *lga, int *nelem, rtsPackBuffer *data)
+{
+ long buf[3];
+
+ GetArgs(buf, 3);
+
+ IF_PAR_DEBUG(resume,
+ belch("[] [%x] Unpacking Resume for ((%x, %d, %x))",
+ mytid, mytid,
+ (int) buf[1], (unsigned) buf[0]));
+
+ /*
+ RESUME event is written in awaken_blocked_queue
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(lga->payload.gc.gtid),
+ GR_RESUME, END_TSO_QUEUE, (StgClosure *)NULL, 0, 0);
+ */
+
+ lga->weight = (unsigned) buf[0];
+ lga->payload.gc.gtid = mytid;
+ lga->payload.gc.slot = (int) buf[1];
+
+ *nelem = (int) buf[2]; // includes PACK_BUFFER_HDR_SIZE;
+ GetArgs(data, *nelem);
+ *nelem -= PACK_BUFFER_HDR_SIZE;
+}
+
+/*
+ * SendAck packs the global address being acknowledged, together with
+ * an array of global addresses for any closures shipped and sends them.
+
+//@cindex ACK
+
+ Structure of an ACK message:
+
+    |         GA 1         |         GA 2         |
+    +----------------------+----------------------+-------
+    | weight | gtid | slot | weight | gtid | slot |  .....  ngas times
+    +----------------------+----------------------+-------
+
+ */
+
+//@cindex sendAck
+void
+sendAck(GlobalTaskId task, int ngas, globalAddr *gagamap)
+{
+ static long *buffer;
+ long *p;
+ int i;
+
+ buffer = (long *) gumPackBuffer;
+
+ for(i = 0, p = buffer; i < ngas; i++, p += 6) {
+ ASSERT(gagamap[1].weight > 0);
+ p[0] = (long) gagamap->weight;
+ p[1] = (long) gagamap->payload.gc.gtid;
+ p[2] = (long) gagamap->payload.gc.slot;
+ gagamap++;
+ p[3] = (long) gagamap->weight;
+ p[4] = (long) gagamap->payload.gc.gtid;
+ p[5] = (long) gagamap->payload.gc.slot;
+ gagamap++;
+ }
+ IF_PAR_DEBUG(ack,
+ belch(",, [%x] Sending Ack (%d pairs) to PE %x\n",
+ mytid, ngas, task));
+
+ sendOpN(PP_ACK, task, p - buffer, buffer);
+}
+
+/*
+ * unpackAck unpacks an Acknowledgement message into a Global address,
+ * a count of the number of global addresses following and a map of
+ * Global addresses
+ */
+
+//@cindex unpackAck
+static void
+unpackAck(int *ngas, globalAddr *gagamap)
+{
+ long GAarraysize;
+ long buf[6];
+
+ GetArgs(&GAarraysize, 1);
+
+ *ngas = GAarraysize / 6;
+
+ IF_PAR_DEBUG(ack,
+ belch(",, [%x] Unpacking Ack (%d pairs) on %x\n",
+ mytid, *ngas, mytid));
+
+ while (GAarraysize > 0) {
+ GetArgs(buf, 6);
+ gagamap->weight = (rtsWeight) buf[0];
+ gagamap->payload.gc.gtid = (GlobalTaskId) buf[1];
+ gagamap->payload.gc.slot = (int) buf[2];
+ gagamap++;
+ gagamap->weight = (rtsWeight) buf[3];
+ gagamap->payload.gc.gtid = (GlobalTaskId) buf[4];
+ gagamap->payload.gc.slot = (int) buf[5];
+ ASSERT(gagamap->weight > 0);
+ gagamap++;
+ GAarraysize -= 6;
+ }
+}
+
+/*
+ * SendFish packs the task id of the originating PE, together with the age,
+ * history and hunger of the fish, into a message and sends it to destPE.
+
+//@cindex FISH
+
+ Structure of a FISH message:
+
+ +----------------------------------+
+ | orig PE | age | history | hunger |
+ +----------------------------------+
+ */
+
+//@cindex sendFish
+void
+sendFish(GlobalTaskId destPE, GlobalTaskId origPE,
+ int age, int history, int hunger)
+{
+ IF_PAR_DEBUG(fish,
+ belch("$$ [%x] Sending Fish to %x (%d outstanding fishes)",
+ mytid, destPE, outstandingFishes));
+
+ sendOpV(PP_FISH, destPE, 4,
+ (StgWord) origPE, (StgWord) age, (StgWord) history, (StgWord) hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsTrue;
+ outstandingFishes++;
+ }
+}
+
+/*
+ * unpackFish unpacks a FISH message into the global task id of the
+ * originating PE and 3 data fields: the age, history and hunger of the
+ * fish. The history + hunger are not currently used.
+
+ */
+
+//@cindex unpackFish
+static void
+unpackFish(GlobalTaskId *origPE, int *age, int *history, int *hunger)
+{
+ long buf[4];
+
+ GetArgs(buf, 4);
+
+ IF_PAR_DEBUG(fish,
+ belch("$$ [%x] Unpacking Fish from PE %x (age=%d)",
+ mytid, (GlobalTaskId) buf[0], (int) buf[1]));
+
+ *origPE = (GlobalTaskId) buf[0];
+ *age = (int) buf[1];
+ *history = (int) buf[2];
+ *hunger = (int) buf[3];
+}
+
+/*
+ * SendFree sends (weight, slot) pairs for GAs that we no longer need
+ * references to.
+
+//@cindex FREE
+
+ Structure of a FREE message:
+
+ +-----------------------------
+ | n | weight_1 | slot_1 | ...
+ +-----------------------------
+ */
+//@cindex sendFree
+void
+sendFree(GlobalTaskId pe, int nelem, StgPtr data)
+{
+ IF_PAR_DEBUG(free,
+ belch("!! [%x] Sending Free (%d GAs) to %x",
+ mytid, nelem/2, pe));
+
+ sendOpN(PP_FREE, pe, nelem, data);
+}
+
+/*
+ * unpackFree unpacks a FREE message into the amount of data shipped and
+ * a data block.
+ */
+//@cindex unpackFree
+static void
+unpackFree(int *nelem, rtsPackBuffer *data)
+{
+ long buf[1];
+
+ GetArgs(buf, 1);
+ *nelem = (int) buf[0];
+
+ IF_PAR_DEBUG(free,
+ belch("!! [%x] Unpacking Free (%d GAs)",
+ mytid, *nelem/2));
+
+ GetArgs(data, *nelem);
+}
+
+/*
+ * SendSchedule sends a closure to be evaluated in response to a Fish
+ * message. The message is directed to the PE that originated the Fish
+ * (origPE), and includes the packed closure (data) along with its size
+ * (nelem).
+
+//@cindex SCHEDULE
+
+ Structure of a SCHEDULE message:
+
+ +------------------------------------
+ | PE | n | pack buffer of a graph ...
+ +------------------------------------
+ */
+//@cindex sendSchedule
+void
+sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *data) // StgPtr data)
+{
+ IF_PAR_DEBUG(schedule,
+ PrintPacket(data);
+ belch("-- [%x] Sending Schedule (%d elems) to %x\n",
+ mytid, nelem, origPE));
+
+ sendOpN(PP_SCHEDULE, origPE, nelem + PACK_BUFFER_HDR_SIZE, (StgPtr)data);
+}
+
+/*
+ * unpackSchedule unpacks a SCHEDULE message into the amount of data
+ * shipped (nelem) and the data block (data).
+ */
+
+//@cindex unpackSchedule
+static void
+unpackSchedule(int *nelem, rtsPackBuffer *data)
+{
+ long buf[1];
+
+ GetArgs(buf, 1);
+ /* no. of elems, not counting the header of the pack buffer */
+ *nelem = (int) buf[0] - PACK_BUFFER_HDR_SIZE;
+
+ IF_PAR_DEBUG(schedule,
+ belch("-- [%x] Unpacking Schedule (%d elems) on %x\n",
+		     mytid, *nelem, mytid));
+
+ /* automatic cast of flat pvm-data to rtsPackBuffer */
+ GetArgs(data, *nelem + PACK_BUFFER_HDR_SIZE);
+}
+
+//@node Message-Processing Functions, GUM Message Processor, GUM Message Sending and Unpacking Functions, High Level Communications Routines
+//@subsection Message-Processing Functions
+
+/*
+ * Message-Processing Functions
+ *
+ * The following routines process incoming GUM messages, often issuing
+ * new messages in response.
+ *
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+
+/*
+ * blockFetch blocks a BlockedFetch node on some kind of black hole.
+ */
+//@cindex blockFetch
+static void
+blockFetch(StgBlockedFetch *bf, StgClosure *bh) {
+ bf->node = bh;
+ switch (get_itbl(bh)->type) {
+ case BLACKHOLE:
+ bf->link = END_BQ_QUEUE;
+ //((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info;
+ SET_INFO(bh, &BLACKHOLE_BQ_info); // turn closure into a blocking queue
+ ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list
+ recordMutable((StgMutClosure *)bh);
+
+# if 0
+ /*
+ * If we modify a black hole in the old generation, we have to
+ * make sure it goes on the mutables list
+ */
+
+ if (bh <= StorageMgrInfo.OldLim) {
+ MUT_LINK(bh) = (StgWord) StorageMgrInfo.OldMutables;
+ StorageMgrInfo.OldMutables = bh;
+ } else
+ MUT_LINK(bh) = MUT_NOT_LINKED;
+# endif
+ break;
+
+ case BLACKHOLE_BQ:
+ /* enqueue bf on blocking queue of closure bh */
+ bf->link = ((StgBlockingQueue *)bh)->blocking_queue;
+ ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list; ToDo: check
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case FETCH_ME_BQ:
+ /* enqueue bf on blocking queue of closure bh */
+ bf->link = ((StgFetchMeBlockingQueue *)bh)->blocking_queue;
+ ((StgFetchMeBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list; ToDo: check
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ case RBH:
+ /* enqueue bf on blocking queue of closure bh */
+ bf->link = ((StgRBH *)bh)->blocking_queue;
+ ((StgRBH *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+ // put bh on the mutables list; ToDo: check
+ recordMutable((StgMutClosure *)bh);
+ break;
+
+ default:
+ barf("Panic (blockFetch): thought %p was a black hole (IP %#lx, %s)",
+ (StgClosure *)bh, get_itbl((StgClosure *)bh),
+ info_type((StgClosure *)bh));
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("## blockFetch: after block the BQ of %p (%s) is:",
+ bh, info_type(bh));
+ print_bq(bh));
+}
+
+
+/*
+ * processFetches constructs and sends resume messages for every
+ * BlockedFetch which is ready to be awakened.
+ * awaken_blocked_queue (in Schedule.c) is responsible for moving
+ * BlockedFetches from a blocking queue to the PendingFetches queue.
+ */
+void GetRoots(void);
+extern StgBlockedFetch *PendingFetches;
+
+nat
+pending_fetches_len(void)
+{
+ StgBlockedFetch *bf;
+ nat n;
+
+ for (n=0, bf=PendingFetches; bf != END_BF_QUEUE; n++, bf = (StgBlockedFetch *)(bf->link)) {
+ ASSERT(get_itbl(bf)->type==BLOCKED_FETCH);
+ }
+ return n;
+}
+
+//@cindex processFetches
+void
+processFetches(void) {
+ StgBlockedFetch *bf;
+ StgClosure *closure, *next;
+ StgInfoTable *ip;
+ globalAddr rga;
+ static rtsPackBuffer *packBuffer;
+
+ IF_PAR_DEBUG(verbose,
+ belch("__ processFetches: %d pending fetches",
+ pending_fetches_len()));
+
+ for (bf = PendingFetches;
+ bf != END_BF_QUEUE;
+ bf=(StgBlockedFetch *)(bf->link)) {
+ /* the PendingFetches list contains only BLOCKED_FETCH closures */
+ ASSERT(get_itbl(bf)->type==BLOCKED_FETCH);
+
+ /*
+ * Find the target at the end of the indirection chain, and
+ * process it in much the same fashion as the original target
+ * of the fetch. Though we hope to find graph here, we could
+ * find a black hole (of any flavor) or even a FetchMe.
+ */
+ closure = bf->node;
+ /*
+ HACK 312: bf->node may have been evacuated since filling it; follow
+ the evacuee in this case; the proper way to handle this is to
+ traverse the blocking queue and update the node fields of
+     BLOCKED_FETCH entries when evacuating a BLACKHOLE_BQ, FETCH_ME_BQ
+ or RBH (but it's late and I'm tired)
+ */
+ if (get_itbl(closure)->type == EVACUATED)
+ closure = ((StgEvacuated *)closure)->evacuee;
+
+ while ((next = IS_INDIRECTION(closure)) != NULL) { closure = next; }
+
+ ip = get_itbl(closure);
+ if (ip->type == FETCH_ME) {
+ /* Forward the Fetch to someone else */
+ rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+ rga.payload.gc.slot = bf->ga.payload.gc.slot;
+ rga.weight = bf->ga.weight;
+
+ sendFetch(((StgFetchMe *)closure)->ga, &rga, 0 /* load */);
+
+ IF_PAR_DEBUG(forward,
+ belch("__ processFetches: Forwarding fetch from %lx to %lx",
+ mytid, rga.payload.gc.gtid));
+
+ } else if (IS_BLACK_HOLE(closure)) {
+ IF_PAR_DEBUG(verbose,
+		 belch("__ processFetches: trying to send a BLACK_HOLE => doing a blockFetch on closure %p (%s)",
+ closure, info_type(closure)));
+ bf->node = closure;
+ blockFetch(bf, closure);
+ } else {
+ /* We now have some local graph to send back */
+ nat size;
+
+ packBuffer = gumPackBuffer;
+ IF_PAR_DEBUG(verbose,
+ belch("__ processFetches: PackNearbyGraph of closure %p (%s)",
+ closure, info_type(closure)));
+
+ if ((packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size)) == NULL) {
+ // Put current BF back on list
+ bf->link = (StgBlockingQueueElement *)PendingFetches;
+ PendingFetches = (StgBlockedFetch *)bf;
+ // ToDo: check that nothing more has to be done to prepare for GC!
+ GarbageCollect(GetRoots);
+ bf = PendingFetches;
+ PendingFetches = (StgBlockedFetch *)(bf->link);
+ closure = bf->node;
+ packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size);
+ ASSERT(packBuffer != (rtsPackBuffer *)NULL);
+ }
+ rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+ rga.payload.gc.slot = bf->ga.payload.gc.slot;
+ rga.weight = bf->ga.weight;
+
+ sendResume(&rga, size, packBuffer);
+ }
+ }
+ PendingFetches = END_BF_QUEUE;
+}
+
+#if 0
+/*
+ Alternatively to sending fetch messages directly from the FETCH_ME_entry
+ code we could just store the data about the remote data in a global
+ variable and send the fetch request from the main scheduling loop (similar
+ to processFetches above). This would save an expensive STGCALL in the entry
+ code because we have to go back to the scheduler anyway.
+*/
+//@cindex processTheRealFetches
+void
+processTheRealFetches(void) {
+ StgBlockedFetch *bf;
+ StgClosure *closure, *next;
+
+ IF_PAR_DEBUG(verbose,
+ belch("__ processTheRealFetches: ");
+ printGA(&theGlobalFromGA);
+ printGA(&theGlobalToGA));
+
+ ASSERT(theGlobalFromGA.payload.gc.gtid != 0 &&
+ theGlobalToGA.payload.gc.gtid != 0);
+
+ /* the old version did this in the FETCH_ME entry code */
+ sendFetch(&theGlobalFromGA, &theGlobalToGA, 0/*load*/);
+
+#if DEBUG
+ theGlobalFromGA.payload.gc.gtid = 0;
+ theGlobalToGA.payload.gc.gtid = 0;
+#endif /* DEBUG */
+}
+#endif
+
+
+/*
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+//@cindex processFish
+static void
+processFish(void)
+{
+ GlobalTaskId origPE;
+ int age, history, hunger;
+ rtsSpark spark;
+ static rtsPackBuffer *packBuffer;
+
+ unpackFish(&origPE, &age, &history, &hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsFalse; // fish has come home
+ outstandingFishes--;
+ last_fish_arrived_at = CURRENT_TIME; // remember time (see schedule fct)
+ return; // that's all
+ }
+
+ ASSERT(origPE != mytid);
+ IF_PAR_DEBUG(fish,
+ belch("$$ [%x] processing fish; %d sparks available",
+ mytid, spark_queue_len(ADVISORY_POOL)));
+ while ((spark = findLocalSpark(rtsTrue)) != NULL) {
+ nat size;
+ // StgClosure *graph;
+
+ packBuffer = gumPackBuffer;
+ ASSERT(closure_SHOULD_SPARK((StgClosure *)spark));
+ if ((packBuffer = PackNearbyGraph(spark, END_TSO_QUEUE, &size)) == NULL) {
+ IF_PAR_DEBUG(fish,
+ belch("$$ GC while trying to satisfy FISH via PackNearbyGraph of node %p",
+ (StgClosure *)spark));
+ GarbageCollect(GetRoots);
+ /* Now go back and try again */
+ } else {
+ IF_PAR_DEBUG(fish,
+ belch("$$ [%x] Replying to FISH from %x by sending graph @ %p (%s)",
+ mytid, origPE,
+ (StgClosure *)spark, info_type((StgClosure *)spark)));
+ sendSchedule(origPE, size, packBuffer);
+ disposeSpark(spark);
+ break;
+ }
+ }
+ if (spark == (rtsSpark)NULL) {
+ IF_PAR_DEBUG(fish,
+ belch("$$ [%x] No sparks available for FISH from %x",
+ mytid, origPE));
+ /* We have no sparks to give */
+ if (age < FISH_LIFE_EXPECTANCY)
+      /* and the fish is still young, send it to another PE to look for work */
+ sendFish(choosePE(), origPE,
+ (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+
+ /* otherwise, send it home to die */
+ else
+ sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+ }
+} /* processFish */
+
+/*
+ * processFetch either returns the requested data (if available)
+ * or blocks the remote blocking queue on a black hole (if not).
+ */
+
+//@cindex processFetch
+static void
+processFetch(void)
+{
+ globalAddr ga, rga;
+ int load;
+ StgClosure *closure;
+ StgInfoTable *ip;
+
+ unpackFetch(&ga, &rga, &load);
+ IF_PAR_DEBUG(fetch,
+ belch("%% [%x] Rcvd Fetch for ((%x, %d, 0)), Resume ((%x, %d, %x)) (load %d) from %x",
+ mytid,
+ ga.payload.gc.gtid, ga.payload.gc.slot,
+ rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight, load,
+ rga.payload.gc.gtid));
+
+ closure = GALAlookup(&ga);
+ ASSERT(closure != (StgClosure *)NULL);
+ ip = get_itbl(closure);
+ if (ip->type == FETCH_ME) {
+ /* Forward the Fetch to someone else */
+ sendFetch(((StgFetchMe *)closure)->ga, &rga, load);
+ } else if (rga.payload.gc.gtid == mytid) {
+ /* Our own FETCH forwarded back around to us */
+ StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)GALAlookup(&rga);
+
+ IF_PAR_DEBUG(fetch,
+ belch("%% [%x] Fetch returned to sending PE; closure=%p (%s); receiver=%p (%s)",
+ mytid, closure, info_type(closure), fmbq, info_type(fmbq)));
+ /* We may have already discovered that the fetch target is our own. */
+ if ((StgClosure *)fmbq != closure)
+ CommonUp((StgClosure *)fmbq, closure);
+ (void) addWeight(&rga);
+ } else if (IS_BLACK_HOLE(closure)) {
+ /* This includes RBH's and FMBQ's */
+ StgBlockedFetch *bf;
+
+ ASSERT(GALAlookup(&rga) == NULL);
+
+    /* If we're hitting a BH, RBH or FMBQ we have to put a BLOCKED_FETCH
+       closure into the BQ so that, when this node is updated, the result
+       is sent to the originator of this fetch message. */
+ bf = (StgBlockedFetch *)createBlockedFetch(ga, rga);
+ blockFetch(bf, closure);
+
+ IF_PAR_DEBUG(fetch,
+ belch("%% [%x] Blocking Fetch ((%x, %d, %x)) on %p (%s)",
+ mytid,
+ rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight,
+ closure, info_type(closure)));
+ } else {
+ /* The target of the FetchMe is some local graph */
+ nat size;
+ // StgClosure *graph;
+ rtsPackBuffer *buffer = (rtsPackBuffer *)NULL;
+
+ if ((buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size)) == NULL) {
+ GarbageCollect(GetRoots);
+ closure = GALAlookup(&ga);
+ buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size);
+ ASSERT(buffer != (rtsPackBuffer *)NULL);
+ }
+ sendResume(&rga, size, buffer);
+ }
+}
+
+/*
+ * processFree unpacks a FREE message and adds the weights to our GAs.
+ */
+//@cindex processFree
+static void
+processFree(void)
+{
+ int nelem;
+ static StgWord *buffer;
+ int i;
+ globalAddr ga;
+
+ buffer = (StgWord *)gumPackBuffer;
+ unpackFree(&nelem, buffer);
+ IF_PAR_DEBUG(free,
+ belch("!! [%x] Rcvd Free (%d GAs)", mytid, nelem / 2));
+
+ ga.payload.gc.gtid = mytid;
+ for (i = 0; i < nelem;) {
+ ga.weight = (rtsWeight) buffer[i++];
+ ga.payload.gc.slot = (int) buffer[i++];
+ IF_PAR_DEBUG(free,
+ fprintf(stderr, "!! [%x] Processing free ", mytid);
+ printGA(&ga);
+ fputc('\n', stderr);
+ );
+ (void) addWeight(&ga);
+ }
+}
+
+/*
+ * processResume unpacks a RESUME message into the graph, filling in
+ * the LA -> GA, and GA -> LA tables. Threads blocked on the original
+ * FetchMe (now a blocking queue) are awakened, and the blocking queue
+ * is converted into an indirection. Finally it sends an ACK in response
+ * which contains any newly allocated GAs.
+ */
+
+//@cindex processResume
+static void
+processResume(GlobalTaskId sender)
+{
+ int nelem;
+ nat nGAs;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph, *old;
+ globalAddr lga;
+ globalAddr *gagamap;
+
+ packBuffer = gumPackBuffer;
+ unpackResume(&lga, &nelem, (StgPtr)packBuffer);
+
+ IF_PAR_DEBUG(resume,
+ fprintf(stderr, "[] [%x] Rcvd Resume for ", mytid);
+ printGA(&lga);
+ fputc('\n', stderr);
+ PrintPacket((rtsPackBuffer *)packBuffer));
+
+ /*
+ * We always unpack the incoming graph, even if we've received the
+ * requested node in some other data packet (and already awakened
+ * the blocking queue).
+ if (SAVE_Hp + packBuffer[0] >= SAVE_HpLim) {
+ ReallyPerformThreadGC(packBuffer[0], rtsFalse);
+ SAVE_Hp -= packBuffer[0];
+ }
+ */
+
+ // ToDo: Check for GC here !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ /* Do this *after* GC; we don't want to release the object early! */
+
+ if (lga.weight > 0)
+ (void) addWeight(&lga);
+
+ old = GALAlookup(&lga);
+
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ // StgTSO *tso = END_TSO_QUEUE;
+ StgBlockingQueueElement *bqe;
+
+ /* Write REPLY events to the log file, indicating that the remote
+ data has arrived */
+ if (get_itbl(old)->type == FETCH_ME_BQ ||
+ get_itbl(old)->type == RBH)
+ for (bqe = ((StgFetchMeBlockingQueue *)old)->blocking_queue;
+ bqe->link != END_BQ_QUEUE;
+ bqe = bqe->link)
+ if (get_itbl((StgClosure *)bqe)->type == TSO)
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(sender),
+ GR_REPLY, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure,
+ 0, spark_queue_len(ADVISORY_POOL));
+ }
+
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+
+ /*
+ * Sometimes, unpacking will common up the resumee with the
+ * incoming graph, but if it hasn't, we'd better do so now.
+ */
+
+ if (get_itbl(old)->type == FETCH_ME_BQ)
+ CommonUp(old, newGraph);
+
+ IF_PAR_DEBUG(resume,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ sendAck(sender, nGAs, gagamap);
+}
+
+/*
+ * processSchedule unpacks a SCHEDULE message into the graph, filling
+ * in the LA -> GA, and GA -> LA tables. The root of the graph is added to
+ * the local spark queue. Finally it sends an ACK in response
+ * which contains any newly allocated GAs.
+ */
+//@cindex processSchedule
+static void
+processSchedule(GlobalTaskId sender)
+{
+ nat nelem, space_required, nGAs;
+ rtsBool success;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph;
+ globalAddr *gagamap;
+
+ packBuffer = gumPackBuffer; /* HWL */
+ unpackSchedule(&nelem, packBuffer);
+
+ IF_PAR_DEBUG(schedule,
+ belch("-- [%x] Rcvd Schedule (%d elems)", mytid, nelem);
+ PrintPacket(packBuffer));
+
+ /*
+ * For now, the graph is a closure to be sparked as an advisory
+ * spark, but in future it may be a complete spark with
+ * required/advisory status, priority etc.
+ */
+
+ /*
+ space_required = packBuffer[0];
+ if (SAVE_Hp + space_required >= SAVE_HpLim) {
+ ReallyPerformThreadGC(space_required, rtsFalse);
+ SAVE_Hp -= space_required;
+ }
+ */
+ // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!1
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+ success = add_to_spark_queue(newGraph, rtsFalse);
+
+ IF_PAR_DEBUG(pack,
+ if (success)
+ belch("+* added spark to unpacked graph %p; %d sparks available on [%x]",
+ newGraph, spark_queue_len(ADVISORY_POOL), mytid);
+ else
+ belch("+* received non-sparkable closure %p; nothing added to spark pool; %d sparks available on [%x]",
+ newGraph, spark_queue_len(ADVISORY_POOL), mytid);
+ belch("-* Unpacked graph with root at %p (%s):",
+ newGraph, info_type(newGraph));
+ PrintGraph(newGraph, 0));
+
+ IF_PAR_DEBUG(pack,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ if (nGAs > 0)
+ sendAck(sender, nGAs, gagamap);
+
+ //fishing = rtsFalse;
+ ASSERT(outstandingFishes>0);
+ outstandingFishes--;
+}
+
+/*
+ * processAck unpacks an ACK, and uses the GAGA map to convert RBH's
+ * (which represent shared thunks that have been shipped) into fetch-mes
+ * to remote GAs.
+ */
+//@cindex processAck
+static void
+processAck(void)
+{
+ nat nGAs;
+ globalAddr *gaga;
+ globalAddr gagamap[256]; // ToDo: elim magic constant!! MAX_GAS * 2];??
+
+ unpackAck(&nGAs, gagamap);
+
+ IF_PAR_DEBUG(ack,
+ belch(",, [%x] Rcvd Ack (%d pairs)", mytid, nGAs);
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ /*
+ * For each (oldGA, newGA) pair, set the GA of the corresponding
+ * thunk to the newGA, convert the thunk to a FetchMe, and return
+ * the weight from the oldGA.
+ */
+ for (gaga = gagamap; gaga < gagamap + nGAs * 2; gaga += 2) {
+ StgClosure *old_closure = GALAlookup(gaga);
+ StgClosure *new_closure = GALAlookup(gaga + 1);
+
+ ASSERT(old_closure != NULL);
+ if (new_closure == NULL) {
+ /* We don't have this closure, so we make a fetchme for it */
+ globalAddr *ga = setRemoteGA(old_closure, gaga + 1, rtsTrue);
+
+ /* convertToFetchMe should be done unconditionally here.
+ Currently, we assign GAs to CONSTRs, too, (a bit of a hack),
+ so we have to check whether it is an RBH before converting
+
+ ASSERT(get_itbl(old_closure)==RBH);
+ */
+ if (get_itbl(old_closure)->type==RBH)
+ convertToFetchMe(old_closure, ga);
+ } else {
+ /*
+ * Oops...we've got this one already; update the RBH to
+ * point to the object we already know about, whatever it
+ * happens to be.
+ */
+ CommonUp(old_closure, new_closure);
+
+ /*
+ * Increase the weight of the object by the amount just
+ * received in the second part of the ACK pair.
+ */
+ (void) addWeight(gaga + 1);
+ }
+ (void) addWeight(gaga);
+ }
+}
+
+//@node GUM Message Processor, Miscellaneous Functions, Message-Processing Functions, High Level Communications Routines
+//@subsection GUM Message Processor
+
+/*
+ * GUM Message Processor
+
+ * processMessages processes any messages that have arrived, calling
+ * appropriate routines depending on the message tag
+ * (opcode). N.B. Unless profiling it assumes that there {\em ARE} messages
+ * present and performs a blocking receive! During profiling it
+ * busy-waits in order to record idle time.
+ */
+
+//@cindex processMessages
+void
+processMessages(void)
+{
+ rtsPacket packet;
+ OpCode opcode;
+ GlobalTaskId task;
+
+ do {
+ packet = GetPacket(); /* Get next message; block until one available */
+ getOpcodeAndSender(packet, &opcode, &task);
+
+ switch (opcode) {
+ case PP_FINISH:
+ IF_PAR_DEBUG(verbose,
+ belch("== [%x] received FINISH", mytid));
+      /* setting this global variable eventually terminates the main
+ scheduling loop for this PE and causes a shut-down, sending
+ PP_FINISH to SysMan */
+ GlobalStopPending = rtsTrue;
+ break;
+
+ case PP_FETCH:
+ processFetch();
+ break;
+
+ case PP_RESUME:
+ processResume(task);
+ break;
+
+ case PP_ACK:
+ processAck();
+ break;
+
+ case PP_FISH:
+ processFish();
+ break;
+
+ case PP_FREE:
+ processFree();
+ break;
+
+ case PP_SCHEDULE:
+ processSchedule(task);
+ break;
+
+ default:
+ /* Anything we're not prepared to deal with. */
+ barf("Task %x: Unexpected opcode %x from %x",
+ mytid, opcode, task);
+ } /* switch */
+
+ } while (PacketsWaiting()); /* While there are messages: process them */
+} /* processMessages */
+
+//@node Miscellaneous Functions, Index, GUM Message Processor, High Level Communications Routines
+//@subsection Miscellaneous Functions
+
+/*
+ * ChoosePE selects a GlobalTaskId from the array of PEs 'at random'.
+ * Important properties:
+ * - it varies during execution, even if the PE is idle
+ * - it's different for each PE
+ * - we never send a fish to ourselves
+ */
+extern long lrand48 (void);
+
+//@cindex choosePE
+GlobalTaskId
+choosePE(void)
+{
+ long temp;
+
+ temp = lrand48() % nPEs;
+ if (allPEs[temp] == mytid) { /* Never send a FISH to yourself */
+ temp = (temp + 1) % nPEs;
+ }
+ return allPEs[temp];
+}
+
+/*
+ * allocate a BLOCKED_FETCH closure and fill it with the relevant fields
+ * of the ga argument; called from processFetch when the local closure is
+ * under evaluation
+ */
+//@cindex createBlockedFetch
+StgClosure *
+createBlockedFetch (globalAddr ga, globalAddr rga)
+{
+ StgBlockedFetch *bf;
+ StgClosure *closure;
+
+ closure = GALAlookup(&ga);
+ if ((bf = (StgBlockedFetch *)allocate(FIXED_HS + sizeofW(StgBlockedFetch))) == NULL) {
+ GarbageCollect(GetRoots);
+ closure = GALAlookup(&ga);
+ bf = (StgBlockedFetch *)allocate(FIXED_HS + sizeofW(StgBlockedFetch));
+ // ToDo: check whether really guaranteed to succeed 2nd time around
+ }
+
+ ASSERT(bf != (StgClosure *)NULL);
+ SET_INFO((StgClosure *)bf, &BLOCKED_FETCH_info);
+ // ToDo: check whether other header info is needed
+ bf->node = closure;
+ bf->ga.payload.gc.gtid = rga.payload.gc.gtid;
+ bf->ga.payload.gc.slot = rga.payload.gc.slot;
+ bf->ga.weight = rga.weight;
+ // bf->link = NULL; debugging
+
+ IF_PAR_DEBUG(fetch,
+ fprintf(stderr, "%% [%x] created BF: closure=%p (%s), GA: ",
+ mytid, closure, info_type(closure));
+ printGA(&(bf->ga));
+ fputc('\n',stderr));
+ return bf;
+}
+
+/*
+ * waitForTermination enters a loop ignoring spurious messages while
+ * waiting for the termination sequence to be completed.
+ */
+//@cindex waitForTermination
+void
+waitForTermination(void)
+{
+ do {
+ rtsPacket p = GetPacket();
+ processUnexpected(p);
+ } while (rtsTrue);
+}
+
+#ifdef DEBUG
+//@cindex DebugPrintGAGAMap
+void
+DebugPrintGAGAMap(globalAddr *gagamap, int nGAs)
+{
+ int i;
+
+ for (i = 0; i < nGAs; ++i, gagamap += 2)
+ fprintf(stderr, "gagamap[%d] = ((%x, %d, %x)) -> ((%x, %d, %x))\n", i,
+ gagamap[0].payload.gc.gtid, gagamap[0].payload.gc.slot, gagamap[0].weight,
+ gagamap[1].payload.gc.gtid, gagamap[1].payload.gc.slot, gagamap[1].weight);
+}
+#endif
+
+//@cindex freeMsgBuffer
+static StgWord **freeMsgBuffer = NULL;
+//@cindex freeMsgIndex
+static int *freeMsgIndex = NULL;
+
+//@cindex prepareFreeMsgBuffers
+void
+prepareFreeMsgBuffers(void)
+{
+ int i;
+
+ /* Allocate the freeMsg buffers just once and then hang onto them. */
+ if (freeMsgIndex == NULL) {
+ freeMsgIndex = (int *) stgMallocBytes(nPEs * sizeof(int),
+ "prepareFreeMsgBuffers (Index)");
+ freeMsgBuffer = (StgWord **) stgMallocBytes(nPEs * sizeof(long *),
+ "prepareFreeMsgBuffers (Buffer)");
+
+ for(i = 0; i < nPEs; i++)
+ if (i != thisPE)
+ freeMsgBuffer[i] = (StgPtr) stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+ "prepareFreeMsgBuffers (Buffer #i)");
+ }
+
+ /* Initialize the freeMsg buffer pointers to point to the start of their
+ buffers */
+ for (i = 0; i < nPEs; i++)
+ freeMsgIndex[i] = 0;
+}
+
+//@cindex freeRemoteGA
+void
+freeRemoteGA(int pe, globalAddr *ga)
+{
+ int i;
+
+ ASSERT(GALAlookup(ga) == NULL);
+
+ if ((i = freeMsgIndex[pe]) + 2 >= RtsFlags.ParFlags.packBufferSize) {
+ IF_PAR_DEBUG(free,
+		 belch("Filled a free message buffer (sending remaining messages individually)"));
+
+ sendFree(ga->payload.gc.gtid, i, freeMsgBuffer[pe]);
+ i = 0;
+ }
+ freeMsgBuffer[pe][i++] = (StgWord) ga->weight;
+ freeMsgBuffer[pe][i++] = (StgWord) ga->payload.gc.slot;
+ freeMsgIndex[pe] = i;
+
+#ifdef DEBUG
+ ga->weight = 0x0f0f0f0f;
+ ga->payload.gc.gtid = 0x666;
+ ga->payload.gc.slot = 0xdeaddead;
+#endif
+}
+
+//@cindex sendFreeMessages
+void
+sendFreeMessages(void)
+{
+ int i;
+
+ for (i = 0; i < nPEs; i++)
+ if (freeMsgIndex[i] > 0)
+ sendFree(allPEs[i], freeMsgIndex[i], freeMsgBuffer[i]);
+}
+
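+/* Sketch of the intended calling protocol for the three functions above
+   (illustrative only): batch up (weight, slot) pairs per owning PE, then
+   flush all non-empty buffers in one go. */
+#if 0
+   prepareFreeMsgBuffers();
+   for ( /* each global address ga whose local copy has died */ ; 0 ; )
+     freeRemoteGA(pe, ga);          /* pe = index of the PE owning ga */
+   sendFreeMessages();
+#endif
+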
+#endif /* PAR -- whole file */
+
+//@node Index, , Miscellaneous Functions, High Level Communications Routines
+//@subsection Index
+
+//@index
+//* ACK:: @cindex\s-+ACK
+//* DebugPrintGAGAMap:: @cindex\s-+DebugPrintGAGAMap
+//* FETCH:: @cindex\s-+FETCH
+//* FISH:: @cindex\s-+FISH
+//* FREE:: @cindex\s-+FREE
+//* RESUME:: @cindex\s-+RESUME
+//* SCHEDULE:: @cindex\s-+SCHEDULE
+//* blockFetch:: @cindex\s-+blockFetch
+//* choosePE:: @cindex\s-+choosePE
+//* freeMsgBuffer:: @cindex\s-+freeMsgBuffer
+//* freeMsgIndex:: @cindex\s-+freeMsgIndex
+//* freeRemoteGA:: @cindex\s-+freeRemoteGA
+//* gumPackBuffer:: @cindex\s-+gumPackBuffer
+//* initMoreBuffers:: @cindex\s-+initMoreBuffers
+//* prepareFreeMsgBuffers:: @cindex\s-+prepareFreeMsgBuffers
+//* processAck:: @cindex\s-+processAck
+//* processFetch:: @cindex\s-+processFetch
+//* processFetches:: @cindex\s-+processFetches
+//* processFish:: @cindex\s-+processFish
+//* processFree:: @cindex\s-+processFree
+//* processMessages:: @cindex\s-+processMessages
+//* processResume:: @cindex\s-+processResume
+//* processSchedule:: @cindex\s-+processSchedule
+//* sendAck:: @cindex\s-+sendAck
+//* sendFetch:: @cindex\s-+sendFetch
+//* sendFish:: @cindex\s-+sendFish
+//* sendFree:: @cindex\s-+sendFree
+//* sendFreeMessages:: @cindex\s-+sendFreeMessages
+//* sendResume:: @cindex\s-+sendResume
+//* sendSchedule:: @cindex\s-+sendSchedule
+//* unpackAck:: @cindex\s-+unpackAck
+//* unpackFetch:: @cindex\s-+unpackFetch
+//* unpackFish:: @cindex\s-+unpackFish
+//* unpackFree:: @cindex\s-+unpackFree
+//* unpackResume:: @cindex\s-+unpackResume
+//* unpackSchedule:: @cindex\s-+unpackSchedule
+//* waitForTermination:: @cindex\s-+waitForTermination
+//@end index
--- /dev/null
+/* --------------------------------------------------------------------------
+ Time-stamp: <Wed Nov 17 1999 16:50:58 Stardate: [-30]3913.51 hwloidl>
+ $Id: LLC.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $
+
+ Low Level Communications Header (LLC.h)
+
+ Contains the definitions used by the Low-level Communications
+ module of the GUM Haskell runtime environment.
+ Based on the Graph for PVM implementation.
+
+ Phil Trinder, Glasgow University, 13th Dec 1994
+ Adapted for the 4.xx RTS
+ H-W. Loidl, Heriot-Watt, November 1999
+ ----------------------------------------------------------------------- */
+
+#ifndef __LLC_H
+#define __LLC_H
+
+#ifdef PAR
+
+//@node Low Level Communications Header, , ,
+//@section Low Level Communications Header
+
+//@menu
+//* Includes::
+//* Macros and Constants::
+//* PVM macros::
+//* Externs::
+//@end menu
+
+//@node Includes, Macros and Constants, Low Level Communications Header, Low Level Communications Header
+//@subsection Includes
+
+#include "Rts.h"
+#include "Parallel.h"
+
+#include "PEOpCodes.h"
+#include "pvm3.h"
+
+//@node Macros and Constants, PVM macros, Includes, Low Level Communications Header
+//@subsection Macros and Constants
+
+#define ANY_TASK (-1) /* receive messages from any task */
+#define ANY_GLOBAL_TASK ANY_TASK
+#define ANY_OPCODE (-1) /* receive any opcode */
+#define ALL_GROUP (-1) /* wait for barrier from every group member */
+
+#define PEGROUP "PE"
+
+#define MGRGROUP "MGR"
+#define PECTLGROUP "PECTL"
+
+
+#define PETASK "PE"
+
+//@node PVM macros, Externs, Macros and Constants, Low Level Communications Header
+//@subsection PVM macros
+
+#define sync(gp,op) do { \
+ broadcast(gp,op); \
+ pvm_barrier(gp,ALL_GROUP); \
+ } while(0)
+
+#define broadcast(gp,op) do { \
+ pvm_initsend(PvmDataDefault); \
+ pvm_bcast(gp,op); \
+ } while(0)
+
+#define checkComms(c,s) do { \
+ if ((c)<0) { \
+ pvm_perror(s); \
+ stg_exit(EXIT_FAILURE); \
+ }} while(0)
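+
+/* Usage sketch: checkComms wraps a PVM call and aborts via pvm_perror/stg_exit
+   on a negative return code, e.g. as in startUpPE (LLComms.c):
+
+       checkComms(pvm_joingroup(PEGROUP), "PEStartup");
+
+   sync(gp,op) broadcasts the opcode to the group and then waits for all
+   members at the pvm_barrier. */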
+
+#define _my_gtid pvm_mytid()
+#define GetPacket() pvm_recv(ANY_TASK,ANY_OPCODE)
+#define PacketsWaiting() (pvm_probe(ANY_TASK,ANY_OPCODE) != 0)
+
+#define SPARK_THREAD_DESCRIPTOR 1
+#define GLOBAL_THREAD_DESCRIPTOR 2
+
+#define _extract_jump_field(v) (v)
+
+#define MAX_DATA_WORDS_IN_PACKET 1024
+
+/* basic PVM packing */
+#define PutArg1(a) pvm_pklong(&(a),1,1)
+#define PutArg2(a) pvm_pklong(&(a),1,1)
+#define PutArgN(n,a) pvm_pklong(&(a),1,1)
+#define PutArgs(b,n) pvm_pklong(b,n,1)
+
+#define	PutLit(l)	{ long a = (l); PutArgN(1,a); }	/* index arg is ignored by PutArgN */
+
+/* basic PVM unpacking */
+#define GetArg1(a) pvm_upklong(&(a),1,1)
+#define GetArg2(a) pvm_upklong(&(a),1,1)
+#define GetArgN(n,a) pvm_upklong(&(a),1,1)
+#define GetArgs(b,n) pvm_upklong(b,n,1)
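+
+/* Usage sketch: a one-word message is packed with PutArg1 on the sending side
+   and is expected to be unpacked with the matching GetArg1 on the receiving
+   side (cf. sendOp1 in LLComms.c and GetArgs in startUpPE):
+
+       pvm_initsend(PvmDataRaw); PutArg1(arg); pvm_send(task, op);   // sender
+       p = pvm_recv(task, op);   GetArg1(arg);                       // receiver
+*/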
+
+//@node Externs, , PVM macros, Low Level Communications Header
+//@subsection Externs
+
+/* basic message passing routines */
+extern void sendOp (OpCode,GlobalTaskId),
+ sendOp1 (OpCode,GlobalTaskId,StgWord),
+ sendOp2 (OpCode,GlobalTaskId,StgWord,StgWord),
+ sendOpV (OpCode,GlobalTaskId,int,...),
+ sendOpN (OpCode,GlobalTaskId,int,StgPtr),
+ sendOpNV (OpCode,GlobalTaskId,int,StgPtr,int,...);
+
+/* extracting data out of a packet */
+OpCode getOpcode (rtsPacket p);
+void getOpcodeAndSender (rtsPacket p, OpCode *popcode,
+ GlobalTaskId *psender_id);
+GlobalTaskId senderTask (rtsPacket p);
+rtsPacket waitForPEOp (OpCode op, GlobalTaskId who);
+
+/* Init and shutdown routines */
+GlobalTaskId *startUpPE (unsigned nPEs);
+void shutDownPE(void);
+
+/* aux functions */
+char *getOpName (unsigned op); // returns string of opcode
+void processUnexpected (rtsPacket);
+//void NullException(void);
+
+#endif /*PAR */
+#endif /*defined __LLC_H */
--- /dev/null
+/* ----------------------------------------------------------------------------
+ * Time-stamp: <Wed Jan 12 2000 12:29:53 Stardate: [-30]4193.64 hwloidl>
+ * $Id: LLComms.c,v 1.2 2000/01/13 14:34:07 hwloidl Exp $
+ *
+ * GUM Low-Level Inter-Task Communication
+ *
+ * This module defines PVM Routines for PE-PE communication.
+ * P. Trinder, December 5th. 1994.
+ * Adapted for the new RTS
+ * P. Trinder, July 1998
+ * H-W. Loidl, November 1999
+ --------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@node GUM Low-Level Inter-Task Communication, , ,
+//@section GUM Low-Level Inter-Task Communication
+
+/*
+ * This module defines the routines which communicate between PEs. The
+ * code is based on Kevin Hammond's GRIP RTS. (OpCodes.h defines
+ * PEOp1 etc. in terms of sendOp1 etc.)
+ *
+ *   Routine     Arguments
+ *   -------     ---------
+ *   sendOp      0
+ *   sendOp1     1
+ *   sendOp2     2
+ *   sendOpN     vector
+ *   sendOpV     variable
+ *   sendOpNV    variable + vector
+ *
+ * First the standard include files.
+ */
+
+//@menu
+//* Macros etc::
+//* Includes::
+//* Auxiliary functions::
+//* Index::
+//@end menu
+
+//@node Macros etc, Includes, GUM Low-Level Inter-Task Communication, GUM Low-Level Inter-Task Communication
+//@subsection Macros etc
+
+#define NON_POSIX_SOURCE /* so says Solaris */
+#define UNUSED /* nothing */
+
+//@node Includes, Auxiliary functions, Macros etc, GUM Low-Level Inter-Task Communication
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Parallel.h"
+#include "ParallelRts.h"
+#if defined(DEBUG)
+# include "ParallelDebug.h"
+#endif
+#include "LLC.h"
+
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+/* Cannot use std macro when compiling for SysMan */
+/* debugging enabled */
+// #define IF_PAR_DEBUG(c,s) { s; }
+/* debugging disabled */
+#define IF_PAR_DEBUG(c,s) /* nothing */
+
+//@node Auxiliary functions, Index, Includes, GUM Low-Level Inter-Task Communication
+//@subsection Auxiliary functions
+
+/*
+ * heapChkCounter tracks the number of heap checks since the last probe.
+ * Not currently used! We check for messages when a thread is rescheduled.
+ */
+int heapChkCounter = 0;
+
+/*
+ * Then some miscellaneous functions.
+ * getOpName returns the character-string name of any OpCode.
+ */
+
+char *UserPEOpNames[] = { PEOP_NAMES };
+
+//@cindex getOpName
+char *
+getOpName(nat op)
+{
+ if (op >= MIN_PEOPS && op <= MAX_PEOPS)
+ return (UserPEOpNames[op - MIN_PEOPS]);
+ else
+ return ("Unknown PE OpCode");
+}
+
+/*
+ * traceSendOp handles the tracing of messages.
+ */
+
+//@cindex traceSendOp
+static void
+traceSendOp(OpCode op, GlobalTaskId dest UNUSED,
+ unsigned int data1 UNUSED, unsigned int data2 UNUSED)
+{
+ char *OpName;
+
+ OpName = getOpName(op);
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr," %s [%x,%x] sent from %x to %x",
+ OpName, data1, data2, mytid, dest));
+}
+
+/*
+ * sendOp sends a 0-argument message with OpCode {\em op} to
+ * the global task {\em task}.
+ */
+
+//@cindex sendOp
+void
+sendOp(OpCode op, GlobalTaskId task)
+{
+ traceSendOp(op, task,0,0);
+
+ pvm_initsend(PvmDataRaw);
+ pvm_send(task, op);
+}
+
+/*
+ * sendOp1 sends a 1-argument message with OpCode {\em op}
+ * to the global task {\em task}.
+ */
+
+//@cindex sendOp1
+void
+sendOp1(OpCode op, GlobalTaskId task, StgWord arg1)
+{
+ traceSendOp(op, task, arg1,0);
+
+ pvm_initsend(PvmDataRaw);
+ PutArg1(arg1);
+ pvm_send(task, op);
+}
+
+
+/*
+ * sendOp2 is used by the FP code only.
+ */
+
+//@cindex sendOp2
+void
+sendOp2(OpCode op, GlobalTaskId task, StgWord arg1, StgWord arg2)
+{
+ traceSendOp(op, task, arg1, arg2);
+
+ pvm_initsend(PvmDataRaw);
+ PutArg1(arg1);
+ PutArg2(arg2);
+ pvm_send(task, op);
+}
+
+/*
+ *
+ * sendOpV takes a variable number of arguments, as specified by {\em n}.
+ * For example,
+ *
+ * sendOpV( PP_STATS, StatsTask, 3, start_time, stop_time, sparkcount);
+ */
+
+//@cindex sendOpV
+void
+sendOpV(OpCode op, GlobalTaskId task, int n, ...)
+{
+ va_list ap;
+ int i;
+ StgWord arg;
+
+ va_start(ap, n);
+
+ traceSendOp(op, task, 0, 0);
+
+ pvm_initsend(PvmDataRaw);
+
+ for (i = 0; i < n; ++i) {
+ arg = va_arg(ap, StgWord);
+ PutArgN(i, arg);
+ }
+ va_end(ap);
+
+ pvm_send(task, op);
+}
+
+/*
+ *
+ * sendOpNV takes a variable-size datablock, as specified by {\em
+ * nelem} and a variable number of arguments, as specified by {\em
+ * narg}. N.B. The datablock and the additional arguments are contiguous
+ * and are copied over together. For example,
+ *
+ * sendOpNV(PP_RESUME, tsoga.pe, 6, nelem, data,
+ * (W_) ga.weight, (W_) ga.loc.gc.gtid, (W_) ga.loc.gc.slot,
+ * (W_) tsoga.weight, (W_) tsoga.loc.gc.gtid, (W_) tsoga.loc.gc.slot);
+ *
+ * Important: The variable arguments must all be StgWords.
+
+ sendOpNV(_, tid, m, n, data, x1, ..., xm):
+
+ | n elems
+ +------------------------------
+ | x1 | ... | xm | n | data ....
+ +------------------------------
+ */
+
+//@cindex sendOpNV
+void
+sendOpNV(OpCode op, GlobalTaskId task, int nelem,
+ StgWord *datablock, int narg, ...)
+{
+ va_list ap;
+ int i;
+ StgWord arg;
+
+ va_start(ap, narg);
+
+ traceSendOp(op, task, 0, 0);
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr,"sendOpNV: op = %x (%s), task = %x, narg = %d, nelem = %d",
+ op, getOpName(op), task, narg, nelem));
+
+ pvm_initsend(PvmDataRaw);
+
+ for (i = 0; i < narg; ++i) {
+ arg = va_arg(ap, StgWord);
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr,"sendOpNV: arg = %d\n",arg));
+ PutArgN(i, arg);
+ }
+ arg = (StgWord) nelem;
+ PutArgN(narg, arg);
+
+/* for (i=0; i < nelem; ++i) fprintf(stderr, "%d ",datablock[i]); */
+/* fprintf(stderr," in sendOpNV\n");*/
+
+ PutArgs(datablock, nelem);
+ va_end(ap);
+
+ pvm_send(task, op);
+}
+
+/*
+ * sendOpN takes a variable-size array argument, whose size is given by
+ * {\em n}. For example,
+ *
+ * sendOpN( PP_STATS, StatsTask, 3, stats_array);
+ */
+
+//@cindex sendOpN
+void
+sendOpN(OpCode op, GlobalTaskId task, int n, StgPtr args)
+{
+ long arg;
+
+ traceSendOp(op, task, 0, 0);
+
+ pvm_initsend(PvmDataRaw);
+ arg = (long) n;
+ PutArgN(0, arg);
+ PutArgs(args, n);
+ pvm_send(task, op);
+}
+
+/*
+ * waitForPEOp waits for a packet from global task {\em who} with the
+ * OpCode {\em op}. Other OpCodes are handled by processUnexpected.
+ */
+//@cindex waitForPEOp
+rtsPacket
+waitForPEOp(OpCode op, GlobalTaskId who)
+{
+ rtsPacket p;
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ rtsBool match;
+
+ do {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"waitForPEOp: op = %x (%s), who = %x\n",
+ op, getOpName(op), who));
+
+ while((p = pvm_recv(ANY_TASK,ANY_OPCODE)) < 0)
+ pvm_perror("waitForPEOp: Waiting for PEOp");
+
+ pvm_bufinfo( p, &nbytes, &opCode, &sender_id );
+ IF_PAR_DEBUG(verbose,
+		 fprintf(stderr,"waitForPEOp: received: OpCode = %x (%s), sender_id = %x",
+ opCode, getOpName(opCode), sender_id));
+
+ match = (op == ANY_OPCODE || op == opCode) &&
+ (who == ANY_TASK || who == sender_id);
+
+ if (match)
+ return(p);
+
+ /* Handle the unexpected OpCodes */
+ processUnexpected(p);
+
+ } while(rtsTrue);
+}
+
+/*
+ * processUnexpected processes unexpected messages. If the message is a
+ * FINISH it shuts down the program, and PVM, gracefully.
+ */
+//@cindex processUnexpected
+void
+processUnexpected(rtsPacket packet)
+{
+ OpCode opCode = getOpcode(packet);
+
+ IF_PAR_DEBUG(verbose,
+ GlobalTaskId sender = senderTask(packet);
+ fprintf(stderr,"== [%x] processUnexpected: Received %x (%s), sender %x\n",
+ mytid, opCode, getOpName(opCode), sender));
+
+ switch (opCode) {
+ case PP_FINISH:
+ stg_exit(EXIT_SUCCESS);
+ break;
+
+ /* Anything we're not prepared to deal with. Note that ALL OpCodes
+ are discarded during termination -- this helps prevent bizarre
+ race conditions. */
+ default:
+ if (!GlobalStopPending) {
+ GlobalTaskId errorTask;
+ OpCode opCode;
+
+ getOpcodeAndSender(packet,&opCode,&errorTask);
+ fprintf(stderr,"Task %x: Unexpected OpCode %x from %x in processUnexpected",
+ mytid, opCode, errorTask );
+
+ stg_exit(EXIT_FAILURE);
+ }
+ }
+}
+
+//@cindex getOpcode
+OpCode
+getOpcode(rtsPacket p)
+{
+ int nbytes;
+ OpCode OpCode;
+ GlobalTaskId sender_id;
+ pvm_bufinfo(p, &nbytes, &OpCode, &sender_id);
+ return(OpCode);
+}
+
+//@cindex getOpcodeAndSender
+void
+getOpcodeAndSender(rtsPacket p, OpCode *opCodep, GlobalTaskId *senderIdp)
+{
+ int nbytes;
+ pvm_bufinfo(p, &nbytes, opCodep, senderIdp);
+}
+
+//@cindex senderTask
+GlobalTaskId
+senderTask(rtsPacket p)
+{
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ pvm_bufinfo(p, &nbytes, &opCode, &sender_id);
+ return(sender_id);
+}
+
+/*
+ * startUpPE does the low-level comms-specific startup stuff for a
+ * PE. It initialises the comms system, joins the appropriate groups,
+ * and synchronises with the other PEs. It receives and records in a global
+ * variable the task-id of SysMan. If this is the main thread (discovered
+ * in main.lc), it identifies itself to SysMan. Finally it receives
+ * from SysMan an array of the Global Task Ids of each PE, which is
+ * returned as the value of the function.
+ */
+
+//@cindex startUpPE
+GlobalTaskId *
+startUpPE(nat nPEs)
+{
+ int i;
+ rtsPacket addr;
+ long *buffer = (long *) stgMallocBytes(sizeof(long) * nPEs,
+ "PEStartUp (buffer)");
+ GlobalTaskId *thePEs = (GlobalTaskId *)
+ stgMallocBytes(sizeof(GlobalTaskId) * nPEs,
+ "PEStartUp (PEs)");
+
+ mytid = _my_gtid; /* Initialise PVM and get task id into global var.*/
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Task id = [%x], No. PEs = %d \n",
+ mytid, mytid, nPEs));
+ checkComms(pvm_joingroup(PEGROUP), "PEStartup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Joined PEGROUP\n", mytid));
+ checkComms(pvm_joingroup(PECTLGROUP), "PEStartup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Joined PECTLGROUP\n", mytid));
+ checkComms(pvm_barrier(PECTLGROUP, nPEs+1), "PEStartup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup, Passed PECTLGROUP barrier\n", mytid));
+
+ addr = waitForPEOp(PP_SYSMAN_TID, ANY_GLOBAL_TASK);
+ SysManTask = senderTask(addr);
+ if (IAmMainThread) { /* Main Thread Identifies itself to SysMan */
+ pvm_initsend(PvmDataDefault);
+ pvm_send(SysManTask, PP_MAIN_TASK);
+ }
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Thread waits for %s\n",
+ mytid, getOpName(PP_PETIDS)));
+ addr = waitForPEOp(PP_PETIDS, ANY_GLOBAL_TASK);
+ GetArgs(buffer, nPEs);
+ for (i = 0; i < nPEs; ++i) {
+ thePEs[i] = (GlobalTaskId) buffer[i];
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: PEs[%d] = %x \n",
+ mytid, i, thePEs[i]));
+ }
+ free(buffer);
+ return thePEs;
+}
+
+/*
+ * shutDownPE does the low-level comms-specific shutdown stuff for a
+ * single PE. It leaves the groups and then exits from pvm.
+ */
+//@cindex shutDownPE
+void
+shutDownPE(void)
+{
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "== [%x] PEshutdown\n", mytid));
+
+ checkComms(pvm_lvgroup(PEGROUP),"PEShutDown");
+ checkComms(pvm_lvgroup(PECTLGROUP),"PEShutDown");
+ checkComms(pvm_exit(),"PEShutDown");
+}
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Auxiliary functions, GUM Low-Level Inter-Task Communication
+//@subsection Index
+
+//@index
+//* getOpName:: @cindex\s-+getOpName
+//* traceSendOp:: @cindex\s-+traceSendOp
+//* sendOp:: @cindex\s-+sendOp
+//* sendOp1:: @cindex\s-+sendOp1
+//* sendOp2:: @cindex\s-+sendOp2
+//* sendOpV:: @cindex\s-+sendOpV
+//* sendOpNV:: @cindex\s-+sendOpNV
+//* sendOpN:: @cindex\s-+sendOpN
+//* waitForPEOp:: @cindex\s-+waitForPEOp
+//* processUnexpected:: @cindex\s-+processUnexpected
+//* getOpcode:: @cindex\s-+getOpcode
+//* getOpcodeAndSender:: @cindex\s-+getOpcodeAndSender
+//* senderTask:: @cindex\s-+senderTask
+//* startUpPE:: @cindex\s-+startUpPE
+//* shutDownPE:: @cindex\s-+shutDownPE
+//@end index
--- /dev/null
+#ifndef PEOPCODES_H
+#define PEOPCODES_H
+
+/************************************************************************
+* PEOpCodes.h *
+* *
+* This file contains definitions for all the GUM PE Opcodes *
+* It's based on the GRAPH for PVM version *
+* Phil Trinder, Glasgow University 8th December 1994 *
+* *
+************************************************************************/
+
+#define REPLY_OK 0x00
+
+/*Startup + Shutdown*/
+#define PP_SYSMAN_TID 0x50
+#define PP_MAIN_TASK 0x51
+#define PP_FINISH 0x52
+#define PP_PETIDS 0x53
+
+/* Stats stuff */
+#define PP_STATS 0x54
+#define PP_STATS_ON 0x55
+#define PP_STATS_OFF 0x56
+
+#define PP_FAIL 0x57
+
+/*Garbage Collection*/
+#define PP_GC_INIT 0x58
+#define PP_FULL_SYSTEM 0x59
+#define PP_GC_POLL 0x5a
+
+/*GUM Messages*/
+#define PP_FETCH 0x5b
+#define PP_RESUME 0x5c
+#define PP_ACK 0x5d
+#define PP_FISH 0x5e
+#define PP_SCHEDULE 0x5f
+#define PP_FREE 0x60
+
+#define MIN_PEOPS 0x50
+#define MAX_PEOPS 0x60
+
+#define PEOP_NAMES "Init", "IOInit", \
+ "Finish", "PETIDS", \
+ "Stats", "Stats_On", "Stats_Off", \
+ "Fail", \
+ "GCInit", "FullSystem", "GCPoll", \
+ "Fetch","Resume","ACK","Fish","Schedule", \
+ "Free"
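+
+/* Note: getOpName (LLComms.c) indexes this table as UserPEOpNames[op - MIN_PEOPS],
+   so e.g. PP_FETCH (0x5b) maps to slot 11 ("Fetch") and PP_FISH (0x5e) to slot
+   14 ("Fish"); the names must be kept in step with the opcode values above. */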
+
+#endif /* PEOPCODES_H */
--- /dev/null
+/*
+ Time-stamp: <Thu Dec 16 1999 18:21:17 Stardate: [-30]4058.61 software>
+ $Id: Pack.c,v 1.2 2000/01/13 14:34:08 hwloidl Exp $
+
+ Graph packing and unpacking code for sending it to another processor
+ and retrieving the original graph structure from the packet.
+ In the old RTS the code was split into Pack.c and Unpack.c (now deceased)
+ Used in GUM and GrAnSim.
+
+ The GrAnSim version of the code defines routines for *simulating* the
+ packing of closures in the same way it is done in the parallel runtime
+ system. Basically GrAnSim only puts the addresses of the closures to be
+ transferred into a buffer. This buffer will then be associated with the
+ event of transferring the graph. When this event is scheduled, the
+ @UnpackGraph@ routine is called and the buffer can be discarded
+ afterwards.
+
+ Note that in GranSim we need many buffers, not just one per PE. */
+
+//@node Graph packing, , ,
+//@section Graph packing
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#define _HS (sizeofW(StgHeader))
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* Global variables::
+//* ADT of Closure Queues::
+//* Initialisation for packing::
+//* Packing Functions::
+//* Low level packing routines::
+//* Unpacking routines::
+//* Aux fcts for packing::
+//* Printing Packet Contents::
+//* End of file::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Graph packing, Graph packing
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ClosureTypes.h"
+#include "Storage.h"
+#include "Hash.h"
+#include "Parallel.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+# if defined(DEBUG)
+# include "ParallelDebug.h"
+# endif
+#include "FetchMe.h"
+
+/* Which RTS flag should be used to get the size of the pack buffer ? */
+# if defined(PAR)
+# define RTS_PACK_BUFFER_SIZE RtsFlags.ParFlags.packBufferSize
+# else /* GRAN */
+# define RTS_PACK_BUFFER_SIZE RtsFlags.GranFlags.packBufferSize
+# endif
+
+//@node Prototypes, Global variables, Includes, Graph packing
+//@subsection Prototypes
+/*
+ Code declarations.
+*/
+
+//@node ADT of closure queues, Init for packing, Prototypes, Prototypes
+//@subsubsection ADT of closure queues
+
+static inline void AllocClosureQueue(nat size);
+static inline void InitClosureQueue(void);
+static inline rtsBool QueueEmpty(void);
+static inline void QueueClosure(StgClosure *closure);
+static inline StgClosure *DeQueueClosure(void);
+
+//@node Init for packing, Packing routines, ADT of closure queues, Prototypes
+//@subsubsection Init for packing
+
+static void initPacking(void);
+# if defined(PAR)
+rtsBool initPackBuffer(void);
+# elif defined(GRAN)
+rtsPackBuffer *InstantiatePackBuffer (void);
+static void reallocPackBuffer (void);
+# endif
+
+//@node Packing routines, Low level packing fcts, Init for packing, Prototypes
+//@subsubsection Packing routines
+
+static void PackClosure (StgClosure *closure);
+
+//@node Low level packing fcts, Unpacking routines, Packing routines, Prototypes
+//@subsubsection Low level packing fcts
+
+# if defined(GRAN)
+static inline void Pack (StgClosure *data);
+# else
+static inline void Pack (StgWord data);
+
+static void PackPLC (StgPtr addr);
+static void PackOffset (int offset);
+static void GlobaliseAndPackGA (StgClosure *closure);
+# endif
+
+//@node Unpacking routines, Aux fcts for packing, Low level packing fcts, Prototypes
+//@subsubsection Unpacking routines
+
+# if defined(PAR)
+void InitPendingGABuffer(nat size);
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer *packBuffer,
+ globalAddr **gamap,
+ nat *nGAs);
+# elif defined(GRAN)
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+#endif
+
+//@node Aux fcts for packing, , Unpacking routines, Prototypes
+//@subsubsection Aux fcts for packing
+
+# if defined(PAR)
+static void DonePacking(void);
+static void AmPacking(StgClosure *closure);
+static int OffsetFor(StgClosure *closure);
+static rtsBool NotYetPacking(int offset);
+static rtsBool RoomToPack (nat size, nat ptrs);
+ rtsBool isOffset(globalAddr *ga);
+ rtsBool isFixed(globalAddr *ga);
+# elif defined(GRAN)
+static void DonePacking(void);
+static rtsBool NotYetPacking(StgClosure *closure);
+# endif
+
+//@node Global variables, ADT of Closure Queues, Prototypes, Graph packing
+//@subsection Global variables
+/*
+ Static data declarations
+*/
+
+static nat pack_locn, /* ptr to first free loc in pack buffer */
+ clq_size, clq_pos,
+ buf_id = 1; /* identifier for buffer */
+static nat unpacked_size;
+static nat reservedPAsize; /* Space reserved for primitive arrays */
+static rtsBool RoomInBuffer;
+
+# if defined(GRAN)
+/*
+ The pack buffer
+ To be pedantic: in GrAnSim we're packing *addresses* of closures,
+ not the closures themselves.
+*/
+static rtsPackBuffer *Bonzo = NULL; /* size: can be set via option */
+# else
+static rtsPackBuffer *Bonzo = NULL; /* size: can be set via option */
+# endif
+
+/*
+ Bit of a hack for testing if a closure is the root of the graph. This is
+ set in @PackNearbyGraph@ and tested in @PackClosure@.
+*/
+
+static nat packed_thunks = 0;
+static StgClosure *graph_root;
+
+# if defined(PAR)
+/*
+ The offset hash table is used during packing to record the location in
+ the pack buffer of each closure which is packed.
+*/
+//@cindex offsetTable
+static HashTable *offsetTable;
+
+//@cindex PendingGABuffer
+static globalAddr *PendingGABuffer;
+/* is initialised in main; */
+# endif /* PAR */
+
+//@node ADT of Closure Queues, Initialisation for packing, Global variables, Graph packing
+//@subsection ADT of Closure Queues
+
+//@menu
+//* Closure Queues::
+//* Init routines::
+//* Basic routines::
+//@end menu
+
+//@node Closure Queues, Init routines, ADT of Closure Queues, ADT of Closure Queues
+//@subsubsection Closure Queues
+/*
+ Closure Queues
+
+ These routines manage the closure queue.
+*/
+
+static nat clq_pos, clq_size;
+
+static StgClosure **ClosureQueue = NULL; /* HWL: init in main */
+
+//@node Init routines, Basic routines, Closure Queues, ADT of Closure Queues
+//@subsubsection Init routines
+
+/* @InitClosureQueue@ initialises the closure queue. */
+
+//@cindex AllocClosureQueue
+static inline void
+AllocClosureQueue(size)
+nat size;
+{
+ ASSERT(ClosureQueue == NULL);
+ ClosureQueue = (StgClosure**) stgMallocWords(size, "AllocClosureQueue");
+}
+
+//@cindex InitClosureQueue
+static inline void
+InitClosureQueue(void)
+{
+ clq_pos = clq_size = 0;
+
+ if ( ClosureQueue == NULL )
+ AllocClosureQueue(RTS_PACK_BUFFER_SIZE);
+}
+
+//@node Basic routines, , Init routines, ADT of Closure Queues
+//@subsubsection Basic routines
+
+/*
+ QueueEmpty returns rtsTrue if the closure queue is empty; rtsFalse otherwise.
+*/
+
+//@cindex QueueEmpty
+static inline rtsBool
+QueueEmpty(void)
+{
+ return(clq_pos >= clq_size);
+}
+
+/* QueueClosure adds its argument to the closure queue. */
+
+//@cindex QueueClosure
+static inline void
+QueueClosure(closure)
+StgClosure *closure;
+{
+ if(clq_size < RTS_PACK_BUFFER_SIZE )
+ ClosureQueue[clq_size++] = closure;
+ else
+ barf("Closure Queue Overflow (EnQueueing %p (%s))",
+ closure, info_type(closure));
+}
+
+/* DeQueueClosure returns the head of the closure queue. */
+
+//@cindex DeQueueClosure
+static inline StgClosure *
+DeQueueClosure(void)
+{
+ if(!QueueEmpty())
+ return(ClosureQueue[clq_pos++]);
+ else
+ return((StgClosure*)NULL);
+}
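+
+/* Usage sketch: the packing code drives a breadth-first traversal of the graph
+   with this queue (see PackNearbyGraph below):
+
+       QueueClosure(closure);
+       do {
+         PackClosure(DeQueueClosure());
+       } while (!QueueEmpty());
+*/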
+
+//@node Initialisation for packing, Packing Functions, ADT of Closure Queues, Graph packing
+//@subsection Initialisation for packing
+/*
+ Simple Packing Routines
+
+ About packet sizes in GrAnSim: In GrAnSim we use a malloced block of
+ gransim_pack_buffer_size words to simulate a packet of pack_buffer_size
+ words. In the simulated PackBuffer we only keep the addresses of the
+ closures that would be packed in the parallel system (see Pack). To
+ decide if a packet overflow occurs pack_buffer_size must be compared
+ versus unpacked_size (see RoomToPack). Currently, there is no multi
+ packet strategy implemented, so in the case of an overflow we just stop
+ adding closures to the closure queue. If an overflow of the simulated
+ packet occurs, we just realloc some more space for it and carry on as
+ usual. -- HWL */
+
+# if defined(GRAN)
+rtsPackBuffer *
+InstantiatePackBuffer (void) {
+ extern rtsPackBuffer *Bonzo;
+
+ Bonzo = (rtsPackBuffer *) stgMallocWords(sizeofW(rtsPackBuffer),
+ "InstantiatePackBuffer: failed to alloc packBuffer");
+ Bonzo->size = RtsFlags.GranFlags.packBufferSize_internal;
+ Bonzo->buffer = (StgWord **) stgMallocWords(RtsFlags.GranFlags.packBufferSize_internal,
+ "InstantiatePackBuffer: failed to alloc GranSim internal packBuffer");
+ /* NB: gransim_pack_buffer_size instead of pack_buffer_size -- HWL */
+ /* stgMallocWords is now simple allocate in Storage.c */
+
+ return (Bonzo);
+}
+
+/*
+ Reallocate the GranSim internal pack buffer to make room for more closure
+ pointers. This is independent of the check for packet overflow as in GUM
+*/
+static void
+reallocPackBuffer (void) {
+
+ ASSERT(pack_locn >= (int)Bonzo->size+sizeofW(rtsPackBuffer));
+
+ IF_GRAN_DEBUG(packBuffer,
+ belch("** Increasing size of PackBuffer %p to %d words (PE %u @ %d)\n",
+ Bonzo, Bonzo->size+REALLOC_SZ,
+ CurrentProc, CurrentTime[CurrentProc]));
+
+ Bonzo = (rtsPackBuffer*)realloc(Bonzo,
+ sizeof(StgClosure*)*(REALLOC_SZ +
+ (int)Bonzo->size +
+ sizeofW(rtsPackBuffer))) ;
+ if (Bonzo==(rtsPackBuffer*)NULL)
+ barf("Failing to realloc %d more words for PackBuffer %p (PE %u @ %d)\n",
+ REALLOC_SZ, Bonzo, CurrentProc, CurrentTime[CurrentProc]);
+
+ Bonzo->size += REALLOC_SZ;
+
+ ASSERT(pack_locn < Bonzo->size+sizeofW(rtsPackBuffer));
+}
+# endif
+
+# if defined(PAR)
+/* @initPackBuffer@ initialises the GUM pack buffer etc. */
+//@cindex initPackBuffer
+rtsBool
+initPackBuffer(void)
+{
+ if (Bonzo == NULL) { /* not yet allocated */
+
+ if ((Bonzo = (rtsPackBuffer *)
+ stgMallocWords(sizeofW(rtsPackBuffer)+RtsFlags.ParFlags.packBufferSize,
+ "initPackBuffer")) == NULL)
+ return rtsFalse;
+
+ InitPendingGABuffer(RtsFlags.ParFlags.packBufferSize);
+ AllocClosureQueue(RtsFlags.ParFlags.packBufferSize);
+ }
+ return rtsTrue;
+}
+# endif
+
+static void
+initPacking(void)
+{
+# if defined(GRAN)
+ Bonzo = InstantiatePackBuffer(); /* for GrAnSim only -- HWL */
+ /* NB: free in UnpackGraph */
+# endif
+
+  Bonzo->id = buf_id++;  /* buffer ids are only used for debugging! */
+ pack_locn = 0; /* the index into the actual pack buffer */
+ unpacked_size = 0; /* the size of the whole graph when unpacked */
+ reservedPAsize = 0;
+ RoomInBuffer = rtsTrue;
+ InitClosureQueue();
+ packed_thunks = 0; /* total number of thunks packed so far */
+# if defined(PAR)
+ offsetTable = allocHashTable();
+# endif
+}
+
+//@node Packing Functions, Low level packing routines, Initialisation for packing, Graph packing
+//@subsection Packing Functions
+
+//@menu
+//* Packing Sections of Nearby Graph::
+//* Packing Closures::
+//@end menu
+
+//@node Packing Sections of Nearby Graph, Packing Closures, Packing Functions, Packing Functions
+//@subsubsection Packing Sections of Nearby Graph
+/*
+ Packing Sections of Nearby Graph
+
+ @PackNearbyGraph@ packs a closure and associated graph into a static
+ buffer (@PackBuffer@). It returns the address of this buffer and the
+ size of the data packed into the buffer (in its second parameter,
+  @packBufferSize@).  The associated graph is packed in a breadth-first
+  manner, hence it uses an explicit queue of closures to be packed rather
+ than simply using a recursive algorithm. Once the packet is full,
+ closures (other than primitive arrays) are packed as FetchMes, and their
+ children are not queued for packing. */
+
+//@cindex PackNearbyGraph
+
+/* NB: this code is shared between GranSim and GUM;
+ tso only used in GranSim */
+rtsPackBuffer *
+PackNearbyGraph(closure, tso, packBufferSize)
+StgClosure* closure;
+StgTSO* tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *Bonzo;
+ /* Ensure enough heap for all possible RBH_Save closures */
+
+ ASSERT(RTS_PACK_BUFFER_SIZE > 0);
+
+ /* ToDo: check that we have enough heap for the packet
+ ngoq ngo'
+ if (Hp + PACK_HEAP_REQUIRED > HpLim)
+ return NULL;
+ */
+
+ initPacking();
+# if defined(GRAN)
+ graph_root = closure;
+# endif
+
+ IF_GRAN_DEBUG(pack,
+ belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [PE %d]\n demanded by TSO %d (%p) [PE %u]",
+ Bonzo->id, Bonzo, closure, where_is(closure),
+ tso->id, tso, where_is((StgClosure*)tso)));
+
+ IF_GRAN_DEBUG(pack,
+ belch("** PrintGraph of %p is:", closure);
+ PrintGraph(closure,0));
+
+ IF_PAR_DEBUG(pack,
+ belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [%x]\n demanded by TSO %d (%p)",
+ Bonzo->id, Bonzo, closure, mytid,
+ tso->id, tso));
+
+ IF_PAR_DEBUG(pack,
+ belch("** PrintGraph of %p is:", closure);
+ belch("** pack_locn=%d", pack_locn);
+ PrintGraph(closure,0));
+
+ QueueClosure(closure);
+ do {
+ PackClosure(DeQueueClosure());
+ } while (!QueueEmpty());
+
+# if defined(PAR)
+
+ /* Record how much space is needed to unpack the graph */
+ Bonzo->tso = tso; // ToDo: check: used in GUM or only for debugging?
+ Bonzo->unpacked_size = unpacked_size;
+ Bonzo->size = pack_locn;
+
+ /* Set the size parameter */
+ ASSERT(pack_locn <= RtsFlags.ParFlags.packBufferSize);
+ *packBufferSize = pack_locn;
+
+# else /* GRAN */
+
+ /* Record how much space is needed to unpack the graph */
+ // PackBuffer[PACK_FLAG_LOCN] = (P_) MAGIC_PACK_FLAG; for testing
+ Bonzo->tso = tso;
+ Bonzo->unpacked_size = unpacked_size;
+
+ // ASSERT(pack_locn <= PackBuffer[PACK_SIZE_LOCN]+PACK_HDR_SIZE);
+ /* ToDo: Print an earlier, more meaningful message */
+ if (pack_locn==0) /* i.e. packet is empty */
+ barf("EMPTY PACKET! Can't transfer closure %p at all!!\n",
+ closure);
+ Bonzo->size = pack_locn;
+ *packBufferSize = pack_locn;
+
+# endif
+
+ DonePacking(); /* {GrAnSim}vaD 'ut'Ha' */
+
+# if defined(GRAN)
+ IF_GRAN_DEBUG(pack ,
+ belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d",
+ Bonzo->id, closure, Bonzo->size, packed_thunks, Bonzo->unpacked_size));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_packets++;
+ globalGranStats.tot_packet_size += pack_locn;
+ }
+
+ IF_GRAN_DEBUG(pack, PrintPacket(Bonzo));
+# elif defined(PAR)
+  IF_PAR_DEBUG(pack,
+ belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d",
+ Bonzo->id, closure, Bonzo->size, packed_thunks, Bonzo->unpacked_size);
+ PrintPacket(Bonzo));
+# endif /* GRAN */
+
+ return (Bonzo);
+}
+
+//@cindex PackOneNode
+
+# if defined(GRAN)
+/* This version is used when the node is already local */
+
+rtsPackBuffer *
+PackOneNode(closure, tso, packBufferSize)
+StgClosure* closure;
+StgTSO* tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *Bonzo;
+ int i, clpack_locn;
+
+ initPacking();
+
+ IF_GRAN_DEBUG(pack,
+ belch("** PackOneNode: %p (%s)[PE %d] requested by TSO %d (%p) [PE %d]",
+ closure, info_type(closure),
+ where_is(closure), tso->id, tso, where_is((StgClosure *)tso)));
+
+ Pack(closure);
+
+ /* Record how much space is needed to unpack the graph */
+ Bonzo->tso = tso;
+ Bonzo->unpacked_size = unpacked_size;
+
+ /* Set the size parameter */
+ ASSERT(pack_locn <= RTS_PACK_BUFFER_SIZE);
+ Bonzo->size = pack_locn;
+ *packBufferSize = pack_locn;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_packets++;
+ globalGranStats.tot_packet_size += pack_locn;
+ }
+ IF_GRAN_DEBUG(pack,
+ PrintPacket(Bonzo));
+
+ return (Bonzo);
+}
+# endif /* GRAN */
+
+#if defined(GRAN)
+
+/*
+ PackTSO and PackStkO are entry points for two special kinds of closure
+ which are used in the parallel RTS. Compared with other closures they
+ are rather awkward to pack because they don't follow the normal closure
+ layout (where all pointers occur before all non-pointers). Luckily,
+ they're only needed when migrating threads between processors. */
+
+//@cindex PackTSO
+rtsPackBuffer*
+PackTSO(tso, packBufferSize)
+StgTSO *tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *Bonzo;
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing TSO %d (%p)", tso->id, tso));
+ *packBufferSize = 0;
+ // PackBuffer[0] = PackBuffer[1] = 0; ???
+ return(Bonzo);
+}
+
+//@cindex PackStkO
+rtsPackBuffer*
+PackStkO(stko, packBufferSize)
+StgPtr stko;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *Bonzo;
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing STKO %p", stko));
+ *packBufferSize = 0;
+ // PackBuffer[0] = PackBuffer[1] = 0;
+ return(Bonzo);
+}
+
+void
+PackFetchMe(StgClosure *closure)
+{
+ barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!");
+}
+
+#elif defined(PAR)
+
+rtsPackBuffer*
+PackTSO(tso, packBufferSize)
+StgTSO *tso;
+nat *packBufferSize;
+{
+ barf("{PackTSO}Daq Qagh: trying to pack a TSO; thread migrations not supported, yet");
+}
+
+rtsPackBuffer*
+PackStkO(stko, packBufferSize)
+StgPtr stko;
+nat *packBufferSize;
+{
+ barf("{PackStkO}Daq Qagh: trying to pack a STKO; thread migrations not supported, yet");
+}
+
+//@cindex PackFetchMe
+void
+PackFetchMe(StgClosure *closure)
+{
+ StgInfoTable *ip;
+ nat i;
+
+#if defined(GRAN)
+ barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!");
+#else
+ /* Pack a FetchMe closure instead of closure */
+ ip = &FETCH_ME_info;
+ /* this assumes that the info ptr is always the first word in a closure*/
+ Pack((StgWord)ip);
+ for (i = 1; i < _HS; ++i) // pack rest of fixed header
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ unpacked_size += _HS; // ToDo: check
+#endif
+}
+
+#endif
+
+//@node Packing Closures, , Packing Sections of Nearby Graph, Packing Functions
+//@subsubsection Packing Closures
+/*
+ Packing Closures
+
+ @PackClosure@ is the heart of the normal packing code. It packs a single
+ closure into the pack buffer, skipping over any indirections and
+ globalising it as necessary, queues any child pointers for further
+ packing, and turns it into a @FetchMe@ or revertible black hole (@RBH@)
+ locally if it was a thunk. Before the actual closure is packed, a
+ suitable global address (GA) is inserted in the pack buffer. There is
+ always room to pack a fetch-me to the closure (guaranteed by the
+ RoomToPack calculation), and this is packed if there is no room for the
+ entire closure.
+
+ Space is allocated for any primitive array children of a closure, and
+  hence a primitive array can always be packed along with its parent
+ closure. */
+
+//@cindex PackClosure
+
+# if defined(PAR)
+
+void
+PackClosure(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info;
+ StgClosure *indirectee, *rbh;
+ nat size, ptrs, nonptrs, vhs, i, clpack_locn;
+ rtsBool is_CONSTR = rtsFalse;
+ char str[80];
+
+ ASSERT(closure!=NULL);
+ indirectee = closure;
+ do {
+ /* Don't pack indirection closures */
+ closure = indirectee;
+ indirectee = IS_INDIRECTION(closure);
+ IF_PAR_DEBUG(pack,
+ if (indirectee)
+ belch("** Shorted an indirection (%s) at %p (-> %p)",
+ info_type(closure), closure, indirectee));
+ } while (indirectee);
+
+ clpack_locn = OffsetFor(closure);
+
+ /* If the closure has been packed already, just pack an indirection to it
+ to guarantee that the graph doesn't become a tree when unpacked */
+ if (!NotYetPacking(clpack_locn)) {
+ StgInfoTable *info;
+
+ PackOffset(clpack_locn);
+ return;
+ }
+
+ /*
+ * PLCs reside on all of the PEs already. Just pack the
+ * address as a GA (a bit of a kludge, since an address may
+ * not fit in *any* of the individual GA fields). Const,
+ * charlike and small intlike closures are converted into
+ * PLCs.
+ */
+ switch (get_itbl(closure)->type) {
+
+# ifdef DEBUG
+ // check error cases only in a debugging setup
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ barf("** {Pack}Daq Qagh: found return vector %p (%s) when packing (thread migration not implemented)",
+ closure, info_type(closure));
+ /* never reached */
+
+ case UPDATE_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ barf("** {Pack}Daq Qagh: found stack frame %p (%s) when packing (thread migration not implemented)",
+ closure, info_type(closure));
+ /* never reached */
+
+ case TSO:
+ case BLOCKED_FETCH:
+ case EVACUATED:
+ /* something's very wrong */
+ barf("** {Pack}Daq Qagh: found %s (%p) when packing",
+ info_type(closure), closure);
+ /* never reached */
+# endif
+
+ case CONSTR_CHARLIKE:
+ IF_PAR_DEBUG(pack,
+ belch("** Packing a charlike closure %d",
+ ((StgIntCharlikeClosure*)closure)->data));
+
+ PackPLC(CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data));
+ return;
+
+ case CONSTR_INTLIKE:
+ {
+ StgInt val = ((StgIntCharlikeClosure*)closure)->data;
+
+ if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) {
+ IF_PAR_DEBUG(pack,
+ belch("** Packing a small intlike %d as a PLC", val));
+ PackPLC(INTLIKE_CLOSURE(val));
+ return;
+ } else {
+ IF_PAR_DEBUG(pack,
+ belch("** Packing a big intlike %d as a normal closure",
+ val));
+ break;
+ }
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ /* it's a constructor (i.e. plain data) but we don't know
+ how many ptrs, non-ptrs there are => use generic code */
+ IF_PAR_DEBUG(pack,
+ belch("** Packing a CONSTR %p (%s) using generic packing with GA",
+ closure, info_type(closure)));
+ // is_CONSTR = rtsTrue;
+ break;
+ /* fall through to generic packing code */
+
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:// For now we ship indirections to CAFs: They are
+ // evaluated on each PE if needed
+ IF_PAR_DEBUG(pack,
+ belch("** Packing a %p (%s) as a PLC",
+ closure, info_type(closure)));
+
+ PackPLC(closure);
+ return;
+
+ case MVAR:
+ /* MVARs may not be copied; they are sticky objects in the new RTS */
+ /* therefore we treat them just as RBHs etc (what a great system!) */
+ IF_PAR_DEBUG(pack,
+ belch("** Found an MVar at %p (%s)",
+ closure, info_type(closure)));
+ /* fall through !! */
+
+ case THUNK_SELECTOR: // ToDo: fix packing of this strange beast
+ IF_PAR_DEBUG(pack,
+		 belch("** Found a THUNK_SELECTOR at %p (%s)",
+ closure, info_type(closure)));
+ /* fall through !! */
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ case RBH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+
+ /* If it's a (revertible) black-hole, pack a FetchMe closure to it */
+ //ASSERT(pack_locn > PACK_HDR_SIZE);
+
+ IF_PAR_DEBUG(pack,
+ belch("** Packing a BH or FM at %p (%s) of (fixed size %d)",
+ closure, info_type(closure), _HS));
+
+ /* Need a GA even when packing a constructed FETCH_ME (cruel world!) */
+ GlobaliseAndPackGA(closure);
+
+ PackFetchMe(closure);
+ return;
+
+    default:
+/*      IF_PAR_DEBUG(pack, */
+/* 	       belch("** Not a PLC or BH ... ")); */
+      break;	/* fall out to the generic packing code below */
+ } /* switch */
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(!IS_BLACK_HOLE(closure));
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "** packing %p (%s) (size=%d, ptrs=%d, nonptrs=%d)\n",
+ closure, info_type(closure), size, ptrs, nonptrs));
+
+ /*
+ * Now peek ahead to see whether the closure has any primitive array
+ * children
+ */
+ /*
+ ToDo: fix this code -- HWL
+ for (i = 0; i < ptrs; ++i) {
+ StgInfoTable * childInfo;
+ nat childSize, childPtrs, childNonPtrs, childVhs;
+
+ // extract i-th pointer out of closure
+ childInfo = get_closure_info(((PP_) (closure))[i + FIXED_HS + vhs],
+ &childSize, &childPtrs, &childNonPtrs, &childVhs, str);
+ if (IS_BIG_MOTHER(childInfo)) {
+ reservedPAsize += PACK_GA_SIZE + FIXED_HS + childVhs + childNonPtrs
+ + childPtrs * PACK_FETCHME_SIZE;
+ }
+ }
+ */
+ /* Record the location of the GA */
+ AmPacking(closure);
+
+ /* Pack the global address */
+ if (!is_CONSTR) {
+ GlobaliseAndPackGA(closure);
+ } else {
+ IF_PAR_DEBUG(pack,
+ belch("** No GA allocated for CONSTR %p (%s)",
+ closure, info_type(closure)));
+ }
+
+ /*
+ * Pack a fetchme to the closure if it's a black hole, or the buffer is full
+ * and it isn't a primitive array. N.B. Primitive arrays are always packed
+ * (because their parents index into them directly)
+ */
+
+ // ToDo: pack FMs if no more room available in packet (see below)
+ if (!(RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)))
+ barf("** Qagh: Pack: not enough room in packet to pack closure %p (%s)",
+ closure, info_type(closure));
+
+ /*
+ Has been moved into the switch statement
+
+ if (IS_BLACK_HOLE(closure))
+ !(RoomToPack(PACK_GA_SIZE + FIXED_HS + vhs + nonptrs, ptrs)
+ || IS_BIG_MOTHER(info)))
+ {
+
+ ASSERT(pack_locn > PACK_HDR_SIZE);
+
+ info = FetchMe_info;
+ for (i = 0; i < FIXED_HS; ++i) {
+ if (i == INFO_HDR_POSN)
+ Pack((StgWord) FetchMe_info);
+ else
+ Pack(closure[i]);
+ }
+
+ unpacked_size += FIXED_HS + FETCHME_CLOSURE_SIZE(dummy);
+
+ } else {
+ */
+ if (info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+ info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR)
+ belch("** ghuH: found %s; packing of primitive arrays not yet implemented",
+ info_type(closure));
+
+ /* At last! A closure we can actually pack! */
+ if (ip_MUTABLE(info) && (info->type != FETCH_ME))
+ fprintf(stderr, "** ghuH: Replicated a Mutable closure!\n");
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* pack fixed and variable header */
+ for (i = 0; i < _HS + vhs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ /* register all ptrs for further packing */
+ for (i = 0; i < ptrs; ++i)
+ QueueClosure(((StgClosure *) *(((StgPtr)closure)+(i+_HS+vhs))));
+
+ /* pack non-ptrs */
+ for (i = 0; i < nonptrs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+(i+_HS+vhs+ptrs)));
+
+ unpacked_size += _HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size);
+
+ /*
+ * Record that this is a revertable black hole so that we can fill in
+ * its address from the fetch reply. Problem: unshared thunks may cause
+ * space leaks this way, their GAs should be deallocated following an
+ * ACK.
+ */
+
+ // IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) !? HWL
+ if (closure_THUNK(closure) && !closure_UNPOINTED(closure)) {
+ rbh = convertToRBH(closure);
+ ASSERT(rbh == closure); // rbh at the same position (minced version)
+ packed_thunks++;
+ }
+}
+
+# else /* GRAN */
+
+/* Fake the packing of a closure */
+
+void
+PackClosure(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info, *childInfo;
+ nat size, ptrs, nonptrs, vhs;
+ char info_hdr_ty[80];
+ nat i;
+ StgClosure *indirectee, *rbh;
+ char str[80];
+ rtsBool is_mutable, will_be_rbh, no_more_thunks_please;
+
+ is_mutable = rtsFalse;
+
+ /* In GranSim we don't pack and unpack closures -- we just simulate
+ packing by updating the bitmask. So, the graph structure is unchanged
+ i.e. we don't short out indirections here. -- HWL */
+
+ /* Nothing to do with packing but good place to (sanity) check closure;
+ if the closure is a thunk, it must be unique; otherwise we have copied
+ work at some point before that which violates one of our main global
+ assertions in GranSim/GUM */
+ ASSERT(!closure_THUNK(closure) || is_unique(closure));
+
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing closure %p (%s)",
+ closure, info_type(closure)));
+
+ if (where_is(closure) != where_is(graph_root)) {
+ IF_GRAN_DEBUG(pack,
+ belch("** faking a FETCHME [current PE: %d, closure's PE: %d]",
+ where_is(graph_root), where_is(closure)));
+
+ /* GUM would pack a FETCHME here; simulate that by increasing the */
+ /* unpacked size accordingly but don't pack anything -- HWL */
+ unpacked_size += _HS + 2 ; // sizeofW(StgFetchMe);
+ return;
+ }
+
+ /* If the closure's not already being packed */
+ if (!NotYetPacking(closure))
+ /* Don't have to do anything in GrAnSim if closure is already */
+ /* packed -- HWL */
+ {
+ IF_GRAN_DEBUG(pack,
+ belch("** Closure %p is already packed and omitted now!",
+ closure));
+ return;
+ }
+
+ switch (get_itbl(closure)->type) {
+ /* ToDo: check for sticky bit here? */
+ /* BH-like closures which must not be moved to another PE */
+ case CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case SE_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case SE_CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case BLACKHOLE_BQ: /* # of ptrs, nptrs: 1,1 */
+ case RBH: /* # of ptrs, nptrs: 1,1 */
+ /* same for these parallel specific closures */
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ IF_GRAN_DEBUG(pack,
+ belch("** Avoid packing BH-like closures (%p, %s)!",
+ closure, info_type(closure)));
+ /* Just ignore RBHs i.e. they stay where they are */
+ return;
+
+ case THUNK_SELECTOR:
+ {
+ StgClosure *sel = ((StgSelector *)closure)->selectee;
+
+ IF_GRAN_DEBUG(pack,
+ belch("** Avoid packing THUNK_SELECTOR (%p, %s) but queuing %p (%s)!",
+ closure, info_type(closure), sel, info_type(sel)));
+ QueueClosure(sel);
+ IF_GRAN_DEBUG(pack,
+ belch("** [%p (%s) (Queueing closure) ....]",
+ sel, info_type(sel)));
+ }
+ return;
+
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ /* For now we ship indirections to CAFs:
+ * They are evaluated on each PE if needed */
+ IF_GRAN_DEBUG(pack,
+ belch("** Nothing to pack for %p (%s)!",
+ closure, info_type(closure)));
+ // Pack(closure); GUM only
+ return;
+
+ case CONSTR_CHARLIKE:
+ case CONSTR_INTLIKE:
+ IF_GRAN_DEBUG(pack,
+      belch("** Nothing to pack for %p (%s)!",
+ closure, info_type(closure)));
+ // PackPLC(((StgIntCharlikeClosure *)closure)->data); GUM only
+ return;
+
+ case AP_UPD:
+ case PAP:
+ /* partial applications; special treatment necessary? */
+ break;
+
+ case CAF_UNENTERED: /* # of ptrs, nptrs: 1,3 */
+ case CAF_ENTERED: /* # of ptrs, nptrs: 0,4 (allegedly bogus!!) */
+ /* CAFs; special treatment necessary? */
+ break;
+
+ case MVAR:
+ barf("{PackClosure}Daq Qagh: found an MVAR (%p, %s); ToDo: implement proper treatment of MVARs",
+ closure, info_type(closure));
+
+ case ARR_WORDS:
+ case MUT_VAR:
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ /* Mutable objects; require special treatment to ship all data */
+ is_mutable = rtsTrue;
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ /* weak pointers and other FFI objects */
+ barf("{PackClosure}Daq Qagh: found an FFI object (%p, %s); FFI not yet supported by GranSim, sorry",
+ closure, info_type(closure));
+
+ case TSO:
+ /* parallel objects */
+ barf("{PackClosure}Daq Qagh: found a TSO when packing (%p, %s); thread migration not yet implemented, sorry",
+ closure, info_type(closure));
+
+ case BCO:
+ /* Hugs objects (i.e. closures used by the interpreter) */
+ barf("{PackClosure}Daq Qagh: found a Hugs closure when packing (%p, %s); GranSim not yet integrated with Hugs, sorry",
+ closure, info_type(closure));
+
+ case IND: /* # of ptrs, nptrs: 1,0 */
+ case IND_STATIC: /* # of ptrs, nptrs: 1,0 */
+ case IND_PERM: /* # of ptrs, nptrs: 1,1 */
+ case IND_OLDGEN: /* # of ptrs, nptrs: 1,1 */
+ case IND_OLDGEN_PERM: /* # of ptrs, nptrs: 1,1 */
+ /* we shouldn't find an indirection here, because we have shorted them
+       out at the beginning of this function already.
+ */
+ break;
+ /* should be:
+ barf("{PackClosure}Daq Qagh: found indirection when packing (%p, %s)",
+ closure, info_type(closure));
+ */
+
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ case STOP_FRAME:
+ /* stack frames; should never be found when packing for now;
+ once we support thread migration these have to be covered properly
+ */
+ barf("{PackClosure}Daq Qagh: found stack frame when packing (%p, %s)",
+ closure, info_type(closure));
+
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ /* vectored returns; should never be found when packing; */
+ barf("{PackClosure}Daq Qagh: found vectored return (%p, %s)",
+ closure, info_type(closure));
+
+ case INVALID_OBJECT:
+ barf("{PackClosure}Daq Qagh: found Invalid object (%p, %s)",
+ closure, info_type(closure));
+
+ default:
+ /*
+ Here we know that the closure is a CONSTR, FUN or THUNK (maybe
+ a specialised version with wired in #ptr/#nptr info; currently
+ we treat these specialised versions like the generic version)
+ */
+ } /* switch */
+
+ /* Otherwise it's not Fixed */
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ will_be_rbh = closure_THUNK(closure) && !closure_UNPOINTED(closure);
+
+ IF_GRAN_DEBUG(pack,
+		belch("** Info on closure %p (%s): size=%d; ptrs=%d; %s",
+ closure, info_type(closure),
+ size, ptrs,
+ (will_be_rbh) ? "will become RBH" : "will NOT become RBH"));
+
+ // check whether IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) -- HWL
+ no_more_thunks_please =
+ (RtsFlags.GranFlags.ThunksToPack>0) &&
+ (packed_thunks>=RtsFlags.GranFlags.ThunksToPack);
+
+ /*
+ should be covered by get_closure_info
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ info->type == BLACKHOLE || info->type == RBH )
+ size = ptrs = nonptrs = vhs = 0;
+ */
+ /* Now peek ahead to see whether the closure has any primitive */
+ /* array children */
+ /*
+ ToDo: fix this code
+ for (i = 0; i < ptrs; ++i) {
+ P_ childInfo;
+ W_ childSize, childPtrs, childNonPtrs, childVhs;
+
+ childInfo = get_closure_info(((StgPtrPtr) (closure))[i + FIXED_HS + vhs],
+ &childSize, &childPtrs, &childNonPtrs,
+ &childVhs, junk_str);
+ if (IS_BIG_MOTHER(childInfo)) {
+ reservedPAsize += PACK_GA_SIZE + FIXED_HS +
+ childVhs + childNonPtrs +
+ childPtrs * PACK_FETCHME_SIZE;
+ PAsize += PACK_GA_SIZE + FIXED_HS + childSize;
+ PAptrs += childPtrs;
+ }
+ }
+ */
+ /* Don't pack anything (GrAnSim) if it's a black hole, or the buffer
+ * is full and it isn't a primitive array. N.B. Primitive arrays are
+ * always packed (because their parents index into them directly) */
+
+ if (IS_BLACK_HOLE(closure))
+ /*
+ ToDo: fix this code
+ ||
+ !(RoomToPack(PACK_GA_SIZE + FIXED_HS + vhs + nonptrs, ptrs)
+ || IS_BIG_MOTHER(info)))
+ */
+ return;
+
+ /* At last! A closure we can actually pack! */
+
+ if (closure_MUTABLE(closure)) // not nec. && (info->type != FETCHME))
+ belch("ghuH: Replicated a Mutable closure!");
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ no_more_thunks_please && will_be_rbh) {
+ globalGranStats.tot_cuts++;
+ if ( RtsFlags.GranFlags.Debug.pack )
+ belch("** PackClosure (w/ ThunksToPack=%d): Cutting tree with root at %#x\n",
+ RtsFlags.GranFlags.ThunksToPack, closure);
+ } else if (will_be_rbh || (closure==graph_root) ) {
+ packed_thunks++;
+ globalGranStats.tot_thunks++;
+ }
+
+ if (no_more_thunks_please && will_be_rbh)
+ return; /* don't pack anything */
+
+ /* actual PACKING done here -- HWL */
+ Pack(closure);
+ for (i = 0; i < ptrs; ++i) {
+ /* extract i-th pointer from closure */
+ QueueClosure((StgClosure *)payloadPtr(closure,i));
+ IF_GRAN_DEBUG(pack,
+ belch("** [%p (%s) (Queueing closure) ....]",
+ payloadPtr(closure,i), info_type(payloadPtr(closure,i))));
+ }
+
+ /*
+ for packing words (GUM only) do something like this:
+
+ for (i = 0; i < ptrs; ++i) {
+ Pack(payloadWord(obj,i+j));
+ }
+ */
+ /* Turn thunk into a revertible black hole. */
+ if (will_be_rbh) {
+ rbh = convertToRBH(closure);
+ ASSERT(rbh != NULL);
+ }
+}
+# endif /* PAR */
+
+//@node Low level packing routines, Unpacking routines, Packing Functions, Graph packing
+//@subsection Low level packing routines
+
+/*
+ @Pack@ is the basic packing routine. It just writes a word of data into
+ the pack buffer and increments the pack location. */
+
+//@cindex Pack
+
+# if defined(PAR)
+static inline void
+Pack(data)
+StgWord data;
+{
+ ASSERT(pack_locn < RtsFlags.ParFlags.packBufferSize);
+ Bonzo->buffer[pack_locn++] = data;
+}
+#endif
+
+#if defined(GRAN)
+static inline void
+Pack(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* This checks the size of the GrAnSim internal pack buffer. The simulated
+ pack buffer is checked via RoomToPack (as in GUM) */
+ if (pack_locn >= (int)Bonzo->size+sizeofW(rtsPackBuffer))
+ reallocPackBuffer();
+
+ if (closure==(StgClosure*)NULL)
+ belch("Qagh {Pack}Daq: Trying to pack 0");
+ Bonzo->buffer[pack_locn++] = closure;
+ /* ASSERT: Data is a closure in GrAnSim here */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ unpacked_size += _HS + (size < MIN_UPD_SIZE ?
+ MIN_UPD_SIZE :
+ size);
+}
+# endif /* GRAN */
+
+/*
+ If a closure is local, make it global. Then, divide its weight for
+ export. The GA is then packed into the pack buffer. */
+
+# if defined(PAR)
+
+static void
+GlobaliseAndPackGA(closure)
+StgClosure *closure;
+{
+ globalAddr *ga;
+ globalAddr packGA;
+
+ if ((ga = LAGAlookup(closure)) == NULL)
+ ga = makeGlobal(closure, rtsTrue);
+ splitWeight(&packGA, ga);
+ ASSERT(packGA.weight > 0);
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "** Globalising closure %p (%s) with GA",
+ closure, info_type(closure));
+ printGA(&packGA);
+ fputc('\n', stderr));
+
+
+ Pack((StgWord) packGA.weight);
+ Pack((StgWord) packGA.payload.gc.gtid);
+ Pack((StgWord) packGA.payload.gc.slot);
+}
+
+/*
+ @PackPLC@ makes up a bogus GA for a PLC. Weight 0 implies that a PLC
+ address follows instead of PE, slot. */
+
+//@cindex PackPLC
+
+static void
+PackPLC(addr)
+StgPtr addr;
+{
+ Pack(0L); /* weight */
+ Pack((StgWord) addr); /* address */
+}
+
+/*
+ @PackOffset@ packs a special GA value that will be interpreted as an
+ offset to a closure in the pack buffer. This is used to avoid unfolding
+ the graph structure into a tree. */
+
+static void
+PackOffset(offset)
+int offset;
+{
+ IF_PAR_DEBUG(pack,
+ belch("** Packing Offset %d at pack location %u",
+ offset, pack_locn));
+ Pack(1L); /* weight */
+ Pack(0L); /* pe */
+ Pack(offset); /* slot/offset */
+}
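+
+/* Summary of the GA encodings written by the three routines above (and decoded
+   via isFixed/isOffset in UnpackGraph):
+     nonzero weight, gtid, slot   -- a proper GA; the packed closure body follows
+     weight 0, address            -- a PLC, valid on every PE; no body follows
+     weight 1, pe 0, offset       -- a back-reference into this pack buffer
+*/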
+# endif /* PAR */
+
+//@node Unpacking routines, Aux fcts for packing, Low level packing routines, Graph packing
+//@subsection Unpacking routines
+
+/*
+ This was formerly in the (now deceased) module Unpack.c
+
+ Unpacking closures which have been exported to remote processors
+
+ This module defines routines for unpacking closures in the parallel
+ runtime system (GUM).
+
+ In the case of GrAnSim, this module defines routines for *simulating* the
+ unpacking of closures as it is done in the parallel runtime system.
+*/
+
+//@node GUM code, Local Definitions, Unpacking routines, Unpacking routines
+//@subsubsection GUM code
+
+#if defined(PAR)
+
+//@cindex InitPendingGABuffer
+void
+InitPendingGABuffer(size)
+nat size;
+{
+ PendingGABuffer = (globalAddr *)
+ stgMallocBytes(size*2*sizeof(globalAddr),
+ "InitPendingGABuffer");
+}
+
+/*
+ @CommonUp@ commons up two closures which we have discovered to be
+ variants of the same object. One is made an indirection to the other. */
+
+//@cindex CommonUp
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ StgBlockingQueueElement *bqe;
+
+ ASSERT(src != (StgClosure *)NULL && dst != (StgClosure *)NULL);
+ ASSERT(src != dst);
+
+ IF_PAR_DEBUG(verbose,
+ belch("__ CommonUp %p (%s) with %p (%s)",
+ src, info_type(src), dst, info_type(dst)));
+
+ switch (get_itbl(src)->type) {
+ case BLACKHOLE_BQ:
+ bqe = ((StgBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case FETCH_ME_BQ:
+ bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case RBH:
+ bqe = ((StgRBH *)src)->blocking_queue;
+ break;
+
+ case BLACKHOLE:
+ case FETCH_ME:
+ bqe = END_BQ_QUEUE;
+ break;
+
+ default:
+ /* Don't common up anything else */
+ return;
+ }
+ /* NB: this also awakens the blocking queue for src */
+ UPD_IND(src, dst);
+ // updateWithIndirection(src, dst);
+ /*
+ ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst)));
+ if (bqe != END_BQ_QUEUE)
+ awaken_blocked_queue(bqe, src);
+ */
+}
+
+/*
+ @UnpackGraph@ unpacks the graph contained in a message buffer. It
+ returns a pointer to the new graph. The @gamap@ parameter is set to
+ point to an array of (oldGA,newGA) pairs which were created as a result
+ of unpacking the buffer; @nGAs@ is set to the number of GA pairs which
+ were created.
+
+  The format of the graph in the pack buffer is as defined in @Pack.lc@.  */
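+
+/* Sketch of that format (as produced by PackClosure above): each packed item
+   starts with a GA, PLC or offset; only a proper GA is followed by a closure
+   body, i.e. the fixed header, variable header and non-pointers, while the
+   pointer fields are filled in during unpacking from the closures that follow
+   in the buffer. */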
+
+//@cindex UnpackGraph
+StgClosure *
+UnpackGraph(packBuffer, gamap, nGAs)
+rtsPackBuffer *packBuffer;
+globalAddr **gamap;
+nat *nGAs;
+{
+ nat size, ptrs, nonptrs, vhs;
+ StgWord **buffer, **bufptr, **slotptr;
+ globalAddr ga, *gaga;
+ StgClosure *closure, *existing,
+ *graphroot, *graph, *parent;
+ StgInfoTable *ip, *oldip;
+ nat bufsize, i,
+ pptr = 0, pptrs = 0, pvhs;
+ rtsBool hasGA;
+ char str[80];
+
+ initPackBuffer(); /* in case it isn't already init'd */
+ graphroot = (StgClosure *)NULL;
+
+ gaga = PendingGABuffer;
+
+ InitClosureQueue();
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ buffer = packBuffer->buffer;
+ bufptr = buffer;
+
+ /* allocate heap */
+ if (bufsize > 0) {
+ graph = allocate(bufsize);
+ ASSERT(graph != NULL);
+ }
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight > 0) {
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else {
+ ga.payload.plc = (StgPtr) *bufptr++;
+ }
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ /* No more to unpack; just set closure to local address */
+ IF_PAR_DEBUG(pack,
+ belch("_* Unpacked PLC at %x", ga.payload.plc));
+ hasGA = rtsFalse;
+      closure = (StgClosure *) ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ /* No more to unpack; just set closure to cached address */
+ IF_PAR_DEBUG(pack,
+ belch("_* Unpacked indirection to %p (was offset %x)",
+ (StgClosure *) buffer[ga.payload.gc.slot],
+ ga.payload.gc.slot));
+ ASSERT(parent != (StgClosure *)NULL);
+ hasGA = rtsFalse;
+ closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Now we have to build something. */
+ hasGA = rtsTrue;
+
+ ASSERT(bufsize > 0);
+
+ /*
+ * Close your eyes. You don't want to see where we're looking. You
+ * can't get closure info until you've unpacked the variable header,
+ * but you don't know how big it is until you've got closure info.
+ * So...we trust that the closure in the buffer is organized the
+ * same way as they will be in the heap...at least up through the
+ * end of the variable header.
+ */
+ ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* Fill in the fixed header */
+ for (i = 0; i < _HS; i++)
+ ((StgPtr)graph)[i] = (StgWord)*bufptr++;
+
+ if (ip->type == FETCH_ME)
+ size = ptrs = nonptrs = vhs = 0;
+
+ /* Fill in the packed variable header */
+ for (i = 0; i < vhs; i++)
+ ((StgPtr)graph)[_HS + i] = (StgWord)*bufptr++;
+
+ /* Pointers will be filled in later */
+
+ /* Fill in the packed non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ ((StgPtr)graph)[_HS + i + vhs + ptrs] = (StgWord)*bufptr++;
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing */
+ QueueClosure(graph);
+
+ /*
+ * Common up the new closure with any existing closure having the same
+ * GA
+ */
+
+ if ((existing = GALAlookup(&ga)) == NULL) {
+ globalAddr *newGA;
+ /* Just keep the new object */
+ IF_PAR_DEBUG(pack,
+ belch("_* Unpacking new GA ((%x, %d, %x))",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight));
+
+ closure = graph;
+ newGA = setRemoteGA(graph, &ga, rtsTrue);
+ if (ip->type == FETCH_ME)
+ // FETCHME_GA(closure) = newGA;
+ ((StgFetchMe *)closure)->ga = newGA;
+ } else {
+ /* Two closures, one global name. Someone loses */
+ oldip = get_itbl(existing);
+
+ if ((oldip->type == FETCH_ME ||
+ // ToDo: don't pack a GA for these in the first place
+ oldip->type == CONSTR ||
+ oldip->type == CONSTR_1_0 ||
+ oldip->type == CONSTR_0_1 ||
+ oldip->type == CONSTR_2_0 ||
+ oldip->type == CONSTR_1_1 ||
+ oldip->type == CONSTR_0_2 ||
+ IS_BLACK_HOLE(existing)) &&
+ ip->type != FETCH_ME) {
+
+ /* What we had wasn't worth keeping */
+ closure = graph;
+ CommonUp(existing, graph);
+ } else {
+ StgWord ty;
+
+ /*
+ * Either we already had something worthwhile by this name or
+ * the new thing is just another FetchMe. However, the thing we
+ * just unpacked has to be left as-is, or the child unpacking
+ * code will fail. Remember that the way pointer words are
+ * filled in depends on the info pointers of the parents being
+ * the same as when they were packed.
+ */
+ IF_PAR_DEBUG(pack,
+ belch("_* Unpacking old GA ((%x, %d, %x)), keeping %#lx",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight,
+ existing));
+
+ closure = existing;
+ // HACK
+ ty = get_itbl(closure)->type;
+ if (ty == CONSTR ||
+ ty == CONSTR_1_0 ||
+ ty == CONSTR_0_1 ||
+ ty == CONSTR_2_0 ||
+ ty == CONSTR_1_1 ||
+ ty == CONSTR_0_2)
+ CommonUp(closure, graph);
+
+ }
+ /* Pool the total weight in the stored ga */
+ (void) addWeight(&ga);
+ }
+
+ /* Sort out the global address mapping */
+ if (hasGA || // (ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||
+ (ip_MUTABLE(ip) && ip->type != FETCH_ME)) {
+ /* Make up new GAs for single-copy closures */
+ globalAddr *newGA = makeGlobal(closure, rtsTrue);
+
+ // keep this assertion!
+ // ASSERT(closure == graph);
+
+ /* Create an old GA to new GA mapping */
+ *gaga++ = ga;
+ splitWeight(gaga, newGA);
+ ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1));
+ gaga++;
+ }
+ graph += _HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size);
+ }
+
+ /*
+ * Set parent pointer to point to chosen closure. If we're at the top of
+ * the graph (our parent is NULL), then we want to arrange to return the
+ * chosen closure to our caller (possibly in place of the allocated graph
+ * root.)
+ */
+ if (parent == NULL)
+ graphroot = closure;
+ else
+ ((StgPtr)parent)[_HS + pvhs + pptr] = (StgWord) closure;
+
+ /* Save closure pointer for resolving offsets */
+ *slotptr = (StgWord) closure;
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+
+ //ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp);
+
+ *gamap = PendingGABuffer;
+ *nGAs = (gaga - PendingGABuffer) / 2;
+
+ /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+ ASSERT(graphroot!=NULL);
+ return (graphroot);
+}
+#endif /* PAR */
+
+//@node GranSim Code, , Local Definitions, Unpacking routines
+//@subsubsection GranSim Code
+
+/*
+ For GrAnSim: No actual unpacking should be necessary. We just
+ have to walk over the graph and set the bitmasks appropriately.
+ Since we use RBHs similarly to GUM but without an ACK message/event
+ we have to revert the RBH from within the UnpackGraph routine (good luck!)
+ -- HWL
+*/
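+
+/*
+  A minimal sketch (illustration only, using GranSim-only fields) of the
+  bitmask convention applied in the loop below: every closure carries a
+  bitmask of the PEs on which it logically resides.  Unpacking a
+  normal-form closure *copies* it, so the current PE's bit is OR-ed into
+  the mask; unpacking anything else *moves* it, so the mask is overwritten
+  with just the current PE's bit.  The helper name is made up.
+*/
+#if 0
+static void
+exampleSetProcsBitmask(StgClosure *closure, rtsBool copyable)
+{
+  if (copyable)
+    closure->header.gran.procs |= PE_NUMBER(CurrentProc);  /* copy node */
+  else
+    closure->header.gran.procs  = PE_NUMBER(CurrentProc);  /* move node */
+}
+#endif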
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+ nat size, ptrs, nonptrs, vhs,
+ bufptr = 0;
+ StgClosure *closure, *graphroot, *graph;
+ StgInfoTable *ip;
+ StgWord bufsize, unpackedsize,
+ pptr = 0, pptrs = 0, pvhs;
+ StgTSO* tso;
+ char str[240], str1[80];
+ int i;
+
+ bufptr = 0;
+ graphroot = buffer->buffer[0];
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpackedsize = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ IF_GRAN_DEBUG(pack,
+ belch("<<< Unpacking <<%d>> (buffer @ %p):\n (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+ buffer->id, buffer, graphroot, where_is(graphroot),
+ bufsize, tso->id, tso,
+ where_is((StgClosure *)tso)));
+
+ do {
+ closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+ /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+ ip = get_closure_info(closure,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ",
+ closure, (closure_HNF(closure) ? "NF" : "__"),
+ PROCS(closure)));
+
+ if (get_itbl(closure)->type == RBH) {
+    /* if it's an RBH, we have to revert it into a normal closure, thereby
+       awakening the blocking queue; note that this code is currently not
+       needed in GUM, but it should be added with the new features in
+       GdH (and the implementation of a NACK message)
+    */
+ // closure->header.gran.procs = PE_NUMBER(CurrentProc);
+ SET_GRAN_HDR(closure, PE_NUMBER(CurrentProc)); /* Move node */
+
+ IF_GRAN_DEBUG(pack,
+ strcat(str, " (converting RBH) "));
+
+ convertFromRBH(closure); /* In GUM that's done by convertToFetchMe */
+
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a RBH; after reverting: IP=%p",
+ closure, info_type(closure), get_itbl(closure)));
+ } else if (IS_BLACK_HOLE(closure)) {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a BH; copying node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+ if (closure_HNF(closure)) {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a HNF; copying node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is no (R)BH or HNF; moving node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+ }
+ }
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+ IF_GRAN_DEBUG(pack, belch(str));
+
+ } while (bufptr<buffer->size) ; /* (parent != NULL); */
+
+ /* In GrAnSim we allocate pack buffers dynamically! -- HWL */
+ free(buffer->buffer);
+ free(buffer);
+
+ IF_GRAN_DEBUG(pack,
+ belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0));
+
+ return (graphroot);
+}
+#endif /* GRAN */
+
+//@node Aux fcts for packing, Printing Packet Contents, Unpacking routines, Graph packing
+//@subsection Aux fcts for packing
+
+//@menu
+//* Offset table::
+//* Packet size::
+//* Types of Global Addresses::
+//* Closure Info::
+//@end menu
+
+//@node Offset table, Packet size, Aux fcts for packing, Aux fcts for packing
+//@subsubsection Offset table
+
+/*
+ DonePacking is called when we've finished packing. It releases memory
+ etc. */
+
+//@cindex DonePacking
+
+# if defined(PAR)
+
+static void
+DonePacking(void)
+{
+ freeHashTable(offsetTable, NULL);
+ offsetTable = NULL;
+}
+
+/*
+ AmPacking records that the closure is being packed. Note the abuse of
+ the data field in the hash table -- this saves calling @malloc@! */
+
+//@cindex AmPacking
+
+static void
+AmPacking(closure)
+StgClosure *closure;
+{
+/* IF_PAR_DEBUG(pack, */
+/* fprintf(stderr, "** AmPacking %p (%s)(IP %p) at %u\n", */
+/* closure, info_type(closure), get_itbl(closure), pack_locn)); */
+
+ insertHashTable(offsetTable, (StgWord) closure, (void *) (StgWord) pack_locn);
+}
+
+/*
+ OffsetFor returns an offset for a closure which is already being packed. */
+
+//@cindex OffsetFor
+
+static int
+OffsetFor(closure)
+StgClosure *closure;
+{
+ return (int) (StgWord) lookupHashTable(offsetTable, (StgWord) closure);
+}
+
+/*
+  NotYetPacking determines whether the closure is already being packed.
+  Offsets < @PACK_HDR_SIZE@ (e.g. 0) mean it is not. */
+
+//@cindex NotYetPacking
+
+static rtsBool
+NotYetPacking(offset)
+int offset;
+{
+ return(offset == 0); // ToDo: what if root is found again?? FIX
+}
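+
+/*
+  Illustration only: the intended packing-side protocol for the three
+  helpers above.  The real call sites live in the PackClosure code; the
+  function name examplePackWithOffsets is made up for this sketch.
+*/
+#if 0
+static void
+examplePackWithOffsets(StgClosure *closure)
+{
+  int offset = OffsetFor(closure);   /* 0 if this closure is not in the table */
+
+  if (NotYetPacking(offset)) {
+    AmPacking(closure);              /* remember the current pack_locn for it */
+    /* ... pack the closure's GA and body into the buffer ... */
+  } else {
+    PackOffset(offset);              /* just refer back to the first copy     */
+  }
+}
+#endif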
+
+# else /* GRAN */
+
+static void
+DonePacking(void)
+{
+ /* nothing */
+}
+
+/*
+  NotYetPacking searches through the whole pack buffer for the closure. */
+
+static rtsBool
+NotYetPacking(closure)
+StgClosure *closure;
+{ nat i;
+ rtsBool found = rtsFalse;
+
+ for (i=0; (i<pack_locn) && !found; i++)
+ found = Bonzo->buffer[i]==closure;
+
+ return (!found);
+}
+# endif
+
+//@node Packet size, Types of Global Addresses, Offset table, Aux fcts for packing
+//@subsubsection Packet size
+
+/*
+ RoomToPack determines whether there's room to pack the closure into
+ the pack buffer based on
+
+ o how full the buffer is already,
+  o the closure's size and number of pointers (which must be packed as GAs),
+ o the size and number of pointers held by any primitive arrays that it
+ points to
+
+  It has a *side-effect* (naughty, naughty): it may set RoomInBuffer
+  to rtsFalse.
+*/
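+
+/*
+  Worked example (all numbers made up, for illustration only): assume
+  RTS_PACK_BUFFER_SIZE = 1024 words, pack_locn = 900 words already used,
+  reservedPAsize = 0, a closure of size = 10 with ptrs = 4, 20 closures
+  still sitting in the closure queue (clq_size - clq_pos = 20), and
+  PACK_FETCHME_SIZE = 6.  The check below then compares
+
+      900 + 0 + 10 + (20 + 4) * 6  =  1054
+
+  against 1024; since 1054 >= 1024 there is no room, RoomInBuffer is set to
+  rtsFalse and the closure is not packed.
+*/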
+
+//@cindex RoomToPack
+static rtsBool
+RoomToPack(size, ptrs)
+nat size, ptrs;
+{
+# if defined(PAR)
+ if (RoomInBuffer &&
+ (pack_locn + reservedPAsize + size +
+ ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE >= RTS_PACK_BUFFER_SIZE))
+ {
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "Buffer full\n"));
+
+ RoomInBuffer = rtsFalse;
+ }
+# else /* GRAN */
+ if (RoomInBuffer &&
+ (unpacked_size + reservedPAsize + size +
+ ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE >= RTS_PACK_BUFFER_SIZE))
+ {
+ IF_GRAN_DEBUG(packBuffer,
+ fprintf(stderr, "Buffer full\n"));
+ RoomInBuffer = rtsFalse;
+ }
+# endif
+ return (RoomInBuffer);
+}
+
+//@node Types of Global Addresses, Closure Info, Packet size, Aux fcts for packing
+//@subsubsection Types of Global Addresses
+
+/*
+ Types of Global Addresses
+
+ These routines determine whether a GA is one of a number of special types
+ of GA.
+*/
+
+# if defined(PAR)
+//@cindex isOffset
+rtsBool
+isOffset(ga)
+globalAddr *ga;
+{
+ return (ga->weight == 1 && ga->payload.gc.gtid == 0);
+}
+
+//@cindex isFixed
+rtsBool
+isFixed(ga)
+globalAddr *ga;
+{
+ return (ga->weight == 0);
+}
+# endif
+
+//@node Closure Info, , Types of Global Addresses, Aux fcts for packing
+//@subsubsection Closure Info
+
+/*
+ Closure Info
+
+ @get_closure_info@ determines the size, number of pointers etc. for this
+ type of closure -- see @SMInfoTables.lh@ for the legal info. types etc.
+
+[Can someone please keep this function up to date. I keep needing it
+ (or something similar) for interpretive code, and it keeps
+ bit-rotting. {\em It really belongs somewhere else too}. KH @@ 17/2/95] */
+
+#if 0
+
+// {Parallel.h}Daq ngoqvam vIroQpu'
+
+# if defined(GRAN) || defined(PAR)
+/* extracting specific info out of closure; currently only used in GRAN -- HWL */
+//@cindex get_closure_info
+StgInfoTable*
+get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty)
+StgClosure* node;
+nat *size, *ptrs, *nonptrs, *vhs;
+char *info_hdr_ty;
+{
+ StgInfoTable *info;
+
+ info = get_itbl(node);
+ /* the switch shouldn't be necessary, really; just use default case */
+ switch (info->type) {
+#if 0
+ case CONSTR_1_0:
+ case THUNK_1_0:
+ case FUN_1_0:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 1; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 0; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_0_1:
+ case THUNK_0_1:
+ case FUN_0_1:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 0; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 1; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_2_0:
+ case THUNK_2_0:
+ case FUN_2_0:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 2; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 0; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_1_1:
+ case THUNK_1_1:
+ case FUN_1_1:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 1; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 1; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_0_2:
+ case THUNK_0_2:
+ case FUN_0_2:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 0; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 2; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+#endif
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to
+ *size = sizeW_fromITBL(rip);
+ *ptrs = (nat) (rip->layout.payload.ptrs);
+ *nonptrs = (nat) (rip->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return rip; // NB: we return the reverted info ptr for a RBH!!!!!!
+ }
+
+ default:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) (info->layout.payload.ptrs);
+ *nonptrs = (nat) (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+ }
+}
+
+//@cindex IS_BLACK_HOLE
+rtsBool
+IS_BLACK_HOLE(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse);
+}
+
+//@cindex IS_INDIRECTION
+StgClosure *
+IS_INDIRECTION(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ switch (info->type) {
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ /* relies on indirectee being at same place for all these closure types */
+ return (((StgInd*)node) -> indirectee);
+ default:
+ return NULL;
+ }
+}
+
+/*
+rtsBool
+IS_THUNK(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ return ((info->type == THUNK ||
+ info->type == THUNK_STATIC ||
+ info->type == THUNK_SELECTOR) ? rtsTrue : rtsFalse);
+}
+*/
+
+# endif /* GRAN */
+#endif /* 0 */
+
+# if 0
+/* ngoq ngo' */
+
+P_
+get_closure_info(closure, size, ptrs, nonptrs, vhs, type)
+P_ closure;
+W_ *size, *ptrs, *nonptrs, *vhs;
+char *type;
+{
+ P_ ip = (P_) INFO_PTR(closure);
+
+ if (closure==NULL) {
+ fprintf(stderr, "Qagh {get_closure_info}Daq: NULL closure\n");
+ *size = *ptrs = *nonptrs = *vhs = 0;
+ strcpy(type,"ERROR in get_closure_info");
+ return;
+ } else if (closure==PrelBase_Z91Z93_closure) {
+ /* fprintf(stderr, "Qagh {get_closure_info}Daq: PrelBase_Z91Z93_closure closure\n"); */
+ *size = *ptrs = *nonptrs = *vhs = 0;
+ strcpy(type,"PrelBase_Z91Z93_closure");
+ return;
+ };
+
+ ip = (P_) INFO_PTR(closure);
+
+ switch (INFO_TYPE(ip)) {
+ case INFO_SPEC_U_TYPE:
+ case INFO_SPEC_S_TYPE:
+ case INFO_SPEC_N_TYPE:
+ *size = SPEC_CLOSURE_SIZE(closure);
+ *ptrs = SPEC_CLOSURE_NoPTRS(closure);
+ *nonptrs = SPEC_CLOSURE_NoNONPTRS(closure);
+ *vhs = 0 /*SPEC_VHS*/;
+ strcpy(type,"SPEC");
+ break;
+
+ case INFO_GEN_U_TYPE:
+ case INFO_GEN_S_TYPE:
+ case INFO_GEN_N_TYPE:
+ *size = GEN_CLOSURE_SIZE(closure);
+ *ptrs = GEN_CLOSURE_NoPTRS(closure);
+ *nonptrs = GEN_CLOSURE_NoNONPTRS(closure);
+ *vhs = GEN_VHS;
+ strcpy(type,"GEN");
+ break;
+
+ case INFO_DYN_TYPE:
+ *size = DYN_CLOSURE_SIZE(closure);
+ *ptrs = DYN_CLOSURE_NoPTRS(closure);
+ *nonptrs = DYN_CLOSURE_NoNONPTRS(closure);
+ *vhs = DYN_VHS;
+ strcpy(type,"DYN");
+ break;
+
+ case INFO_TUPLE_TYPE:
+ *size = TUPLE_CLOSURE_SIZE(closure);
+ *ptrs = TUPLE_CLOSURE_NoPTRS(closure);
+ *nonptrs = TUPLE_CLOSURE_NoNONPTRS(closure);
+ *vhs = TUPLE_VHS;
+ strcpy(type,"TUPLE");
+ break;
+
+ case INFO_DATA_TYPE:
+ *size = DATA_CLOSURE_SIZE(closure);
+ *ptrs = DATA_CLOSURE_NoPTRS(closure);
+ *nonptrs = DATA_CLOSURE_NoNONPTRS(closure);
+ *vhs = DATA_VHS;
+ strcpy(type,"DATA");
+ break;
+
+ case INFO_IMMUTUPLE_TYPE:
+ case INFO_MUTUPLE_TYPE:
+ *size = MUTUPLE_CLOSURE_SIZE(closure);
+ *ptrs = MUTUPLE_CLOSURE_NoPTRS(closure);
+ *nonptrs = MUTUPLE_CLOSURE_NoNONPTRS(closure);
+ *vhs = MUTUPLE_VHS;
+ strcpy(type,"(IM)MUTUPLE");
+ break;
+
+ case INFO_STATIC_TYPE:
+ *size = STATIC_CLOSURE_SIZE(closure);
+ *ptrs = STATIC_CLOSURE_NoPTRS(closure);
+ *nonptrs = STATIC_CLOSURE_NoNONPTRS(closure);
+ *vhs = STATIC_VHS;
+ strcpy(type,"STATIC");
+ break;
+
+ case INFO_CAF_TYPE:
+ case INFO_IND_TYPE:
+ *size = IND_CLOSURE_SIZE(closure);
+ *ptrs = IND_CLOSURE_NoPTRS(closure);
+ *nonptrs = IND_CLOSURE_NoNONPTRS(closure);
+ *vhs = IND_VHS;
+ strcpy(type,"CAF|IND");
+ break;
+
+ case INFO_CONST_TYPE:
+ *size = CONST_CLOSURE_SIZE(closure);
+ *ptrs = CONST_CLOSURE_NoPTRS(closure);
+ *nonptrs = CONST_CLOSURE_NoNONPTRS(closure);
+ *vhs = CONST_VHS;
+ strcpy(type,"CONST");
+ break;
+
+ case INFO_SPEC_RBH_TYPE:
+ *size = SPEC_RBH_CLOSURE_SIZE(closure);
+ *ptrs = SPEC_RBH_CLOSURE_NoPTRS(closure);
+ *nonptrs = SPEC_RBH_CLOSURE_NoNONPTRS(closure);
+ if (*ptrs <= 2) {
+ *nonptrs -= (2 - *ptrs);
+ *ptrs = 1;
+ } else
+ *ptrs -= 1;
+ *vhs = SPEC_RBH_VHS;
+ strcpy(type,"SPEC_RBH");
+ break;
+
+ case INFO_GEN_RBH_TYPE:
+ *size = GEN_RBH_CLOSURE_SIZE(closure);
+ *ptrs = GEN_RBH_CLOSURE_NoPTRS(closure);
+ *nonptrs = GEN_RBH_CLOSURE_NoNONPTRS(closure);
+ if (*ptrs <= 2) {
+ *nonptrs -= (2 - *ptrs);
+ *ptrs = 1;
+ } else
+ *ptrs -= 1;
+ *vhs = GEN_RBH_VHS;
+ strcpy(type,"GEN_RBH");
+ break;
+
+ case INFO_CHARLIKE_TYPE:
+ *size = CHARLIKE_CLOSURE_SIZE(closure);
+ *ptrs = CHARLIKE_CLOSURE_NoPTRS(closure);
+ *nonptrs = CHARLIKE_CLOSURE_NoNONPTRS(closure);
+ *vhs = CHARLIKE_VHS;
+ strcpy(type,"CHARLIKE");
+ break;
+
+ case INFO_INTLIKE_TYPE:
+ *size = INTLIKE_CLOSURE_SIZE(closure);
+ *ptrs = INTLIKE_CLOSURE_NoPTRS(closure);
+ *nonptrs = INTLIKE_CLOSURE_NoNONPTRS(closure);
+ *vhs = INTLIKE_VHS;
+ strcpy(type,"INTLIKE");
+ break;
+
+# if !defined(GRAN)
+ case INFO_FETCHME_TYPE:
+ *size = FETCHME_CLOSURE_SIZE(closure);
+ *ptrs = FETCHME_CLOSURE_NoPTRS(closure);
+ *nonptrs = FETCHME_CLOSURE_NoNONPTRS(closure);
+ *vhs = FETCHME_VHS;
+ strcpy(type,"FETCHME");
+ break;
+
+ case INFO_FMBQ_TYPE:
+ *size = FMBQ_CLOSURE_SIZE(closure);
+ *ptrs = FMBQ_CLOSURE_NoPTRS(closure);
+ *nonptrs = FMBQ_CLOSURE_NoNONPTRS(closure);
+ *vhs = FMBQ_VHS;
+ strcpy(type,"FMBQ");
+ break;
+# endif
+
+ case INFO_BQ_TYPE:
+ *size = BQ_CLOSURE_SIZE(closure);
+ *ptrs = BQ_CLOSURE_NoPTRS(closure);
+ *nonptrs = BQ_CLOSURE_NoNONPTRS(closure);
+ *vhs = BQ_VHS;
+ strcpy(type,"BQ");
+ break;
+
+ case INFO_BH_TYPE:
+ *size = BH_CLOSURE_SIZE(closure);
+ *ptrs = BH_CLOSURE_NoPTRS(closure);
+ *nonptrs = BH_CLOSURE_NoNONPTRS(closure);
+ *vhs = BH_VHS;
+ strcpy(type,"BH");
+ break;
+
+ case INFO_TSO_TYPE:
+ *size = 0; /* TSO_CLOSURE_SIZE(closure); */
+ *ptrs = 0; /* TSO_CLOSURE_NoPTRS(closure); */
+ *nonptrs = 0; /* TSO_CLOSURE_NoNONPTRS(closure); */
+ *vhs = TSO_VHS;
+ strcpy(type,"TSO");
+ break;
+
+ case INFO_STKO_TYPE:
+ *size = 0;
+ *ptrs = 0;
+ *nonptrs = 0;
+ *vhs = STKO_VHS;
+ strcpy(type,"STKO");
+ break;
+
+ default:
+ fprintf(stderr, "get_closure_info: Unexpected closure type (%lu), closure %lx\n",
+ INFO_TYPE(ip), (StgWord) closure);
+ EXIT(EXIT_FAILURE);
+ }
+
+ return ip;
+}
+# endif
+
+# if 0
+// Use allocate in Storage.c instead
+/*
+ @AllocateHeap@ will bump the heap pointer by @size@ words if the space
+ is available, but it will not perform garbage collection.
+ ToDo: check whether we can use an existing STG allocation routine -- HWL
+*/
+
+
+//@cindex AllocateHeap
+StgPtr
+AllocateHeap(size)
+nat size;
+{
+ StgPtr newClosure;
+
+ /* Allocate a new closure */
+ if (Hp + size > HpLim)
+ return NULL;
+
+ newClosure = Hp + 1;
+ Hp += size;
+
+ return newClosure;
+}
+# endif
+
+# if defined(PAR)
+
+//@cindex doGlobalGC
+void
+doGlobalGC(void)
+{
+ fprintf(stderr,"Splat -- we just hit global GC!\n");
+ stg_exit(EXIT_FAILURE);
+ //fishing = rtsFalse;
+ outstandingFishes--;
+}
+
+# endif /* PAR */
+
+//@node Printing Packet Contents, End of file, Aux fcts for packing, Graph packing
+//@subsection Printing Packet Contents
+/*
+ Printing Packet Contents
+ */
+
+#if defined(DEBUG) || defined(GRAN_CHECK)
+
+//@cindex PrintPacket
+
+#if defined(PAR)
+void
+PrintPacket(packBuffer)
+rtsPackBuffer *packBuffer;
+{
+ StgClosure *parent, *graphroot, *closure_start;
+ StgInfoTable *ip, *oldip;
+ globalAddr ga;
+ StgWord **buffer, **bufptr, **slotptr;
+
+ nat bufsize;
+ nat pptr = 0, pptrs = 0, pvhs;
+ nat unpack_locn = 0;
+ nat gastart = unpack_locn;
+ nat closurestart = unpack_locn;
+ nat i;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* NB: this whole routine is more or less a copy of UnpackGraph with all
+ unpacking components replaced by printing fcts
+ Long live higher-order fcts!
+ */
+ initPackBuffer(); /* in case it isn't already init'd */
+ graphroot = (StgClosure *)NULL;
+
+ // gaga = PendingGABuffer;
+
+ InitClosureQueue();
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ buffer = packBuffer->buffer;
+ bufptr = buffer;
+
+ /* allocate heap
+ if (bufsize > 0) {
+ graph = allocate(bufsize);
+ ASSERT(graph != NULL);
+ }
+ */
+
+ fprintf(stderr, ".* Printing <<%d>> (buffer @ %p):\n",
+ packBuffer->id, packBuffer);
+ fprintf(stderr, ".* size: %d; unpacked_size: %d; tso: %p; buffer: %p\n",
+ packBuffer->size, packBuffer->unpacked_size,
+ packBuffer->tso, packBuffer->buffer);
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight > 0) {
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ fprintf(stderr, ".* [%u]: PLC @ %p\n", gastart, ga.payload.plc);
+ // closure = ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ fprintf(stderr, ".* [%u]: OFFSET TO [%d]\n", gastart, ga.payload.gc.slot);
+ // closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Print normal closures */
+
+ ASSERT(bufsize > 0);
+
+ fprintf(stderr, ".* [%u]: ((%x, %d, %x)) ", gastart,
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight);
+
+ closure_start = bufptr;
+ ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* Print fixed header */
+ fprintf(stderr, "FH [");
+ for (i = 0; i < _HS; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ if (ip->type == FETCH_ME)
+ size = ptrs = nonptrs = vhs = 0;
+
+ /* Print variable header */
+ fprintf(stderr, "] VH [");
+ for (i = 0; i < vhs; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ fprintf(stderr, "] %d PTRS [", ptrs);
+
+ /* Pointers will be filled in later */
+
+ fprintf(stderr, " ] %d NON-PTRS [", nonptrs);
+ /* Print non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ fprintf(stderr, "] (%s)\n", str);
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing
+ When just printing the packet we do not have an unpacked closure
+ in hand, so we feed it the packet entry;
+ again, this assumes that at least the fixed header of the closure
+ has the same layout in the packet; also we may not overwrite entries
+ in the packet (done in Unpack), but for printing that's a bad idea
+ anyway */
+ QueueClosure((StgClosure *)closure_start);
+
+ /* No Common up needed for printing */
+
+ /* No Sort out the global address mapping for printing */
+
+ } /* normal closure case */
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+ fprintf(stderr, ".* --- End packet <<%d>> ---\n", packBuffer->id);
+}
+#else /* GRAN */
+void
+PrintPacket(buffer)
+rtsPackBuffer *buffer;
+{
+ // extern char *info_hdr_type(P_ infoptr); /* defined in Threads.lc */
+ // extern char *display_info_type(P_ infoptr); /* defined in Threads.lc */
+
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ char info_hdr_ty[80];
+ char str1[80], str2[80], junk_str[80];
+
+ /* globalAddr ga; */
+
+ nat bufsize, unpacked_size ;
+ StgClosure *parent;
+ nat pptr = 0, pptrs = 0, pvhs;
+
+ nat unpack_locn = 0;
+ nat gastart = unpack_locn;
+ nat closurestart = unpack_locn;
+
+ StgTSO *tso;
+ StgClosure *closure, *p;
+
+ nat i;
+
+ fprintf(stderr, "*** Printing <<%d>> (buffer @ %p):\n", buffer->id, buffer);
+ fprintf(stderr, " size: %d; unpacked_size: %d; tso: %d (%p); buffer: %p\n",
+ buffer->size, buffer->unpacked_size, buffer->tso, buffer->buffer);
+ fputs(" contents: ", stderr);
+ for (unpack_locn=0; unpack_locn<buffer->size; unpack_locn++) {
+ closure = buffer->buffer[unpack_locn];
+ fprintf(stderr, ", %p (%s)",
+ closure, info_type(closure));
+ }
+ fputc('\n', stderr);
+
+#if 0
+  /* traverse all elements of the graph; omitted for now, but might be useful */
+ InitClosureQueue();
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpacked_size = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ fprintf(stderr, "Packet %p, size %u (unpacked size is %u); demanded by TSO %d (%p)[PE %d]\n--- Begin ---\n",
+ buffer, bufsize, unpacked_size,
+ tso->id, tso, where_is((StgClosure*)tso));
+
+ do {
+ closurestart = unpack_locn;
+ closure = buffer->buffer[unpack_locn++];
+
+ fprintf(stderr, "[%u]: (%p) ", closurestart, closure);
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str1);
+ strcpy(str2, str1);
+ fprintf(stderr, "(%s|%s) ", str1, str2);
+
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ IS_BLACK_HOLE(closure))
+ size = ptrs = nonptrs = vhs = 0;
+
+ if (closure_THUNK(closure)) {
+ if (closure_UNPOINTED(closure))
+ fputs("UNPOINTED ", stderr);
+ else
+ fputs("POINTED ", stderr);
+ }
+ if (IS_BLACK_HOLE(closure)) {
+ fputs("BLACK HOLE\n", stderr);
+ } else {
+ /* Fixed header */
+ fprintf(stderr, "FH [");
+ for (i = 0, p = (StgClosure*)&(closure->header); i < _HS; i++, p++)
+ fprintf(stderr, " %p", *p);
+
+ /* Variable header
+ if (vhs > 0) {
+ fprintf(stderr, "] VH [%p", closure->payload[_HS]);
+
+ for (i = 1; i < vhs; i++)
+ fprintf(stderr, " %p", closure->payload[_HS+i]);
+ }
+ */
+ fprintf(stderr, "] PTRS %u", ptrs);
+
+ /* Non-pointers */
+ if (nonptrs > 0) {
+ fprintf(stderr, " NPTRS [%p", closure->payload[_HS+vhs]);
+
+ for (i = 1; i < nonptrs; i++)
+ fprintf(stderr, " %p", closure->payload[_HS+vhs+i]);
+
+ putc(']', stderr);
+ }
+ putc('\n', stderr);
+ }
+ } while (unpack_locn<bufsize) ; /* (parent != NULL); */
+
+ fprintf(stderr, "--- End ---\n\n");
+#endif /* 0 */
+}
+#endif /* PAR */
+#endif /* DEBUG || GRAN_CHECK */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node End of file, , Printing Packet Contents, Graph packing
+//@subsection End of file
+//@index
+//* AllocClosureQueue:: @cindex\s-+AllocClosureQueue
+//* AllocateHeap:: @cindex\s-+AllocateHeap
+//* AmPacking:: @cindex\s-+AmPacking
+//* CommonUp:: @cindex\s-+CommonUp
+//* DeQueueClosure:: @cindex\s-+DeQueueClosure
+//* DonePacking:: @cindex\s-+DonePacking
+//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE
+//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION
+//* InitClosureQueue:: @cindex\s-+InitClosureQueue
+//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer
+//* NotYetPacking:: @cindex\s-+NotYetPacking
+//* OffsetFor:: @cindex\s-+OffsetFor
+//* Pack:: @cindex\s-+Pack
+//* PackClosure:: @cindex\s-+PackClosure
+//* PackNearbyGraph:: @cindex\s-+PackNearbyGraph
+//* PackOneNode:: @cindex\s-+PackOneNode
+//* PackPLC:: @cindex\s-+PackPLC
+//* PackStkO:: @cindex\s-+PackStkO
+//* PackTSO:: @cindex\s-+PackTSO
+//* PendingGABuffer:: @cindex\s-+PendingGABuffer
+//* PrintPacket:: @cindex\s-+PrintPacket
+//* QueueClosure:: @cindex\s-+QueueClosure
+//* QueueEmpty:: @cindex\s-+QueueEmpty
+//* RoomToPack:: @cindex\s-+RoomToPack
+//* UnpackGraph:: @cindex\s-+UnpackGraph
+//* doGlobalGC:: @cindex\s-+doGlobalGC
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* initPackBuffer:: @cindex\s-+initPackBuffer
+//* isFixed:: @cindex\s-+isFixed
+//* isOffset:: @cindex\s-+isOffset
+//* offsetTable:: @cindex\s-+offsetTable
+//@end index
--- /dev/null
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sat Dec 04 1999 18:26:22 Stardate: [-30]3998.84 hwloidl>
+ $Id: ParInit.c,v 1.2 2000/01/13 14:34:08 hwloidl Exp $
+
+ Initialising the parallel RTS
+
+ An extension based on Kevin Hammond's GRAPH for PVM version
+ P. Trinder, January 17th 1995.
+ Adapted for the new RTS
+ P. Trinder, July 1997.
+ H-W. Loidl, November 1999.
+
+ ------------------------------------------------------------------------ */
+
+#ifdef PAR /* whole file */
+
+#define NON_POSIX_SOURCE /* so says Solaris */
+
+//@menu
+//* Includes::
+//* Global variables::
+//* Initialisation Routines::
+//@end menu
+
+//@node Includes, Global variables
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParallelRts.h"
+#include <setjmp.h>
+#include "LLC.h"
+#include "HLC.h"
+
+//@node Global variables, Initialisation Routines, Includes
+//@subsection Global variables
+
+/* Global conditions defined here. */
+
+rtsBool IAmMainThread = rtsFalse, /* Set for the main thread */
+ GlobalStopPending = rtsFalse; /* Terminating */
+
+/* Task identifiers for various interesting global tasks. */
+
+GlobalTaskId IOTask = 0, /* The IO Task Id */
+ SysManTask = 0, /* The System Manager Task Id */
+ mytid = 0; /* This PE's Task Id */
+
+rtsTime main_start_time; /* When the program started */
+rtsTime main_stop_time; /* When the program finished */
+jmp_buf exit_parallel_system; /* How to abort from the RTS */
+
+
+//rtsBool fishing = rtsFalse; /* We have no fish out in the stream */
+rtsTime last_fish_arrived_at = 0; /* Time of arrival of most recent fish*/
+nat outstandingFishes = 0; /* Number of active fishes */
+
+//@cindex spark queue
+/* A globally visible array of spark queues */
+rtsSpark *pending_sparks_hd[SPARK_POOLS], /* ptr to start of a spark pool */
+ *pending_sparks_tl[SPARK_POOLS], /* ptr to end of a spark pool */
+ *pending_sparks_lim[SPARK_POOLS],
+ *pending_sparks_base[SPARK_POOLS];
+
+//@cindex spark_limit
+/* max number of sparks permitted on the PE;
+ see RtsFlags.ParFlags.maxLocalSparks */
+nat spark_limit[SPARK_POOLS];
+
+globalAddr theGlobalFromGA, theGlobalToGA;
+/*
+ HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK !! see FETCH_ME_entry
+ Only used within FETCH_ME_entry as local vars, but they shouldn't
+ be defined locally in there -- that would move %esp and you'll never
+ return from STG land.
+ -- HWL
+*/
+globalAddr *rga_GLOBAL;
+globalAddr *lga_GLOBAL;
+globalAddr fmbqga_GLOBAL;
+StgClosure *p_GLOBAL;
+
+//@cindex PendingFetches
+/* A list of fetch reply messages not yet processed; this list is filled
+ by awaken_blocked_queue and processed by processFetches */
+StgBlockedFetch *PendingFetches = END_BF_QUEUE;
+
+//@cindex allPEs
+GlobalTaskId *allPEs;
+
+//@cindex nPEs
+nat nPEs = 0;
+
+//@cindex sparksIgnored
+nat sparksIgnored = 0, sparksCreated = 0,
+ threadsIgnored = 0, threadsCreated = 0;
+
+//@cindex advisory_thread_count
+nat advisory_thread_count = 0;
+
+/* Where to write the log file
+ This is now in Parallel.c
+FILE *gr_file = NULL;
+char gr_filename[STATS_FILENAME_MAXLEN];
+*/
+
+/* Flag handling. */
+
+#if 0
+/* that's now all done via RtsFlags.ParFlags... */
+rtsBool TraceSparks = rtsFalse; /* Enable the spark trace mode */
+rtsBool SparkLocally = rtsFalse; /* Use local threads if possible */
+rtsBool DelaySparks = rtsFalse; /* Use delayed sparking */
+rtsBool LocalSparkStrategy = rtsFalse; /* Either delayed threads or local threads*/
+rtsBool GlobalSparkStrategy = rtsFalse; /* Export all threads */
+
+rtsBool DeferGlobalUpdates = rtsFalse; /* Defer updating of global nodes */
+#endif
+
+//@node Initialisation Routines, , Global variables
+//@subsection Initialisation Routines
+
+/*
+  par_exit (now shutdownParallelSystem) defines how to terminate the
+  program.  If the exit code is non-zero (i.e. an error has occurred), the
+  PE should not halt until outstanding error messages have been processed.
+  Otherwise, messages might be sent to non-existent Task Ids.  The waiting
+  loop will actually terminate, since STG_Exception calls myexit(0) when
+  it receives a PP_FINISH from the system manager task.
+*/
+//@cindex par_exit
+void
+shutdownParallelSystem(StgInt n)
+{
+ belch(" entered shutdownParallelSystem ...");
+  GlobalStopPending = rtsTrue;
+ sendOp(PP_FINISH, SysManTask);
+ if (n != 0)
+ waitForTermination();
+ else
+ waitForPEOp(PP_FINISH, SysManTask);
+ shutDownPE();
+ IF_PAR_DEBUG(verbose,
+ belch("--++ shutting down PE %lx, %ld sparks created, %ld sparks Ignored, %ld threads created, %ld threads Ignored",
+ (W_) mytid, sparksCreated, sparksIgnored,
+ threadsCreated, threadsIgnored));
+ exit(n);
+}
+
+/* Some prototypes */
+void srand48 (long);
+time_t time (time_t *);
+
+//@cindex initParallelSystem
+void
+initParallelSystem(void)
+{
+ belch("entered initParallelSystem ...");
+
+ /* Don't buffer standard channels... */
+ setbuf(stdout,NULL);
+ setbuf(stderr,NULL);
+
+ srand48(time(NULL) * getpid()); /*Initialise Random-number generator seed*/
+ /* Used to select target of FISH message*/
+
+ theGlobalFromGA.payload.gc.gtid = 0;
+ theGlobalToGA.payload.gc.gtid = 0;
+
+ //IF_PAR_DEBUG(verbose,
+ belch("initPackBuffer ...");
+ if (!initPackBuffer())
+ barf("initPackBuffer");
+
+ // IF_PAR_DEBUG(verbose,
+ belch("initMoreBuffers ...");
+ if (!initMoreBuffers())
+ barf("initMoreBuffers");
+
+ // IF_PAR_DEBUG(verbose,
+ belch("initSparkPools ...");
+ if (!initSparkPools())
+ barf("initSparkPools");
+}
+
+/*
+ * SynchroniseSystem synchronises the reduction task with the system
+ * manager, and initialises the Global address tables (LAGA & GALA)
+ */
+
+//@cindex SynchroniseSystem
+void
+SynchroniseSystem(void)
+{
+ int i;
+
+ fprintf(stderr, "SynchroniseSystem: nPEs=%d\n", nPEs);
+
+ initEachPEHook(); /* HWL: hook to be execed on each PE */
+
+ fprintf(stderr, "SynchroniseSystem: initParallelSystem\n");
+ initParallelSystem();
+ allPEs = startUpPE(nPEs);
+
+ /* Initialize global address tables */
+ initGAtables();
+
+  /* Record the shortened PE identifiers for the LAGA etc. tables */
+ for (i = 0; i < nPEs; ++i) {
+ fprintf(stderr, "[%x] registering %d-th PE as %x\n", mytid, i, allPEs[i]);
+ registerTask(allPEs[i]);
+ }
+}
+
+#endif /* PAR -- whole file */
+
+//@index
+//* PendingFetches:: @cindex\s-+PendingFetches
+//* SynchroniseSystem:: @cindex\s-+SynchroniseSystem
+//* allPEs:: @cindex\s-+allPEs
+//* initParallelSystem:: @cindex\s-+initParallelSystem
+//* nPEs:: @cindex\s-+nPEs
+//* par_exit:: @cindex\s-+par_exit
+//* spark queue:: @cindex\s-+spark queue
+//* sparksIgnored:: @cindex\s-+sparksIgnored
+//@end index
--- /dev/null
+/* -----------------------------------------------------------------------------
+ * ParInit.h,1
+ *
+ * Phil Trinder
+ * July 1998
+ *
+ * External Parallel Initialisation Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PARINIT_H
+#define PARINIT_H
+
+extern void RunParallelSystem (P_);
+extern void initParallelSystem(void);
+extern void SynchroniseSystem(void);
+extern void par_exit(I_);
+
+#endif /* PARINIT_H */
--- /dev/null
+/* ---------------------------------------------------------------------------
+ * Time-stamp: <Tue Nov 09 1999 16:31:38 Stardate: [-30]3873.44 hwloidl>
+ * $Id: ParTypes.h,v 1.2 2000/01/13 14:34:08 hwloidl Exp $
+ *
+ * Runtime system types for GUM
+ *
+ * ------------------------------------------------------------------------- */
+
+#ifndef PARTYPES_H
+#define PARTYPES_H
+
+#ifdef PAR /* all of it */
+
+// now in Parallel.h
+//typedef struct hashtable HashTable;
+//typedef struct hashlist HashList;
+
+/* Global addresses now live in Parallel.h (needed in Closures.h) */
+// gaddr
+
+// now in Parallel.h
+/* (GA, LA) pairs
+typedef struct gala {
+ globalAddr ga;
+ StgPtr la;
+ struct gala *next;
+ rtsBool preferred;
+} rtsGaLa;
+*/
+
+#if defined(GRAN)
+typedef unsigned long TIME;
+typedef unsigned char Proc;
+typedef unsigned char EVTTYPE;
+#endif
+
+#endif /* PAR */
+
+#endif /* ! PARTYPES_H */
--- /dev/null
+/*
+ Time-stamp: <Sat Dec 04 1999 19:43:39 Stardate: [-30]3999.10 hwloidl>
+
+ Basic functions for use in either GranSim or GUM.
+*/
+
+#if defined(GRAN) || defined(PAR) /* whole file */
+
+//@menu
+//* Includes::
+//* Variables and constants::
+//* Writing to the log-file::
+//* Dumping routines::
+//@end menu
+
+//@node Includes, Variables and constants
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+
+
+//@node Variables and constants, Writing to the log-file, Includes
+//@subsection Variables and constants
+
+/* Where to write the log file */
+FILE *gr_file = NULL;
+char gr_filename[STATS_FILENAME_MAXLEN];
+
+//@node Writing to the log-file, Dumping routines, Variables and constants
+//@subsection Writing to the log-file
+/*
+ Writing to the log-file
+
+ These routines dump event-based info to the main log-file.
+ The code for writing log files is shared between GranSim and GUM.
+*/
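+
+/*
+  For orientation only: a schematic picture of the plain-text lines that
+  DumpRawGranEvent (below) writes, one event per line.  The field values
+  here are invented and the exact event-name column comes from
+  gran_event_names[]:
+
+      PE  0 [42000]: START     	1a	0x4f0dd8	[SN 0]	[sparks 0]
+      PE  1 [42730]: FETCH     	1a 	0x4f0dd8	(from  0)
+      PE  0 [43210]: SCHEDULE  	1b
+
+  i.e. "PE <proc> [<time>]: <event> <thread-id> <node> <extra info>".
+*/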
+
+/*
+ * If you're not using GNUC and you're on a 32-bit machine, you're
+ * probably out of luck here. However, since CONCURRENT currently
+ * requires GNUC, I'm not too worried about it. --JSM
+ */
+
+//@cindex init_gr_simulation
+#if defined(GRAN)
+void
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ nat i;
+ char *extension = RtsFlags.GranFlags.GranSimStats.Binary ? "gb" : "gr";
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ init_gr_stats();
+
+ /* init global constants for costs of basic operations */
+ gran_arith_cost = RtsFlags.GranFlags.Costs.arith_cost;
+ gran_branch_cost = RtsFlags.GranFlags.Costs.branch_cost;
+ gran_load_cost = RtsFlags.GranFlags.Costs.load_cost;
+ gran_store_cost = RtsFlags.GranFlags.Costs.store_cost;
+ gran_float_cost = RtsFlags.GranFlags.Costs.float_cost;
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+
+ if (!RtsFlags.GranFlags.GranSimStats.Full)
+ return;
+
+ sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL) {
+ barf("Can't open granularity simulation report file %s\n",
+ gr_filename);
+ }
+
+ setbuf(gr_file, NULL); // for debugging turn buffering off
+
+ /* write header with program name, options and setup to gr_file */
+ fputs("Granularity Simulation for ", gr_file);
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+
+ fputs("\nStart time: ", gr_file);
+ fputs(time_str(), gr_file); /* defined in RtsUtils.c */
+ fputc('\n', gr_file);
+
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("General Parameters:\n\n", gr_file);
+
+ if (RtsFlags.GranFlags.Light)
+ fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RtsFlags.GranFlags.DoThreadMigration?"":"Don't ",
+ RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" :
+ "Block on Fetch");
+ else
+ fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RtsFlags.GranFlags.proc,RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RtsFlags.GranFlags.DoThreadMigration?"":"Don't ",
+ RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" :
+ "Block on Fetch");
+
+ if (RtsFlags.GranFlags.DoBulkFetching)
+ if (RtsFlags.GranFlags.ThunksToPack)
+ fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
+ RtsFlags.GranFlags.ThunksToPack,
+ RtsFlags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
+ RtsFlags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
+
+ fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
+ RtsFlags.GranFlags.FetchStrategy,
+ RtsFlags.GranFlags.FetchStrategy==0 ?
+ " block (block-on-fetch)":
+ RtsFlags.GranFlags.FetchStrategy==1 ?
+ "only run runnable threads":
+ RtsFlags.GranFlags.FetchStrategy==2 ?
+ "create threads only from local sparks":
+ RtsFlags.GranFlags.FetchStrategy==3 ?
+ "create threads from local or global sparks":
+ RtsFlags.GranFlags.FetchStrategy==4 ?
+ "create sparks and steal threads if necessary":
+ "unknown");
+
+ if (RtsFlags.GranFlags.DoPrioritySparking)
+ fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
+
+ if (RtsFlags.GranFlags.DoPriorityScheduling)
+ fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
+
+ fprintf(gr_file, "Thread Creation Time %u, Thread Queue Time %u\n",
+ RtsFlags.GranFlags.Costs.threadcreatetime,
+ RtsFlags.GranFlags.Costs.threadqueuetime);
+ fprintf(gr_file, "Thread DeSchedule Time %u, Thread Schedule Time %u\n",
+ RtsFlags.GranFlags.Costs.threaddescheduletime,
+ RtsFlags.GranFlags.Costs.threadscheduletime);
+ fprintf(gr_file, "Thread Context-Switch Time %u\n",
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Communication Metrics:\n\n", gr_file);
+ fprintf(gr_file,
+ "Latency %u (1st) %u (rest), Fetch %u, Notify %u (Global) %u (Local)\n",
+ RtsFlags.GranFlags.Costs.latency,
+ RtsFlags.GranFlags.Costs.additional_latency,
+ RtsFlags.GranFlags.Costs.fetchtime,
+ RtsFlags.GranFlags.Costs.gunblocktime,
+ RtsFlags.GranFlags.Costs.lunblocktime);
+ fprintf(gr_file,
+ "Message Creation %u (+ %u after send), Message Read %u\n",
+ RtsFlags.GranFlags.Costs.mpacktime,
+ RtsFlags.GranFlags.Costs.mtidytime,
+ RtsFlags.GranFlags.Costs.munpacktime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Instruction Metrics:\n\n", gr_file);
+ fprintf(gr_file, "Arith %u, Branch %u, Load %u, Store %u, Float %u, Alloc %u\n",
+ RtsFlags.GranFlags.Costs.arith_cost,
+ RtsFlags.GranFlags.Costs.branch_cost,
+ RtsFlags.GranFlags.Costs.load_cost,
+ RtsFlags.GranFlags.Costs.store_cost,
+ RtsFlags.GranFlags.Costs.float_cost,
+ RtsFlags.GranFlags.Costs.heapalloc_cost);
+ fputs("\n\n++++++++++++++++++++\n\n", gr_file);
+
+# if 0
+ /* binary log files are currently not supported */
+ if (RtsFlags.GranFlags.GranSimStats.Binary)
+ grputw(sizeof(rtsTime));
+# endif
+
+  return;
+}
+
+#elif defined(PAR)
+
+void
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ nat i;
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+ char *extension = RtsFlags.ParFlags.ParStats.Binary ? "gb" : "gr";
+
+ sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
+
+ if (!RtsFlags.ParFlags.ParStats.Full)
+ return;
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL)
+ barf("Can't open activity report file %s\n", gr_filename);
+
+ setbuf(gr_file, NULL); // for debugging turn buffering off
+
+ /* write header with program name, options and setup to gr_file */
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+ fputc('\n', gr_file);
+
+ /* record the absolute start time to allow synchronisation of log-files */
+ fputs("Start-Time: ", gr_file);
+ fputs(time_str(), gr_file);
+ fputc('\n', gr_file);
+
+ startTime = CURRENT_TIME;
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+ fprintf(gr_file, "PE %2u [%s]: TIME\n", thisPE, time_string);
+
+ /*
+ IF_PAR_DEBUG(verbose,
+ belch("== Start-time: %ld (%s)",
+ startTime, time_string));
+ */
+# if 0
+ ngoq Dogh'q' vImuS
+
+ if (startTime > LL(1000000000)) {
+ fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
+ (rtsTime) (startTime / LL(1000000000)),
+ (rtsTime) (startTime % LL(1000000000)));
+ } else {
+ fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
+ }
+ /* binary log files are currently not supported */
+ if (RtsFlags.GranFlags.GranSimStats.Binary)
+ grputw(sizeof(rtsTime));
+# endif
+
+ return;
+}
+#endif /* PAR */
+
+//@cindex end_gr_simulation
+#if defined(GRAN)
+void
+end_gr_simulation(void)
+{
+ char time_string[TIME_STR_LEN];
+
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+
+ /* Print event stats */
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ nat i;
+
+ fprintf(stderr,"Total yields: %d\n",
+ globalGranStats.tot_yields);
+
+ fprintf(stderr,"Total number of threads created: %d ; per PE:\n",
+ globalGranStats.tot_threads_created);
+ for (i=0; i<RtsFlags.GranFlags.proc; i++) {
+ fprintf(stderr," PE %d: %d\t",
+ i, globalGranStats.threads_created_on_PE[i]);
+      if ((i+1) % 4 == 0) fprintf(stderr,"\n");
+    }
+    if ((RtsFlags.GranFlags.proc+1) % 4 != 0) fprintf(stderr,"\n");
+ fprintf(stderr,"Total number of threads migrated: %d\n",
+ globalGranStats.tot_TSOs_migrated);
+
+ fprintf(stderr,"Total number of sparks created: %d ; per PE:\n",
+ globalGranStats.tot_sparks_created);
+ for (i=0; i<RtsFlags.GranFlags.proc; i++) {
+ fprintf(stderr," PE %d: %d\t",
+ i, globalGranStats.sparks_created_on_PE[i]);
+      if ((i+1) % 4 == 0) fprintf(stderr,"\n");
+    }
+    if ((RtsFlags.GranFlags.proc+1) % 4 != 0) fprintf(stderr,"\n");
+
+ fprintf(stderr,"Event statistics (number of events: %d):\n",
+ globalGranStats.noOfEvents);
+ for (i=0; i<=MAX_EVENT; i++) {
+ fprintf(stderr," %s (%d): \t%d \t%f%%\t%f%%\n",
+ event_names[i],i,globalGranStats.event_counts[i],
+ (float)(100*globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents),
+ (i==ContinueThread ? 0.0 :
+ (float)(100*(globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents-globalGranStats.event_counts[ContinueThread])) ));
+ }
+ fprintf(stderr,"Randomized steals: %ld sparks, %ld threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f)\n\t(Threads: %ld)",
+ globalGranStats.rs_sp_count,
+ globalGranStats.rs_t_count,
+ globalGranStats.no_of_steals,
+ (float)globalGranStats.ntimes_total/(float)stg_max(globalGranStats.no_of_steals,1),
+ (float)globalGranStats.fl_total/(float)stg_max(globalGranStats.no_of_steals,1),
+ globalGranStats.no_of_migrates);
+ fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
+ globalGranStats.tot_sparks, globalGranStats.withered_sparks,
+ ( globalGranStats.tot_sparks == 0 ? 0 :
+ (float)(100*globalGranStats.withered_sparks)/(float)(globalGranStats.tot_sparks)) );
+ /* Print statistics about priority sparking */
+ if (RtsFlags.GranFlags.DoPrioritySparking) {
+ fprintf(stderr,"About Priority Sparking:\n");
+ fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", globalGranStats.tot_sq_probes, (float)globalGranStats.tot_sq_len/(float)globalGranStats.tot_sq_probes);
+ }
+ /* Print statistics about priority sparking */
+ if (RtsFlags.GranFlags.DoPriorityScheduling) {
+ fprintf(stderr,"About Priority Scheduling:\n");
+ fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
+ globalGranStats.tot_add_threads, globalGranStats.non_end_add_threads,
+ (float)globalGranStats.tot_tq_len/(float)globalGranStats.tot_add_threads);
+ }
+ /* Blocking queue statistics */
+ if (1) {
+ fprintf(stderr,"Blocking queue statistcs:\n");
+ fprintf(stderr," Total no. of FMBQs generated: %d\n",
+ globalGranStats.tot_FMBQs);
+ fprintf(stderr," Total no. of bqs awakened: %d\n",
+ globalGranStats.tot_awbq);
+ fprintf(stderr," Total length of all bqs: %d\tAvg length of bqs: %.2f\n",
+ globalGranStats.tot_bq_len, (float)globalGranStats.tot_bq_len/(float)globalGranStats.tot_awbq);
+ fprintf(stderr," Percentage of local TSOs in BQs: %.2f\n",
+ (float)globalGranStats.tot_bq_len*100.0/(float)globalGranStats.tot_bq_len);
+ fprintf(stderr," Total time spent processing BQs: %lx\n",
+ globalGranStats.tot_bq_processing_time);
+ }
+
+ /* Fetch misses and thunk stealing */
+ fprintf(stderr,"Number of fetch misses: %d\n",
+ globalGranStats.fetch_misses);
+
+    /* Print packet statistics if GUM-style bulk fetching is turned on */
+    if (RtsFlags.GranFlags.DoBulkFetching) {
+      fprintf(stderr,"Packet statistics:\n");
+ fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", globalGranStats.tot_packets, (float)globalGranStats.tot_packet_size/(float)globalGranStats.tot_packets);
+ fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", globalGranStats.tot_thunks, (float)globalGranStats.tot_thunks/(float)globalGranStats.tot_packets);
+ fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", globalGranStats.tot_cuts, (float)globalGranStats.tot_cuts/(float)globalGranStats.tot_packets);
+ /*
+ if (closure_queue_overflows>0)
+ fprintf(stderr," Number of closure queue overflows: %u\n",
+ closure_queue_overflows);
+ */
+ }
+ } /* RtsFlags.GranFlags.GranSimStats.Global */
+
+# if defined(GRAN_COUNT)
+# error "GRAN_COUNT not supported; should be parallel ticky profiling, really"
+ fprintf(stderr,"Update count statistics:\n");
+ fprintf(stderr," Total number of updates: %u\n",nUPDs);
+ fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
+ nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
+ fprintf(stderr," Number of PAPs: %u\n",nPAPs);
+# endif
+
+ fprintf(stderr, "Simulation finished after @ %s @ cycles. %d sparks created, %d sparks ignored. Check %s for details.\n",
+ time_string, sparksCreated, sparksIgnored, gr_filename);
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ fclose(gr_file);
+}
+
+#elif defined(PAR)
+
+/*
+ Under GUM we print only one line.
+*/
+void
+end_gr_simulation(void)
+{
+ char time_string[TIME_STR_LEN];
+
+ ullong_format_string(CURRENT_TIME-startTime, time_string, rtsFalse/*no commas!*/);
+
+ fprintf(stderr, "Computation finished after @ %s @ ms. %d sparks created, %d sparks ignored. Check %s for details.\n",
+ time_string, sparksCreated, sparksIgnored, gr_filename);
+
+ if (RtsFlags.ParFlags.ParStats.Full)
+ fclose(gr_file);
+}
+#endif /* PAR */
+
+//@node Dumping routines, , Writing to the log-file
+//@subsection Dumping routines
+
+//@cindex DumpGranEvent
+void
+DumpGranEvent(name, tso)
+GranEventType name;
+StgTSO *tso;
+{
+ DumpRawGranEvent(CURRENT_PROC, (PEs)0, name, tso, END_TSO_QUEUE, (StgInt)0, (StgInt)0);
+}
+
+//@cindex DumpRawGranEvent
+void
+DumpRawGranEvent(proc, p, name, tso, node, sparkname, len)
+PEs proc, p; /* proc ... where it happens; p ... where node lives */
+GranEventType name;
+StgTSO *tso;
+StgClosure *node;
+StgInt sparkname, len;
+{
+ FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1
+ StgWord id;
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+ ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/);
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+# if defined(GRAN)
+ IF_DEBUG(gran,
+ fprintf(stderr, "GRAN: Dumping info to file with handle %#x\n", output_file))
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ id = tso == NULL ? -1 : tso->id;
+ if (node==stgCast(StgClosure*,&END_TSO_QUEUE_closure))
+ strcpy(node_str,"________"); /* "END_TSO_QUEUE"); */
+ else
+ sprintf(node_str,"0x%-6lx",node);
+
+ if (name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ if (BINARY_STATS)
+ barf("binary log files not yet supported");
+#if 0
+ /* ToDo: fix code for writing binary GrAnSim statistics */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((StgWord)node);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc)); /* this line is bound to */
+ grputw(id); /* do the wrong thing */
+ break;
+ default:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((StgWord)node);
+ }
+#endif
+ else /* !BINARY_STATS */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],
+ id,node_str,sparkname,len);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ case GR_STOLEN:
+ case GR_STOLENQ:
+ fprintf(output_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
+ proc, time_string, gran_event_names[name],
+ id,node_str,p);
+ break;
+ case GR_RESUME:
+ case GR_RESUMEQ:
+ case GR_SCHEDULE:
+ case GR_DESCHEDULE:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx \n",
+ proc,time_string,gran_event_names[name],id);
+ break;
+ case GR_STEALING:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n",
+ proc,time_string,gran_event_names[name],id,p);
+ break;
+ case GR_ALLOC:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
+ proc,time_string,gran_event_names[name],id,len);
+ break;
+ default:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],id,node_str,len);
+ }
+}
+
+//@cindex DumpEndEvent
+void
+DumpEndEvent(proc, tso, mandatory_thread)
+PEs proc;
+StgTSO *tso;
+rtsBool mandatory_thread;
+{
+ FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1
+ char time_string[TIME_STR_LEN];
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+#if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+#endif
+
+ if (BINARY_STATS) {
+ barf("binary log files not yet supported");
+#if 0
+ grputw(GR_END);
+ grputw(proc);
+ abort(); /* die please: a single word doesn't represent long long times */
+ grputw(CURRENT_TIME); /* this line is bound to fail */
+ grputw(tso->id);
+#ifdef PAR
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+#else
+ grputw(tso->gran.sparkname);
+ grputw(tso->gran.startedat);
+ grputw(tso->gran.exported);
+ grputw(tso->gran.basicblocks);
+ grputw(tso->gran.allocs);
+ grputw(tso->gran.exectime);
+ grputw(tso->gran.blocktime);
+ grputw(tso->gran.blockcount);
+ grputw(tso->gran.fetchtime);
+ grputw(tso->gran.fetchcount);
+ grputw(tso->gran.localsparks);
+ grputw(tso->gran.globalsparks);
+#endif
+ grputw(mandatory_thread);
+#endif /* 0 */
+ } else {
+
+  /*
+   * NB: DumpGranEvent cannot be used here: the PE recorded there may be
+   * wrong, and it cannot emit the extra per-thread statistics below.
+   */
+ fprintf(output_file, "PE %2u [%s]: END %lx, SN %u, ST %lu, EXP %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u), LS %u, GS %u, MY %c\n"
+ ,proc
+ ,time_string
+ ,tso->id
+#if defined(GRAN)
+ ,tso->gran.sparkname
+ ,tso->gran.startedat
+ ,tso->gran.exported ? 'T' : 'F'
+ ,tso->gran.basicblocks
+ ,tso->gran.allocs
+ ,tso->gran.exectime
+ ,tso->gran.blocktime
+ ,tso->gran.blockcount
+ ,tso->gran.fetchtime
+ ,tso->gran.fetchcount
+ ,tso->gran.localsparks
+ ,tso->gran.globalsparks
+#elif defined(PAR)
+ ,tso->par.sparkname
+ ,tso->par.startedat
+ ,tso->par.exported ? 'T' : 'F'
+ ,tso->par.basicblocks
+ ,tso->par.allocs
+ ,tso->par.exectime
+ ,tso->par.blocktime
+ ,tso->par.blockcount
+ ,tso->par.fetchtime
+ ,tso->par.fetchcount
+ ,tso->par.localsparks
+ ,tso->par.globalsparks
+#endif
+ ,mandatory_thread ? 'T' : 'F'
+ );
+ }
+}
+
+//@cindex DumpTSO
+void
+DumpTSO(tso)
+StgTSO *tso;
+{
+  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+ fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %u, LINK 0x%lx, TYPE %s\n"
+ ,tso
+#if defined(GRAN)
+ ,tso->gran.sparkname
+#elif defined(PAR)
+ ,tso->par.sparkname
+#endif
+ ,tso->id
+ ,tso->link
+ ,/*tso->state==T_MAIN?"MAIN":
+ TSO_TYPE(tso)==T_FAIL?"FAIL":
+ TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
+ TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
+ */
+ "???"
+ );
+
+ fprintf(output_file,"TSO %lx: SN %u, ST %u, GBL %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u) LS %u, GS %u\n"
+ ,tso->id
+#if defined(GRAN)
+ ,tso->gran.sparkname
+ ,tso->gran.startedat
+ ,tso->gran.exported?'T':'F'
+ ,tso->gran.basicblocks
+ ,tso->gran.allocs
+ ,tso->gran.exectime
+ ,tso->gran.blocktime
+ ,tso->gran.blockcount
+ ,tso->gran.fetchtime
+ ,tso->gran.fetchcount
+ ,tso->gran.localsparks
+ ,tso->gran.globalsparks
+#elif defined(PAR)
+ ,tso->par.sparkname
+ ,tso->par.startedat
+ ,tso->par.exported?'T':'F'
+ ,tso->par.basicblocks
+ ,tso->par.allocs
+ ,tso->par.exectime
+ ,tso->par.blocktime
+ ,tso->par.blockcount
+ ,tso->par.fetchtime
+ ,tso->par.fetchcount
+ ,tso->par.localsparks
+ ,tso->par.globalsparks
+#endif
+ );
+}
+
+#if 0
+/*
+ ToDo: fix binary output of log files, and support new log file format.
+*/
+/*
+ Output a terminate event and an 8-byte time.
+*/
+
+//@cindex grterminate
+void
+grterminate(v)
+rtsTime v;
+{
+ if (!BINARY_STATS)
+ barf("grterminate: binary statistics not enabled\n");
+
+# if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ DumpGranEvent(GR_TERMINATE, stgCast(StgTSO*,&END_TSO_QUEUE_closure));
+
+ if (sizeof(rtsTime) == 4) {
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ } else {
+ putc(v >> 56l, gr_file);
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ }
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+}
+
+/*
+ Length-coded output: first 3 bits contain length coding
+
+ 00x 1 byte
+ 01x 2 bytes
+ 10x 4 bytes
+ 110 8 bytes
+ 111 5 or 9 bytes
+*/
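+/*
+  Worked example (illustration only, not part of the original code):
+  0x1234 needs more than 6 but at most 14 bits, so grputw emits the 2-byte
+  form: first (0x1234 >> 8) | 0x40 = 0x52, then 0x1234 & 0xff = 0x34.
+*/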
+
+//@cindex grputw
+void
+grputw(v)
+rtsTime v;
+{
+ if (!BINARY_STATS)
+ barf("grputw: binary statistics not enabled\n");
+
+# if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ if (v <= 0x3fl) { /* length v = 1 byte */
+ fputc(v & 0x3f, gr_file);
+ } else if (v <= 0x3fffl) { /* length v = 2 byte */
+ fputc((v >> 8l) | 0x40l, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
+ fputc((v >> 24l) | 0x80l, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+  } else if (sizeof(rtsTime) == 4) {
+ fputc(0x70, gr_file);
+ fputc((v >> 24l) & 0xffl, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else {
+ if (v <= 0x3fffffffffffffl)
+ putc((v >> 56l) | 0x60l, gr_file);
+ else {
+ putc(0x70, gr_file);
+ putc((v >> 56l) & 0xffl, gr_file);
+ }
+
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+ }
+}
+#endif /* 0 */
+
+#endif /* GRAN || PAR whole file */
--- /dev/null
+/*
+ Time-stamp: <Sun Dec 12 1999 20:37:00 Stardate: [-30]4039.08 software>
+
+Various debugging routines for GranSim and GUM
+*/
+
+#if defined(GRAN) || defined(PAR) /* whole file */
+
+//@node Debugging routines for GranSim and GUM, , ,
+//@section Debugging routines for GranSim and GUM
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* Constants and Variables::
+//* Closures::
+//* Threads::
+//* Events::
+//* Sparks::
+//* Processors::
+//* Shortcuts::
+//* Printing info type::
+//* Printing Packet Contents::
+//* End of File::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Debugging routines for GranSim and GUM, Debugging routines for GranSim and GUM
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "StgMiscClosures.h"
+# if defined(DEBUG)
+# include "ParallelDebug.h"
+# endif
+
+//@node Prototypes, Constants and Variables, Includes, Debugging routines for GranSim and GUM
+//@subsection Prototypes
+/*
+rtsBool isOffset(globalAddr *ga);
+rtsBool isFixed(globalAddr *ga);
+*/
+//@node Constants and Variables, Closures, Prototypes, Debugging routines for GranSim and GUM
+//@subsection Constants and Variables
+
+/* Names as strings; needed by get_closure_info in ClosureMacros.h -- HWL */
+static char *closure_type_names[] = {
+ "INVALID_OBJECT", /* 0 */
+ "CONSTR", /* 1 */
+ "CONSTR_1_0", /* 2 */
+ "CONSTR_0_1", /* 3 */
+ "CONSTR_2_0", /* 4 */
+ "CONSTR_1_1", /* 5 */
+ "CONSTR_0_2", /* 6 */
+ "CONSTR_INTLIKE", /* 7 */
+ "CONSTR_CHARLIKE", /* 8 */
+ "CONSTR_STATIC", /* 9 */
+ "CONSTR_NOCAF_STATIC", /* 10 */
+ "FUN", /* 11 */
+ "FUN_1_0", /* 12 */
+ "FUN_0_1", /* 13 */
+ "FUN_2_0", /* 14 */
+ "FUN_1_1", /* 15 */
+ "FUN_0_2", /* 16 */
+ "FUN_STATIC", /* 17 */
+ "THUNK", /* 18 */
+ "THUNK_1_0", /* 19 */
+ "THUNK_0_1", /* 20 */
+ "THUNK_2_0", /* 21 */
+ "THUNK_1_1", /* 22 */
+ "THUNK_0_2", /* 23 */
+ "THUNK_STATIC", /* 24 */
+ "THUNK_SELECTOR", /* 25 */
+ "BCO", /* 26 */
+ "AP_UPD", /* 27 */
+ "PAP", /* 28 */
+ "IND", /* 29 */
+ "IND_OLDGEN", /* 30 */
+ "IND_PERM", /* 31 */
+ "IND_OLDGEN_PERM", /* 32 */
+ "IND_STATIC", /* 33 */
+ "CAF_UNENTERED", /* 34 */
+ "CAF_ENTERED", /* 35 */
+ "CAF_BLACKHOLE", /* 36 */
+ "RET_BCO", /* 37 */
+ "RET_SMALL", /* 38 */
+ "RET_VEC_SMALL", /* 39 */
+ "RET_BIG", /* 40 */
+ "RET_VEC_BIG", /* 41 */
+ "RET_DYN", /* 42 */
+ "UPDATE_FRAME", /* 43 */
+ "CATCH_FRAME", /* 44 */
+ "STOP_FRAME", /* 45 */
+ "SEQ_FRAME", /* 46 */
+ "BLACKHOLE", /* 47 */
+ "BLACKHOLE_BQ", /* 48 */
+ "SE_BLACKHOLE", /* 49 */
+ "SE_CAF_BLACKHOLE", /* 50 */
+ "MVAR", /* 51 */
+ "ARR_WORDS", /* 52 */
+ "MUT_ARR_PTRS", /* 53 */
+ "MUT_ARR_PTRS_FROZEN", /* 54 */
+ "MUT_VAR", /* 55 */
+ "WEAK", /* 56 */
+ "FOREIGN", /* 57 */
+ "STABLE_NAME", /* 58 */
+ "TSO", /* 59 */
+ "BLOCKED_FETCH", /* 60 */
+ "FETCH_ME", /* 61 */
+ "EVACUATED", /* 62 */
+ "N_CLOSURE_TYPES", /* 63 */
+ "FETCH_ME_BQ", /* 64 */
+ "RBH" /* 65 */
+};
+
+
+#if defined(GRAN) && defined(GRAN_CHECK)
+//@node Closures, Threads, Constants and Variables, Debugging routines for GranSim and GUM
+//@subsection Closures
+
+void
+G_PRINT_NODE(node)
+StgClosure* node;
+{
+ StgInfoTable *info_ptr;
+ StgTSO* bqe;
+ nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0;
+ char info_hdr_ty[80], info_ty[80];
+
+ if (node==NULL) {
+ fprintf(stderr,"NULL\n");
+ return;
+ } else if (node==END_TSO_QUEUE) {
+ fprintf(stderr,"END_TSO_QUEUE\n");
+ return;
+ }
+ /* size_and_ptrs(node,&size,&ptrs); */
+ info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_hdr_ty);
+
+ /* vhs = var_hdr_size(node); */
+ display_info_type(info_ptr,info_ty);
+
+ fprintf(stderr,"Node: 0x%lx", node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ if (info_ptr->type==TSO)
+ fprintf(stderr," TSO: 0x%lx (%x) IP: 0x%lx (%s), type %s \n ",
+ (StgTSO*)node, ((StgTSO*)node)->id, info_ptr, info_hdr_ty, info_ty);
+ else
+ fprintf(stderr," IP: 0x%lx (%s), type %s \n VHS: %d, size: %ld, ptrs:%ld, nonptrs: %ld\n ",
+ info_ptr,info_hdr_ty,info_ty,vhs,size,ptrs,nonptrs);
+
+ /* For now, we ignore the variable header */
+
+ fprintf(stderr," Ptrs: ");
+ for(i=0; i < ptrs; ++i)
+ {
+ if ( (i+1) % 6 == 0)
+ fprintf(stderr,"\n ");
+ fprintf(stderr," 0x%lx[P]",node->payload[i]);
+ };
+
+ fprintf(stderr," Data: ");
+ for(i=0; i < nonptrs; ++i)
+ {
+ if( (i+1) % 6 == 0)
+ fprintf(stderr,"\n ");
+ fprintf(stderr," %lu[D]",node->payload[ptrs+i]);
+ }
+ fprintf(stderr, "\n");
+
+
+ switch (info_ptr->type)
+ {
+ case TSO:
+ fprintf(stderr,"\n TSO_LINK: %#lx",
+ ((StgTSO*)node)->link);
+ break;
+
+ case BLACKHOLE:
+ case RBH:
+ bqe = ((StgBlockingQueue*)node)->blocking_queue;
+ fprintf(stderr," BQ of %#lx: ", node);
+ G_PRINT_BQ(bqe);
+ break;
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ printf("Panic: found FETCH_ME or FETCH_ME_BQ Infotable in GrAnSim system.\n");
+ break;
+ default:
+      /* do nothing */
+      break;
+ }
+}
+
+void
+G_PPN(node) /* Extracted from PrintPacket in Pack.lc */
+StgClosure* node;
+{
+ StgInfoTable *info ;
+ nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0, locn = 0;
+ char info_type[80];
+
+ /* size_and_ptrs(node,&size,&ptrs); */
+ info = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type);
+
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ info->type == BLACKHOLE || info->type == RBH )
+ size = ptrs = nonptrs = vhs = 0;
+
+ if (closure_THUNK(node)) {
+ if (!closure_UNPOINTED(node))
+ fputs("SHARED ", stderr);
+ else
+ fputs("UNSHARED ", stderr);
+ }
+ if (info->type==BLACKHOLE) {
+ fputs("BLACK HOLE\n", stderr);
+ } else {
+ /* Fixed header */
+ fprintf(stderr, "(%s) FH [%#lx", info_type, node[locn++]);
+ for (i = 1; i < FIXED_HS; i++)
+ fprintf(stderr, " %#lx", node[locn++]);
+
+ /* Variable header */
+ if (vhs > 0) {
+ fprintf(stderr, "] VH [%#lx", node->payload[0]);
+
+ for (i = 1; i < vhs; i++)
+ fprintf(stderr, " %#lx", node->payload[i]);
+ }
+
+ fprintf(stderr, "] PTRS %u", ptrs);
+
+ /* Non-pointers */
+ if (nonptrs > 0) {
+ fprintf(stderr, " NPTRS [%#lx", node->payload[ptrs]);
+
+ for (i = 1; i < nonptrs; i++)
+ fprintf(stderr, " %#lx", node->payload[ptrs+i]);
+
+ putc(']', stderr);
+ }
+ putc('\n', stderr);
+ }
+
+}
+
+#if 0
+// ToDo: fix this!! -- HWL
+void
+G_INFO_TABLE(node)
+StgClosure *node;
+{
+ StgInfoTable *info_ptr;
+ nat size = 0, ptrs = 0, nonptrs = 0, vhs = 0;
+ char info_type[80], hdr_type[80];
+
+ info_hdr_type(info_ptr, hdr_type);
+
+ // get_itbl(node);
+ info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type);
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ info_type,info_ptr,(W_) ENTRY_CODE(info_ptr),
+ size, ptrs);
+ // INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+
+ if (closure_THUNK(node) && !closure_UNPOINTED(node) ) {
+ fprintf(stderr," RBH InfoPtr: %#lx\n",
+ RBH_INFOPTR(info_ptr));
+ }
+
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif /* _INFO_COMPACTING */
+}
+#endif /* 0 */
+
+//@cindex G_PRINT_BQ
+void
+G_PRINT_BQ(node)
+StgClosure* node;
+{
+ StgInfoTable *info;
+ StgTSO *tso, *last;
+ char str[80], str0[80];
+
+ fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
+ CurrentProc,CurrentTime[CurrentProc]);
+ if ( node == (StgClosure*)NULL ) {
+ fprintf(stderr," NULL.\n");
+ return;
+ }
+ if ( node == END_TSO_QUEUE ) {
+ fprintf(stderr," _|_\n");
+ return;
+ }
+  tso = ((StgBlockingQueue*)node)->blocking_queue;
+  while (tso != END_TSO_QUEUE) {
+    PEs proc;
+
+    /* Find where the tso lives */
+    proc = where_is((StgClosure *)tso);
+    info = get_itbl(tso);
+
+ switch (info->type) {
+ case TSO:
+ strcpy(str0,"TSO");
+ break;
+ case BLOCKED_FETCH:
+ strcpy(str0,"BLOCKED_FETCH");
+ break;
+ default:
+ strcpy(str0,"???");
+ break;
+ }
+
+    if (proc == CurrentProc)
+      fprintf(stderr," %#lx (%x) L %s,",
+	     tso, tso->link, str0);
+    else
+      fprintf(stderr," %#lx (%x) G (PE %d) %s,",
+	     tso, tso->link, proc, str0);
+
+ last = tso;
+ tso = last->link;
+ }
+ if ( tso == END_TSO_QUEUE )
+ fprintf(stderr," _|_\n");
+}
+
+//@node Threads, Events, Closures, Debugging routines for GranSim and GUM
+//@subsection Threads
+
+void
+G_CURR_THREADQ(verbose)
+StgInt verbose;
+{
+ fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
+ G_THREADQ(run_queue_hd, verbose);
+}
+
+void
+G_THREADQ(closure, verbose)
+StgTSO* closure;
+StgInt verbose;
+{
+ StgTSO* x;
+
+ fprintf(stderr,"Thread Queue: ");
+ for (x=closure; x!=END_TSO_QUEUE; x=x->link)
+ if (verbose)
+ G_TSO(x,0);
+ else
+ fprintf(stderr," %#lx",x);
+
+ if (closure==END_TSO_QUEUE)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+G_TSO(closure,verbose)
+StgTSO* closure;
+StgInt verbose;
+{
+
+ if (closure==END_TSO_QUEUE) {
+    fprintf(stderr,"TSO at %#lx is END_TSO_QUEUE!\n", closure);
+ return;
+ }
+
+ if ( verbose & 0x08 ) { /* short info */
+ fprintf(stderr,"[TSO @ %#lx, PE %d]: Id: %#lx, Link: %#lx\n",
+ closure,where_is(closure),
+ closure->id,closure->link);
+ return;
+ }
+
+ fprintf(stderr,"TSO at %#lx has the following contents:\n",
+ closure);
+
+ fprintf(stderr,"> Id: \t%#lx",closure->id);
+ // fprintf(stderr,"\tstate: \t%#lx",closure->state);
+ fprintf(stderr,"\twhatNext: \t%#lx",closure->whatNext);
+ fprintf(stderr,"\tlink: \t%#lx\n",closure->link);
+ // fprintf(stderr,"\tType: \t%s\n",type_name[TSO_TYPE(closure)]);
+ fprintf(stderr,">PRI: \t%#lx", closure->gran.pri);
+ fprintf(stderr,"\tMAGIC: \t%#lx %s\n", closure->gran.magic,
+ (closure->gran.magic==TSO_MAGIC ? "it IS a TSO" : "THIS IS NO TSO!!"));
+ if ( verbose & 0x04 ) {
+ fprintf(stderr, "Stack: stack @ %#lx (stack_size: %u; max_stack_size: %u)\n",
+ closure->stack, closure->stack_size, closure->max_stack_size);
+ fprintf(stderr, " sp: %#lx, su: %#lx, splim: %#lx\n",
+ closure->sp, closure->su, closure->splim);
+ }
+ // fprintf(stderr,"\n");
+ if (verbose & 0x01) {
+ // fprintf(stderr,"} LOCKED: \t%#lx",closure->locked);
+ fprintf(stderr,"} SPARKNAME: \t%#lx\n", closure->gran.sparkname);
+ fprintf(stderr,"} STARTEDAT: \t%#lx", closure->gran.startedat);
+ fprintf(stderr,"\tEXPORTED: \t%#lx\n", closure->gran.exported);
+ fprintf(stderr,"} BASICBLOCKS: \t%#lx", closure->gran.basicblocks);
+ fprintf(stderr,"\tALLOCS: \t%#lx\n", closure->gran.allocs);
+ fprintf(stderr,"} EXECTIME: \t%#lx", closure->gran.exectime);
+ fprintf(stderr,"\tFETCHTIME: \t%#lx\n", closure->gran.fetchtime);
+ fprintf(stderr,"} FETCHCOUNT: \t%#lx", closure->gran.fetchcount);
+ fprintf(stderr,"\tBLOCKTIME: \t%#lx\n", closure->gran.blocktime);
+ fprintf(stderr,"} BLOCKCOUNT: \t%#lx", closure->gran.blockcount);
+ fprintf(stderr,"\tBLOCKEDAT: \t%#lx\n", closure->gran.blockedat);
+ fprintf(stderr,"} GLOBALSPARKS:\t%#lx", closure->gran.globalsparks);
+ fprintf(stderr,"\tLOCALSPARKS:\t%#lx\n", closure->gran.localsparks);
+ }
+ if ( verbose & 0x02 ) {
+ fprintf(stderr,"BQ that starts with this TSO: ");
+ G_PRINT_BQ(closure);
+ }
+}
+
+//@node Events, Sparks, Threads, Debugging routines for GranSim and GUM
+//@subsection Events
+
+void
+G_EVENT(event, verbose)
+rtsEventQ event;
+StgInt verbose;
+{
+ if (verbose) {
+ print_event(event);
+ }else{
+ fprintf(stderr," %#lx",event);
+ }
+}
+
+void
+G_EVENTQ(verbose)
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ rtsEventQ x;
+
+ fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=x->next) {
+ G_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+G_PE_EQ(pe,verbose)
+PEs pe;
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ rtsEventQ x;
+
+ fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=x->next) {
+ if (x->proc==pe)
+ G_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+//@node Sparks, Processors, Events, Debugging routines for GranSim and GUM
+//@subsection Sparks
+
+void
+G_SPARK(spark, verbose)
+rtsSparkQ spark;
+StgInt verbose;
+{
+ if (spark==(rtsSpark*)NULL) {
+ belch("G_SPARK: NULL spark; aborting");
+ return;
+ }
+ if (verbose)
+ print_spark(spark);
+ else
+ fprintf(stderr," %#lx",spark);
+}
+
+void
+G_SPARKQ(spark,verbose)
+rtsSparkQ spark;
+StgInt verbose;
+{
+ rtsSparkQ x;
+
+ if (spark==(rtsSpark*)NULL) {
+ belch("G_SPARKQ: NULL spark; aborting");
+ return;
+ }
+
+ fprintf(stderr,"RtsSparkQ (hd @%#lx):\n",spark);
+ for (x=spark; x!=NULL; x=x->next) {
+ G_SPARK(x,verbose);
+ }
+ if (spark==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+void
+G_CURR_SPARKQ(verbose)
+StgInt verbose;
+{
+ G_SPARKQ(pending_sparks_hd,verbose);
+}
+
+//@node Processors, Shortcuts, Sparks, Debugging routines for GranSim and GUM
+//@subsection Processors
+
+void
+G_PROC(proc,verbose)
+StgInt proc;
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ extern char *proc_status_names[];
+
+ fprintf(stderr,"Status of proc %d at time %d (%#lx): %s (%s)\n",
+ proc,CurrentTime[proc],CurrentTime[proc],
+ (CurrentProc==proc)?"ACTIVE":"INACTIVE",
+ proc_status_names[procStatus[proc]]);
+ G_THREADQ(run_queue_hds[proc],verbose & 0x2);
+ if ( (CurrentProc==proc) )
+ G_TSO(CurrentTSO,1);
+
+ if (EventHd!=NULL)
+ fprintf(stderr,"Next event (%s) is on proc %d\n",
+ event_names[EventHd->evttype],EventHd->proc);
+
+ if (verbose & 0x1) {
+ fprintf(stderr,"\nREQUIRED sparks: ");
+ G_SPARKQ(pending_sparks_hds[proc],1);
+ fprintf(stderr,"\nADVISORY_sparks: ");
+ G_SPARKQ(pending_sparks_hds[proc],1);
+ }
+}
+
+//@node Shortcuts, Printing info type, Processors, Debugging routines for GranSim and GUM
+//@subsection Shortcuts
+
+/* Debug Processor */
+void
+GP(proc)
+StgInt proc;
+{ G_PROC(proc,1);
+}
+
+/* Debug Current Processor */
+void
+GCP(){ G_PROC(CurrentProc,2); }
+
+/* Debug TSO */
+void
+GT(StgPtr tso){
+ G_TSO(tso,1);
+}
+
+/* Debug CurrentTSO */
+void
+GCT(){
+ fprintf(stderr,"Current Proc: %d\n",CurrentProc);
+ G_TSO(CurrentTSO,1);
+}
+
+/* Shorthand for debugging event queue */
+void
+GEQ() { G_EVENTQ(1); }
+
+/* Shorthand for debugging thread queue of a processor */
+void
+GTQ(PEs p) { G_THREADQ(run_queue_hds[p],1); }
+
+/* Shorthand for debugging thread queue of current processor */
+void
+GCTQ() { G_THREADQ(run_queue_hds[CurrentProc],1); }
+
+/* Shorthand for debugging spark queue of a processor */
+void
+GSQ(PEs p) { G_SPARKQ(pending_sparks_hds[p],1); }
+
+/* Shorthand for debugging spark queue of current processor */
+void
+GCSQ() { G_CURR_SPARKQ(1); }
+
+/* Shorthand for printing a node */
+void
+GN(StgPtr node) { G_PRINT_NODE(node); }
+
+/* Shorthand for printing info table */
+#if 0
+// ToDo: fix -- HWL
+void
+GIT(StgPtr node) { G_INFO_TABLE(node); }
+#endif
+
+void
+printThreadQPtrs(void)
+{
+ PEs p;
+ for (p=0; p<RtsFlags.GranFlags.proc; p++) {
+    fprintf(stderr,", PE %d: (hd=%p,tl=%p)",
+	    p, run_queue_hds[p], run_queue_tls[p]);
+ }
+}
+
+void
+printThreadQ(StgTSO *tso) { G_THREADQ(tso, 0); };
+
+void
+printSparkQ(rtsSpark *spark) { G_SPARKQ(spark, 0); };
+
+void
+printThreadQ_verbose(StgTSO *tso) { G_THREADQ(tso, 1); };
+
+void
+printSparkQ_verbose(rtsSpark *spark) { G_SPARKQ(spark, 1); };
+
+/* Shorthand for some of ADR's debugging functions */
+
+#endif /* GRAN && GRAN_CHECK*/
+
+#if 0
+void
+DEBUG_PRINT_NODE(node)
+StgPtr node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ StgInt size = 0, ptrs = 0, i, vhs = 0;
+ char info_type[80];
+
+ info_hdr_type(info_ptr, info_type);
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ fprintf(stderr,"Node: 0x%lx", (W_) node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(PROFILING)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
+ info_ptr,info_type,size,ptrs);
+
+ /* For now, we ignore the variable header */
+
+ for(i=0; i < size; ++i)
+ {
+ if(i == 0)
+ fprintf(stderr,"Data: ");
+
+ else if(i % 6 == 0)
+ fprintf(stderr,"\n ");
+
+ if(i < ptrs)
+ fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i));
+ else
+ fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i));
+ }
+ fprintf(stderr, "\n");
+}
+
+
+#define INFO_MASK 0x80000000
+
+void
+DEBUG_TREE(node)
+StgPtr node;
+{
+ W_ size = 0, ptrs = 0, i, vhs = 0;
+
+ /* Don't print cycles */
+ if((INFO_PTR(node) & INFO_MASK) != 0)
+ return;
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ DEBUG_PRINT_NODE(node);
+ fprintf(stderr, "\n");
+
+ /* Mark the node -- may be dangerous */
+ INFO_PTR(node) |= INFO_MASK;
+
+ for(i = 0; i < ptrs; ++i)
+ DEBUG_TREE((StgPtr)node[i+vhs+_FHS]);
+
+ /* Unmark the node */
+ INFO_PTR(node) &= ~INFO_MASK;
+}
+
+
+void
+DEBUG_INFO_TABLE(node)
+StgPtr node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ char *iStgPtrtype = info_hdr_type(info_ptr);
+
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ iStgPtrtype,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(PROFILING)
+ fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif /* _INFO_COMPACTING */
+}
+#endif /* 0 */
+
+//@node Printing info type, Printing Packet Contents, Shortcuts, Debugging routines for GranSim and GUM
+//@subsection Printing info type
+
+char *
+display_info_type(closure, str)
+StgClosure *closure;
+char *str;
+{
+ strcpy(str,"");
+ if ( closure_HNF(closure) )
+ strcat(str,"|_HNF ");
+ else if ( closure_BITMAP(closure) )
+ strcat(str,"|_BTM");
+ else if ( !closure_SHOULD_SPARK(closure) )
+ strcat(str,"|_NS");
+ else if ( closure_STATIC(closure) )
+ strcat(str,"|_STA");
+ else if ( closure_THUNK(closure) )
+ strcat(str,"|_THU");
+ else if ( closure_MUTABLE(closure) )
+ strcat(str,"|_MUT");
+ else if ( closure_UNPOINTED(closure) )
+ strcat(str,"|_UPT");
+ else if ( closure_SRT(closure) )
+ strcat(str,"|_SRT");
+
+ return(str);
+}
+
+char *
+info_type(StgClosure *closure){
+ return closure_type_names[get_itbl(closure)->type];
+}
+
+char *
+info_type_by_ip(StgInfoTable *ip){
+ return closure_type_names[ip->type];
+}
+
+void
+info_hdr_type(StgClosure *closure, char *res){
+ strcpy(res,closure_type_names[get_itbl(closure)->type]);
+}
+
+/*
+ PrintPacket is in Pack.c because it makes use of closure queues
+*/
+
+#if defined(GRAN) || defined(PAR)
+
+/*
+ Print graph rooted at q. The structure of this recursive printing routine
+ should be the same as in the graph traversals when packing a graph in
+ GUM. Thus, it demonstrates the structure of such a generic graph
+ traversal, and in particular, how to extract pointer and non-pointer info
+ from the multitude of different heap objects available.
+
+ {evacuate}Daq ngoqvam nIHlu'pu'!!
+*/
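+/*
+  Typical use (illustration only): start the traversal at a root closure
+  with indentation level 0, e.g. from a debugger or an IF_DEBUG branch:
+      PrintGraph(closure, 0);
+*/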
+
+void
+PrintGraph(StgClosure *p, int indent_level)
+{
+ StgPtr x, q;
+ rtsBool printed = rtsFalse;
+ nat i, j;
+ const StgInfoTable *info;
+
+ q = p; /* save ptr to object */
+
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p))
+ || IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p))));
+
+ printClosure(p); // prints contents of this one closure
+
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ info = get_itbl((StgClosure *)p);
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ fprintf(stderr, "BCO (%p) with %d pointers\n", p, bco->n_ptrs);
+ for (i = 0; i < bco->n_ptrs; i++) {
+ // bcoConstCPtr(bco,i) =
+ PrintGraph(bcoConstCPtr(bco,i), indent_level+1);
+ }
+ // p += bco_sizeW(bco);
+ break;
+ }
+
+ case MVAR:
+ /* treat MVars specially, because we don't want to PrintGraph the
+ * mut_link field in the middle of the closure.
+ */
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ // evac_gen = 0;
+ fprintf(stderr, "MVAR (%p) with 3 pointers (head, tail, value)\n", p);
+ // (StgClosure *)mvar->head =
+ PrintGraph((StgClosure *)mvar->head, indent_level+1);
+ // (StgClosure *)mvar->tail =
+ PrintGraph((StgClosure *)mvar->tail, indent_level+1);
+ //(StgClosure *)mvar->value =
+ PrintGraph((StgClosure *)mvar->value, indent_level+1);
+ // p += sizeofW(StgMVar);
+ // evac_gen = saved_evac_gen;
+ break;
+ }
+
+ case THUNK_2_0:
+ if (!printed) {
+ fprintf(stderr, "THUNK_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_2_0:
+ if (!printed) {
+ fprintf(stderr, "FUN_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_2_0:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // ((StgClosure *)p)->payload[1] =
+ PrintGraph(((StgClosure *)p)->payload[1],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_0:
+ // scavenge_srt(info);
+ fprintf(stderr, "THUNK_1_0 (%p) with 1 pointer\n", p);
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+ break;
+
+ case FUN_1_0:
+ if (!printed) {
+ fprintf(stderr, "FUN_1_0 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_1_0:
+ if (!printed) {
+	fprintf(stderr, "CONSTR_1_0 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_1:
+ fprintf(stderr, "THUNK_0_1 (%p) with 0 pointers\n", p);
+ // scavenge_srt(info);
+ // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+ break;
+
+ case FUN_0_1:
+ fprintf(stderr, "FUN_0_1 (%p) with 0 pointers\n", p);
+ //scavenge_srt(info);
+ case CONSTR_0_1:
+ fprintf(stderr, "CONSTR_0_1 (%p) with 0 pointers\n", p);
+ //p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_2:
+ if (!printed) {
+ fprintf(stderr, "THUNK_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_0_2:
+ if (!printed) {
+ fprintf(stderr, "FUN_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_0_2:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_1:
+ if (!printed) {
+ fprintf(stderr, "THUNK_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_1_1:
+ if (!printed) {
+ fprintf(stderr, "FUN_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_1_1:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case FUN:
+ if (!printed) {
+ fprintf(stderr, "FUN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case THUNK:
+ if (!printed) {
+ fprintf(stderr, "THUNK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ /* fall through */
+
+ case CONSTR:
+ if (!printed) {
+ fprintf(stderr, "CONSTR (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* basically same as loop in STABLE_NAME case */
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ PrintGraph(((StgClosure *)p)->payload[i],
+ indent_level+1);
+ break;
+ /* NOT fall through */
+
+ case WEAK:
+ if (!printed) {
+ fprintf(stderr, "WEAK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case FOREIGN:
+ if (!printed) {
+ fprintf(stderr, "FOREIGN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case STABLE_NAME:
+ {
+ StgPtr end;
+
+ if (!printed) {
+ fprintf(stderr, "STABLE_NAME (%p) with %d pointers (not followed!)\n",
+ p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ // (StgClosure *)*p =
+ //PrintGraph((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ //fputs("\n", stderr);
+ // p += info->layout.payload.nptrs;
+ break;
+ }
+
+ case IND_PERM:
+ //if (step->gen->no != 0) {
+ // SET_INFO(((StgClosure *)p), &IND_OLDGEN_PERM_info);
+ //}
+ if (!printed) {
+      fprintf(stderr, "IND_PERM (%p) with indirection to %p\n",
+	      p, ((StgIndOldGen *)p)->indirectee);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case IND_OLDGEN_PERM:
+ if (!printed) {
+ fprintf(stderr, "IND_OLDGEN_PERM (%p) with indirection to %p\n",
+ p, ((StgIndOldGen *)p)->indirectee);
+ printed = rtsTrue;
+ }
+ // ((StgIndOldGen *)p)->indirectee =
+ PrintGraph(((StgIndOldGen *)p)->indirectee,
+ indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordOldToNewPtrs((StgMutClosure *)p);
+ //}
+ // p += sizeofW(StgIndOldGen);
+ break;
+
+ case CAF_UNENTERED:
+ {
+ StgCAF *caf = (StgCAF *)p;
+
+ fprintf(stderr, "CAF_UNENTERED (%p) pointing to %p\n", p, caf->body);
+ PrintGraph(caf->body, indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordOldToNewPtrs((StgMutClosure *)p);
+ //} else {
+ // caf->mut_link = NULL;
+ //}
+ //p += sizeofW(StgCAF);
+ break;
+ }
+
+ case CAF_ENTERED:
+ {
+ StgCAF *caf = (StgCAF *)p;
+
+ fprintf(stderr, "CAF_ENTERED (%p) pointing to %p and %p\n",
+ p, caf->body, caf->value);
+ // caf->body =
+ PrintGraph(caf->body, indent_level+1);
+ //caf->value =
+ PrintGraph(caf->value, indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordOldToNewPtrs((StgMutClosure *)p);
+ //} else {
+ // caf->mut_link = NULL;
+ //}
+ //p += sizeofW(StgCAF);
+ break;
+ }
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ fprintf(stderr, "MUT_VAR (%p) pointing to %p\n", p, ((StgMutVar *)p)->var);
+ if (((StgMutVar *)p)->header.info != &MUT_CONS_info) {
+ //evac_gen = 0;
+ PrintGraph(((StgMutVar *)p)->var, indent_level+1);
+ //evac_gen = saved_evac_gen;
+ }
+ //p += sizeofW(StgMutVar);
+ break;
+
+ case CAF_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case SE_CAF_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "SE_CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case SE_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "SE_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ //p += BLACKHOLE_sizeW();
+ break;
+
+ case BLACKHOLE_BQ:
+ {
+ StgBlockingQueue *bh = (StgBlockingQueue *)p;
+ // (StgClosure *)bh->blocking_queue =
+ fprintf(stderr, "BLACKHOLE_BQ (%p) pointing to %p\n",
+ p, (StgClosure *)bh->blocking_queue);
+ PrintGraph((StgClosure *)bh->blocking_queue, indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordMutable((StgMutClosure *)bh);
+ //}
+ // p += BLACKHOLE_sizeW();
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ fprintf(stderr, "THUNK_SELECTOR (%p) pointing to %p\n",
+ p, s->selectee);
+ PrintGraph(s->selectee, indent_level+1);
+ // p += THUNK_SELECTOR_sizeW();
+ break;
+ }
+
+ case IND:
+ fprintf(stderr, "IND (%p) pointing to %p\n", p, ((StgInd*)p)->indirectee);
+ PrintGraph(((StgInd*)p)->indirectee, indent_level+1);
+ break;
+
+ case IND_OLDGEN:
+ fprintf(stderr, "IND_OLDGEN (%p) pointing to %p\n",
+ p, ((StgIndOldGen*)p)->indirectee);
+ PrintGraph(((StgIndOldGen*)p)->indirectee, indent_level+1);
+ break;
+
+ case CONSTR_INTLIKE:
+ fprintf(stderr, "CONSTR_INTLIKE (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_CHARLIKE:
+ fprintf(stderr, "CONSTR_CHARLIKE (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_STATIC:
+ fprintf(stderr, "CONSTR_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_NOCAF_STATIC:
+ fprintf(stderr, "CONSTR_NOCAF_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case THUNK_STATIC:
+ fprintf(stderr, "THUNK_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case FUN_STATIC:
+ fprintf(stderr, "FUN_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case IND_STATIC:
+ fprintf(stderr, "IND_STATIC (%p) with 0 pointers\n", p);
+ break;
+
+ case RET_BCO:
+ fprintf(stderr, "RET_BCO (%p) with 0 pointers\n", p);
+ break;
+ case RET_SMALL:
+ fprintf(stderr, "RET_SMALL (%p) with 0 pointers\n", p);
+ break;
+ case RET_VEC_SMALL:
+ fprintf(stderr, "RET_VEC_SMALL (%p) with 0 pointers\n", p);
+ break;
+ case RET_BIG:
+ fprintf(stderr, "RET_BIG (%p) with 0 pointers\n", p);
+ break;
+ case RET_VEC_BIG:
+ fprintf(stderr, "RET_VEC_BIG (%p) with 0 pointers\n", p);
+ break;
+ case RET_DYN:
+ fprintf(stderr, "RET_DYN (%p) with 0 pointers\n", p);
+ break;
+ case UPDATE_FRAME:
+ fprintf(stderr, "UPDATE_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case STOP_FRAME:
+ fprintf(stderr, "STOP_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case CATCH_FRAME:
+ fprintf(stderr, "CATCH_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case SEQ_FRAME:
+ fprintf(stderr, "SEQ_FRAME (%p) with 0 pointers\n", p);
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ fprintf(stderr, "AP_UPD (%p) with 0 pointers\n", p);
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * PrintGraph the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+
+ fprintf(stderr, "PAP (%p) pointing to %p\n", p, pap->fun);
+ // pap->fun =
+ PrintGraph(pap->fun, indent_level+1);
+ //scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args);
+ //p += pap_sizeW(pap);
+ break;
+ }
+
+ case ARR_WORDS:
+ fprintf(stderr, "ARR_WORDS (%p) with 0 pointers\n", p);
+ /* nothing to follow */
+ //p += arr_words_sizeW(stgCast(StgArrWords*,p));
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ StgPtr next;
+
+ fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)\n",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+ // evac_gen = 0; /* repeatedly mutable */
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // (StgClosure *)*p =
+ // PrintGraph((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ fputs("\n", stderr);
+ //evac_gen = saved_evac_gen;
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ StgPtr start = p, next;
+
+      fprintf(stderr, "MUT_ARR_PTRS_FROZEN (%p) with %d pointers (not followed)",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // (StgClosure *)*p =
+ // PrintGraph((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ fputs("\n", stderr);
+ //if (failed_to_evac) {
+ /* we can do this easier... */
+ // recordMutable((StgMutClosure *)start);
+ // failed_to_evac = rtsFalse;
+ //}
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso;
+
+ tso = (StgTSO *)p;
+ fprintf(stderr, "TSO (%p) with link field %p\n", p, (StgClosure *)tso->link);
+ // evac_gen = 0;
+ /* chase the link field for any TSOs on the same queue */
+ // (StgClosure *)tso->link =
+ PrintGraph((StgClosure *)tso->link, indent_level+1);
+ //if (tso->blocked_on) {
+ // tso->blocked_on = PrintGraph(tso->blocked_on);
+ //}
+ /* scavenge this thread's stack */
+ //scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
+ //evac_gen = saved_evac_gen;
+ //p += tso_sizeW(tso);
+ break;
+ }
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p));
+ //if (LOOKS_LIKE_GHC_INFO(rip))
+ // fprintf(stderr, "RBH (%p) with 0 pointers (reverted type=%s)\n",
+ // p, info_type_by_ip(rip));
+ //else
+ fprintf(stderr, "RBH (%p) with 0 pointers (reverted IP=%x)\n",
+ p, rip);
+ }
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ fprintf(stderr, "BLOCKED_FETCH (%p) with 0 pointers (link=%p)\n",
+ p, ((StgBlockedFetch *)p)->link);
+ break;
+ case FETCH_ME:
+ fprintf(stderr, "FETCH_ME (%p) with 0 pointers\n", p);
+ break;
+ case FETCH_ME_BQ:
+ fprintf(stderr, "FETCH_ME_BQ (%p) with 0 pointers (blocking_queue=%p)\n",
+ p, ((StgFetchMeBlockingQueue *)p)->blocking_queue);
+ break;
+#endif
+ case EVACUATED:
+ fprintf(stderr, "EVACUATED (%p) with 0 pointers (evacuee=%p)\n",
+ p, ((StgEvacuated *)p)->evacuee);
+ break;
+
+ default:
+ barf("PrintGraph: unknown closure %d (%s)",
+	 info -> type, info_type_by_ip((StgInfoTable *)info));
+ }
+
+ /* If we didn't manage to promote all the objects pointed to by
+ * the current object, then we have to designate this object as
+ * mutable (because it contains old-to-new generation pointers).
+ */
+ //if (failed_to_evac) {
+ // mkMutCons((StgClosure *)q, &generations[evac_gen]);
+ // failed_to_evac = rtsFalse;
+ //}
+}
+
+#endif /* GRAN || PAR */
+
+#endif /* GRAN || PAR */
+//@node End of File, , Printing Packet Contents, Debugging routines for GranSim and GUM
+//@subsection End of File
--- /dev/null
+/*
+ Time-stamp: <Mon Nov 29 1999 17:17:13 Stardate: [-30]3973.60 hwloidl>
+
+ Prototypes of all parallel debugging functions.
+ */
+
+#ifndef PARALLEL_DEBUG_H
+#define PARALLEL_DEBUG_H
+
+#if defined(GRAN) // || defined(PAR)
+void G_PRINT_NODE(StgClosure* node);
+void G_PPN(StgClosure* node);
+void G_INFO_TABLE(StgClosure* node);
+void G_CURR_THREADQ(StgInt verbose);
+void G_THREADQ(StgTSO* closure, StgInt verbose);
+void G_TSO(StgTSO* closure, StgInt verbose);
+void G_EVENT(rtsEventQ event, StgInt verbose);
+void G_EVENTQ(StgInt verbose);
+void G_PE_EQ(PEs pe, StgInt verbose);
+void G_SPARK(rtsSparkQ spark, StgInt verbose);
+void G_SPARKQ(rtsSparkQ spark, StgInt verbose);
+void G_CURR_SPARKQ(StgInt verbose);
+void G_PROC(StgInt proc, StgInt verbose);
+void GP(StgInt proc);
+void GCP(void);
+void GT(StgPtr tso);
+void GCT(void);
+void GEQ(void);
+void GTQ(PEs p);
+void GCTQ(void);
+void GSQ(PEs p);
+void GCSQ(void);
+void GN(StgPtr node);
+void GIT(StgPtr node);
+#endif
+
+#if defined(GRAN) || defined(PAR)
+
+char *display_info_type(StgClosure *closure, char *str);
+void info_hdr_type(StgClosure *closure, char *res);
+char *info_type(StgClosure *closure);
+char *info_type_by_ip(StgInfoTable *ip);
+
+void PrintPacket(rtsPackBuffer *buffer);
+void PrintGraph(StgClosure *p, int indent_level);
+
+#endif /* GRAN || PAR */
+
+#endif /* PARALLEL_DEBUG_H */
--- /dev/null
+/* --------------------------------------------------------------------------
+ Time-stamp: <Wed Jan 12 2000 16:22:43 Stardate: [-30]4194.45 hwloidl>
+ $Id: ParallelRts.h,v 1.2 2000/01/13 14:34:09 hwloidl Exp $
+
+ Variables and functions specific to the parallel RTS (i.e. GUM or GranSim)
+ ----------------------------------------------------------------------- */
+
+#ifndef PARALLEL_RTS_H
+#define PARALLEL_RTS_H
+
+#if defined(GRAN) || defined(PAR)
+
+//@menu
+//* Packing routines::
+//* Spark handling routines::
+//* GC routines::
+//* Debugging routines::
+//* Generating .gr profiles::
+//* Common macros::
+//* Index::
+//@end menu
+
+#ifndef GRAN
+// Dummy def for NO_PRI if not in GranSim
+#define NO_PRI 0
+#endif
+
+//@node Packing routines, Spark handling routines
+//@subsection Packing routines
+
+#if defined(GRAN)
+/* Statistics info */
+extern nat tot_packets, tot_packet_size, tot_cuts, tot_thunks;
+#endif
+
+#if defined(GRAN)
+/* Pack.c */
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize);
+rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize);
+rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize);
+void PackFetchMe(StgClosure *closure);
+
+/* Unpack.c */
+StgClosure* UnpackGraph(rtsPackBuffer* buffer);
+void InitPendingGABuffer(nat size);
+
+/* RBH.c */
+StgClosure *convertToRBH(StgClosure *closure);
+void convertFromRBH(StgClosure *closure);
+
+/* General closure predicates */
+/*
+ {Parallel.h}Daq ngoqvam vIroQpu'
+
+StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty);
+rtsBool IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node);
+rtsBool IS_THUNK(StgClosure* closure);
+*/
+
+#elif defined(PAR)
+
+/* Pack.c */
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize);
+
+rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize);
+rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize);
+void PackFetchMe(StgClosure *closure);
+
+/* Unpack.c */
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer *buffer, globalAddr **gamap,
+ nat *nGAs);
+
+/* RBH.c */
+StgClosure *convertToRBH(StgClosure *closure);
+void convertToFetchMe(StgRBH *rbh, globalAddr *ga);
+
+/* General closure predicates */
+/*
+ {Parallel.h}Daq ngoqvam vIroQpu'
+
+StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty);
+rtsBool IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node);
+rtsBool IS_THUNK(StgClosure* closure);
+*/
+
+#endif
+
+/* this routine should be moved to a more general module; currently in Pack.c
+StgInfoTable* get_closure_info(StgClosure* node,
+ nat *size, nat *ptrs, nat *nonptrs, nat *vhs,
+ char *info_hdr_ty);
+*/
+void doGlobalGC(void);
+
+//@node Spark handling routines, GC routines, Packing routines
+//@subsection Spark handling routines
+
+/* now in ../Sparks.c */
+
+#if 0
+
+#if defined(PAR)
+
+rtsSpark findLocalSpark(rtsBool forexport);
+StgTSO* activateSpark (rtsSpark spark);
+void disposeSpark(rtsSpark spark);
+rtsBool add_to_spark_queue(StgClosure *closure, rtsBool required);
+rtsBool initSparkPools (void);
+
+nat spark_queue_len(nat pool);
+void markSparkQueue(void);
+void print_sparkq(void);
+
+#elif defined(GRAN)
+
+void findLocalSpark (rtsEvent *event,
+ rtsBool *found_res, rtsSparkQ *spark_res);
+rtsBool activateSpark (rtsEvent *event, rtsSparkQ spark);
+rtsSpark *newSpark (StgClosure *node, StgInt name, StgInt gran_info,
+ StgInt size_info, StgInt par_info, StgInt local);
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void add_to_spark_queue(rtsSpark *spark);
+void print_spark(rtsSpark *spark);
+nat spark_queue_len(PEs proc);
+rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too);
+void markSparkQueue(void);
+void print_sparkq(PEs proc);
+void print_sparkq_stats(void);
+
+#endif
+#endif /* 0 */
+
+//@node GC routines, Debugging routines, Spark handling routines
+//@subsection GC routines
+
+#if defined(PAR)
+/* HLComms.c */
+void freeRemoteGA(int pe, globalAddr *ga);
+void sendFreeMessages(void);
+
+/* Global.c */
+void markLocalGAs(rtsBool full);
+void RebuildGAtables(rtsBool full);
+void RebuildLAGAtable(void);
+#endif
+
+//@node Debugging routines, Generating .gr profiles, GC routines
+//@subsection Debugging routines
+
+#if defined(PAR)
+void printGA (globalAddr *ga);
+void printGALA (GALA *gala);
+void printLAGAtable(void);
+#endif
+
+//@node Generating .gr profiles, Common macros, Debugging routines
+//@subsection Generating .gr profiles
+
+#define STATS_FILENAME_MAXLEN 128
+
+/* Where to write the log file */
+//@cindex gr_file
+//@cindex gr_filename
+extern FILE *gr_file;
+extern char gr_filename[STATS_FILENAME_MAXLEN];
+
+//@cindex init_gr_simulation
+//@cindex end_gr_simulation
+void init_gr_simulation(int rts_argc, char *rts_argv[],
+ int prog_argc, char *prog_argv[]);
+void end_gr_simulation(void);
+
+//@node Common macros, Index, Generating .gr profiles
+//@subsection Common macros
+
+/*
+ extracting specific info out of a closure; used in packing (GranSim, GUM)
+*/
+//@cindex get_closure_info
+static inline StgInfoTable*
+get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty)
+StgClosure* node;
+nat *size, *ptrs, *nonptrs, *vhs;
+char *info_hdr_ty;
+{
+ StgInfoTable *info;
+
+ info = get_itbl(node);
+ /* the switch shouldn't be necessary, really; just use default case */
+ switch (info->type) {
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to
+ *size = sizeW_fromITBL(rip);
+ *ptrs = (nat) (rip->layout.payload.ptrs);
+ *nonptrs = (nat) (rip->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "UNKNOWN");
+#endif
+ return rip; // NB: we return the reverted info ptr for a RBH!!!!!!
+ }
+
+ default:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) (info->layout.payload.ptrs);
+ *nonptrs = (nat) (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+#if 0 /* DEBUG */
+ info_hdr_type(node, info_hdr_ty);
+#else
+ strcpy(info_hdr_ty, "UNKNOWN");
+#endif
+ return info;
+ }
+}
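+#if 0
+/* Usage sketch (illustration only; the helper name is made up, but the call
+   mirrors the uses in ParallelDebug.c): unpack the layout of a closure
+   before printing or packing it. */
+static void
+example_closure_layout(StgClosure *node)
+{
+  nat size, ptrs, nonptrs, vhs;
+  char hdr_ty[80];
+  StgInfoTable *ip;
+
+  ip = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, hdr_ty);
+  fprintf(stderr, "closure %p (type %d): size %u, ptrs %u, nonptrs %u, vhs %u\n",
+	  node, ip->type, size, ptrs, nonptrs, vhs);
+}
+#endif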
+
+//@cindex IS_BLACK_HOLE
+static inline rtsBool
+IS_BLACK_HOLE(StgClosure* node)
+{
+ StgInfoTable *info;
+ switch (get_itbl(node)->type) {
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case RBH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ return rtsTrue;
+ default:
+ return rtsFalse;
+ }
+//return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse);
+}
+
+//@cindex IS_INDIRECTION
+static inline StgClosure *
+IS_INDIRECTION(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ switch (info->type) {
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ /* relies on indirectee being at same place for all these closure types */
+ return (((StgInd*)node) -> indirectee);
+ default:
+ return NULL;
+ }
+}
+
+//@cindex unwindInd
+static inline StgClosure *
+UNWIND_IND (StgClosure *closure)
+{
+ StgClosure *next;
+
+ while ((next = IS_INDIRECTION((StgClosure *)closure)) != NULL)
+ closure = next;
+
+ ASSERT(next==(StgClosure *)NULL);
+ return closure;
+}
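+/*
+  Illustration only: the intended idiom appears to be to strip indirection
+  chains before inspecting or packing a closure, e.g.
+      closure = UNWIND_IND(closure);
+      info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+*/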
+
+#endif /* defined(PAR) || defined(GRAN) */
+
+#endif /* PARALLEL_RTS_H */
+
+//@node Index, , Common macros
+//@subsection Index
+
+//@index
+//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE
+//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION
+//* end_gr_simulation:: @cindex\s-+end_gr_simulation
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* gr_file:: @cindex\s-+gr_file
+//* gr_filename:: @cindex\s-+gr_filename
+//* init_gr_simulation:: @cindex\s-+init_gr_simulation
+//* unwindInd:: @cindex\s-+unwindInd
+//@end index
--- /dev/null
+/*
+ Time-stamp: <Sun Dec 12 1999 20:39:04 Stardate: [-30]4039.09 software>
+
+ Revertible Black Hole Manipulation.
+ Used in GUM and GranSim during the packing of closures. These black holes
+ must be revertible because a GC might occur while the packet is being
+ transmitted. In this case all RBHs have to be reverted.
+ */
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+# if defined(DEBUG)
+# include "ParallelDebug.h"
+# endif
+#include "Storage.h" // for recordMutable
+#include "StgMacros.h" // inlined IS_... fcts
+
+/*
+ Turn a closure into a revertible black hole. After the conversion, the
+ first two words of the closure (after the fixed header, of course) will
+ be a link to the mutables list (if appropriate for the garbage
+ collector), and a pointer to the blocking queue. The blocking queue is
+ terminated by a 2-word SPEC closure which holds the original contents of
+ the first two words of the closure.
+*/
+
+//@menu
+//* Externs and prototypes::
+//* Conversion Functions::
+//* Index::
+//@end menu
+//*/
+
+//@node Externs and prototypes, Conversion Functions
+//@section Externs and prototypes
+
+EXTFUN(RBH_Save_0_info);
+EXTFUN(RBH_Save_1_info);
+EXTFUN(RBH_Save_2_info);
+
+//@node Conversion Functions, Index, Externs and prototypes
+//@section Conversion Functions
+
+/*
+ A closure is turned into an RBH upon packing it (see PackClosure in Pack.c).
+ This is needed in case we have to do a GC before the packet is turned
+ into a graph on the PE receiving the packet.
+*/
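+/*
+  Caller-side sketch (illustration only): a NULL result from convertToRBH
+  means that allocate() failed, so the packing code has to trigger a GC and
+  retry, roughly
+      if (convertToRBH(closure) == NULL) {
+        ... schedule a garbage collection, then restart packing ...
+      }
+*/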
+//@cindex convertToRBH
+StgClosure *
+convertToRBH(closure)
+StgClosure *closure;
+{
+ StgRBHSave *rbh_save;
+ StgInfoTable *info_ptr, *rbh_info_ptr, *old_info;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /*
+ Closure layout before this routine runs amuck:
+ +-------------------
+ | HEADER | DATA ...
+ +-------------------
+ | FIXED_HS |
+ */
+ /*
+ Turn closure into an RBH. This is done by modifying the info_ptr,
+ grabbing the info_ptr of the RBH for this closure out of its
+ ITBL. Additionally, we have to save the words from the closure, which
+ will hold the link to the blocking queue. For this purpose we use the
+ RBH_Save_N closures, with N being the number of pointers for this
+ closure. */
+ IF_GRAN_DEBUG(pack,
+ belch(":* Converting closure %p (%s) into an RBH",
+ closure, info_type(closure)));
+ IF_PAR_DEBUG(pack,
+ belch(":* Converting closure %p (%s) into an RBH",
+ closure, info_type(closure)));
+
+ ASSERT(closure_THUNK(closure));
+
+ IF_GRAN_DEBUG(pack,
+ old_info = get_itbl(closure));
+
+  /* Allocate a new closure to hold the data ripped out of the closure */
+ if ((rbh_save = (StgRBHSave *)allocate(FIXED_HS + 2)) == NULL)
+ return NULL; /* have to Garbage Collect; check that in the caller! */
+
+ info_ptr = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ ASSERT(size >= MIN_UPD_SIZE);
+
+ /* Fill in the RBH_Save closure with the original data from closure */
+ rbh_save->payload[0] = (StgPtr) ((StgRBH *)closure)->blocking_queue;
+ rbh_save->payload[1] = (StgPtr) ((StgRBH *)closure)->mut_link;
+
+ /* Set the info_ptr for the rbh_Save closure according to the number of
+ pointers in the original */
+
+ rbh_info_ptr = (StgInfoTable *) (ptrs == 0 ? &RBH_Save_0_info :
+ ptrs == 1 ? &RBH_Save_1_info :
+ &RBH_Save_2_info);
+ SET_INFO(rbh_save, rbh_info_ptr);
+ /* same bitmask as the original closure */
+ SET_GRAN_HDR(rbh_save, PROCS(closure));
+
+ /* Init the blocking queue of the RBH and have it point to the saved data */
+ ((StgRBH *)closure)->blocking_queue = (StgBlockingQueueElement *)rbh_save;
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(RBH_INFOPTR(get_itbl(closure))));
+  /* Turn the closure into an RBH; a great system, indeed! */
+ SET_INFO(closure, RBH_INFOPTR(get_itbl(closure)));
+
+ /*
+ add closure to the mutable list!
+ do this after having turned the closure into an RBH, because an
+     RBH is mutable but the thunk it was previously isn't
+ */
+ //recordMutable((StgMutClosure *)closure);
+
+ //IF_GRAN_DEBUG(pack,
+ /* sanity check; make sure that reverting the RBH yields the
+ orig closure, again */
+ //ASSERT(REVERT_INFOPTR(get_itbl(closure))==old_info));
+
+ /*
+ Closure layout after this routine has run amuck:
+ +---------------------
+ | RBH-HEADER | | | ...
+ +--------------|---|--
+ | FIXED_HS | | v
+ | Mutable-list ie another StgMutClosure
+ v
+ +---------
+ | RBH_SAVE with 0-2 words of DATA
+ +---------
+ */
+
+ return closure;
+}
+
+/*
+  An RBH closure is turned into a FETCH_ME when receiving an ACK message
+ indicating that the transferred closure has been unpacked on the other PE
+ (see processAck in HLComms.c). The ACK also contains the new GA of the
+ closure to which the FETCH_ME closure has to point.
+
+ Converting a closure to a FetchMe is trivial, unless the closure has
+ acquired a blocking queue. If that has happened, we first have to awaken
+ the blocking queue. What a nuisance! Fortunately, @AwakenBlockingQueue@
+ should now know what to do.
+
+ A note on GrAnSim: In GrAnSim we don't have FetchMe closures. However,
+ we have to turn a RBH back to its original form when the simulated
+ transfer of the closure has been finished. Therefore we need the
+ @convertFromRBH@ routine below. After converting the RBH back to its
+ original form and awakening all TSOs, the first TSO will reenter the
+ closure which is now local and carry on merrily reducing it (the other
+ TSO will be less merrily blocked on the now local closure; we're costing
+ the difference between local and global blocks in the BQ code). -- HWL
+*/
+
+# if defined(PAR)
+
+EXTFUN(FETCH_ME_info);
+
+//@cindex convertToFetchMe
+void
+convertToFetchMe(rbh, ga)
+StgRBH *rbh;
+globalAddr *ga;
+{
+ // StgInfoTable *ip = get_itbl(rbh);
+ StgBlockingQueueElement *bqe = rbh->blocking_queue;
+
+ ASSERT(get_itbl(rbh)->type==RBH);
+
+ IF_PAR_DEBUG(pack,
+ belch(":* Converting RBH %p (%s) into a FETCH_ME for GA ((%x, %d, %x))",
+ rbh, info_type(rbh),
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight));
+
+ /* put closure on mutables list, while it is still a RBH */
+ //recordMutable((StgMutClosure *)rbh);
+
+ /* actually turn it into a FETCH_ME */
+ SET_INFO((StgClosure *)rbh, &FETCH_ME_info);
+
+ /* set the global pointer in the FETCH_ME closure to the given value */
+ ((StgFetchMe *)rbh)->ga = ga;
+
+ IF_PAR_DEBUG(pack,
+ if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+	  belch(":* Awakening non-empty BQ of RBH closure %p (first TSO is %d (%p))",
+ rbh, ((StgTSO *)bqe)->id, ((StgTSO *)bqe)));
+
+ /* awaken all TSOs and BLOCKED_FETCHES on the blocking queue */
+ if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+ awaken_blocked_queue(bqe, (StgClosure *)rbh);
+}
+# else /* GRAN */
+/* Prototype */
+// void UnlinkFromMUT(StgPtr closure);
+
+/*
+ This routine in fact reverts the RBH into its original form; this code
+ should be of interest for GUM, too, but is not needed in the current version.
+ convertFromRBH is called where GUM uses convertToFetchMe.
+*/
+void
+convertFromRBH(closure)
+StgClosure *closure;
+{
+ StgBlockingQueueElement *bqe = ((StgRBH*)closure)->blocking_queue;
+ char str[NODE_STR_LEN]; // debugging only
+ StgInfoTable *rip = REVERT_INFOPTR(get_itbl(closure)); // debugging only
+
+ IF_GRAN_DEBUG(pack,
+ if (get_itbl(bqe)->type==TSO)
+ sprintf(str, "%d (%p)",
+ ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
+ else
+ strcpy(str, "empty");
+ belch(":* Reverting RBH %p (%s) into a ??? closure again; BQ start: %s",
+ closure, info_type(closure), str));
+
+ ASSERT(get_itbl(closure)->type==RBH);
+
+ /* awaken_blocked_queue also restores the RBH_Save closure
+ (have to call it even if there are no TSOs in the queue!) */
+ awaken_blocked_queue(bqe, closure);
+
+ /* Put back old info pointer (grabbed from the RBH's info table).
+     We do that *after* awakening the BQ to be sure the node is still an RBH
+     when calling awaken_blocked_queue (different in GUM!)
+ */
+ SET_INFO(closure, REVERT_INFOPTR(get_itbl(closure)));
+
+ /* put closure on mutables list */
+ //recordMutable((StgMutClosure *)closure);
+
+# if 0 /* rest of this fct */
+ /* ngoq ngo' */
+ /* FETCHME_GA(closure) = ga; */
+ if (IS_MUTABLE(INFO_PTR(bqe))) {
+ PROC old_proc = CurrentProc, /* NB: For AwakenBlockingQueue, */
+         new_proc = where_is(closure);  /* CurrentProc must be where */
+ /* closure lives. */
+ CurrentProc = new_proc;
+
+# if defined(GRAN_CHECK)
+ if (RTSflags.GranFlags.debug & 0x100)
+ fprintf(stderr,"===== AwBQ of node 0x%lx (%s) [PE %2u]\n",
+ closure, (isSpec ? "SPEC_RBH" : "GEN_RBH"), new_proc);
+# endif
+
+ rbh_save = AwakenBlockingQueue(bqe); /* AwakenBlockingQueue(bqe); */
+ CurrentProc = old_proc;
+ } else {
+ rbh_save = bqe;
+ }
+
+ /* Put data from special RBH save closures back into the closure */
+ if ( rbh_save == NULL ) {
+ fprintf(stderr,"convertFromRBH: No RBH_Save_? closure found at end of BQ!\n");
+ EXIT(EXIT_FAILURE);
+ } else {
+ closure[isSpec ? SPEC_HS : GEN_HS] = rbh_save[SPEC_HS];
+ closure[(isSpec ? SPEC_HS : GEN_HS) + 1] = rbh_save[SPEC_HS + 1];
+ }
+# endif /* 0 */
+
+# if 0 && (defined(GCap) || defined(GCgn))
+ /* ngoq ngo' */
+ /* If we convert from an RBH in the old generation,
+ we have to make sure it goes on the mutables list */
+
+ if(closure <= StorageMgrInfo.OldLim) {
+ if (IS_MUTABLE(INFO_PTR(closure)) && MUT_LINK(closure) == MUT_NOT_LINKED) {
+ MUT_LINK(closure) = (StgWord) StorageMgrInfo.OldMutables;
+ StorageMgrInfo.OldMutables = closure;
+ }
+ }
+# endif /* 0 */
+}
+#endif /* PAR */
+
+/* Remove closure from the mutables list */
+#if 0
+/* ngoq ngo' */
+void
+UnlinkFromMUT(StgPtr closure)
+{
+ StgPtr curr = StorageMgrInfo.OldMutables, prev = NULL;
+
+ while (curr != NULL && curr != closure) {
+ ASSERT(MUT_LINK(curr)!=MUT_NOT_LINKED);
+ prev=curr;
+ curr=MUT_LINK(curr);
+ }
+ if (curr==closure) {
+ if (prev==NULL)
+ StorageMgrInfo.OldMutables = MUT_LINK(curr);
+ else
+ MUT_LINK(prev) = MUT_LINK(curr);
+ MUT_LINK(curr) = MUT_NOT_LINKED;
+ }
+
+# if 0 && (defined(GCap) || defined(GCgn))
+ {
+ closq newclos;
+ extern closq ex_RBH_q;
+
+ newclos = (closq) stgMallocBytes(sizeof(struct clos), "UnlinkFromMUT");
+ CLOS_CLOSURE(newclos) = closure;
+ CLOS_PREV(newclos) = NULL;
+ CLOS_NEXT(newclos) = ex_RBH_q;
+ if (ex_RBH_q!=NULL)
+ CLOS_PREV(ex_RBH_q) = newclos;
+ ex_RBH_q = newclos;
+ }
+# endif
+}
+#endif /* 0 */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node Index, , Conversion Functions
+//@section Index
+
+//@index
+//* convertToFetchMe:: @cindex\s-+convertToFetchMe
+//* convertToRBH:: @cindex\s-+convertToRBH
+//@end index
--- /dev/null
+/* ----------------------------------------------------------------------------
+ Time-stamp: <Sat Dec 04 1999 19:29:57 Stardate: [-30]3999.06 hwloidl>
+ $Id: SysMan.c,v 1.2 2000/01/13 14:34:09 hwloidl Exp $
+
+ GUM System Manager Program
+ Handles startup, shutdown and global synchronisation of the parallel system.
+
+ The Parade/AQUA Projects, Glasgow University, 1994-1995.
+ GdH/APART Projects, Heriot-Watt University, Edinburgh, 1997-1999.
+ P. Trinder, November 30th. 1994.
+ Adapted for new RTS
+ P. Trinder, July 1997.
+ H-W. Loidl, November 1999.
+
+ ------------------------------------------------------------------------- */
+
+//@node GUM System Manager Program, , ,
+//@section GUM System Manager Program
+
+//@menu
+//* General docu::
+//* Includes::
+//* Macros etc::
+//* Variables::
+//* Main fct::
+//* Auxiliary fcts::
+//* Index::
+//@end menu
+
+//@node General docu, Includes, GUM System Manager Program, GUM System Manager Program
+//@subsection General docu
+
+/*
+
+The SysMan task currently controls the initiation and termination of a
+parallel Haskell program running under GUM. In the future it may
+control global GC synchronisation and statistics gathering. Based on
+K. Hammond's SysMan.lc in Graph for PVM. SysMan is unusual in that it
+is not part of the executable produced by ghc: it is a free-standing
+program that spawns PVM tasks (logical PEs) to evaluate the
+program. After initialisation it runs in parallel with the PE tasks,
+awaiting messages.
+
+OK children, buckle down for some serious weirdness, it works like this ...
+
+
+o The argument vector (argv) for SysMan has one of the following two shapes:
+
+-------------------------------------------------------------------------------
+| SysMan path | debug flag | pvm-executable path | Num. PEs | Program Args ...|
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------
+| SysMan path | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+The "pvm-executable path" is an absolute path to where PVM stashes the
+code for each PE. The arguments passed on to each PE-executable
+spawned by PVM are:
+
+-------------------------------
+| Num. PEs | Program Args ... |
+-------------------------------
+
+The arguments passed to the Main-thread PE-executable are:
+
+-------------------------------------------------------------------
+| main flag | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+o SysMan's algorithm is as follows.
+
+o use PVM to spawn (nPEs-1) PVM tasks
+o fork SysMan to create the main-thread PE. This permits the main-thread to
+read and write to stdin and stdout.
+o Barrier-synchronise waiting for all of the PE-tasks to start.
+o Broadcast the SysMan task-id, so that the main thread knows it.
+o Wait for the Main-thread PE to send its task-id.
+o Broadcast an array of the PE task-ids to all of the PE-tasks.
+o Enter a loop awaiting incoming messages, e.g. failure, Garbage-collection,
+termination.
+
+The forked Main-thread algorithm, in SysMan, is as follows.
+
+o disconnects from PVM.
+o sets a flag in argv to indicate that it is the main thread.
+o `exec's a copy of the pvm-executable (i.e. the program being run)
+
+
+The pvm-executable run by each PE-task is initialised as follows (see the
+sketch after this comment):
+
+o Registers with PVM, obtaining a task-id.
+o Joins the barrier synchronisation awaiting the other PEs.
+o Receives and records the task-id of SysMan, for future use.
+o If the PE is the main thread it sends its task-id to SysMan.
+o Receives and records the array of task-ids of the other PEs.
+o Begins execution.
+
+*/
+
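+/*
+   For orientation, here is a hedged sketch of the PE-side counterpart of the
+   start-up protocol described above. It is illustrative only: the real code
+   lives in the RTS (not in SysMan), and apart from the PVM primitives it
+   uses only names that already appear in this file (waitForPEOp, the PP_*
+   opcodes, the group names and the variables declared below).
+*/
+#if 0
+static void startUpPE_sketch(void)
+{
+  int nbytes, opcode;
+  rtsPacket p;
+
+  mytid = pvm_mytid();                        /* register with PVM          */
+  checkerr(pvm_joingroup(PEGROUP));           /* join the PE group ...      */
+  checkerr(pvm_joingroup(PECTLGROUP));        /* ... and the control group  */
+  checkerr(pvm_barrier(PECTLGROUP, nPEs+1));  /* wait for SysMan + all PEs  */
+
+  /* SysMan's task-id arrives as the sender of the PP_SYSMAN_TID broadcast */
+  p = waitForPEOp(PP_SYSMAN_TID, ANY_GLOBAL_TASK);
+  pvm_bufinfo(p, &nbytes, &opcode, &SysManTask);
+
+  if (IAmMainThread) {                        /* tell SysMan who we are     */
+    pvm_initsend(PvmDataDefault);
+    pvm_send(SysManTask, PP_MAIN_TASK);
+  }
+
+  /* receive the array of all PE task-ids (counterpart of PutArgs below) */
+  p = waitForPEOp(PP_PETIDS, ANY_GLOBAL_TASK);
+  pvm_upklong(PEbuffer, nPEs, 1);             /* assuming plain longs       */
+}
+#endif
+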
+//@node Includes, Macros etc, General docu, GUM System Manager Program
+//@subsection Includes
+
+#include "Rts.h"
+#include "ParTypes.h"
+#include "LLC.h"
+#include "Parallel.h"
+
+//@node Macros etc, Variables, Includes, GUM System Manager Program
+//@subsection Macros etc
+
+#define NON_POSIX_SOURCE /* so says Solaris */
+
+#define checkerr(c) do { \
+ if ((c)<0) { \
+ pvm_perror("Sysman"); \
+ fprintf(stderr,"Sysman"); \
+ stg_exit(EXIT_FAILURE); \
+ } \
+ } while(0)
+
+/* SysMan sits on top of the GHC routine that does the RtsFlags handling,
+   so we cannot use the standard flag-controlled debugging macros. For the
+   time being we use a macro whose setting is fixed at compile time.
+*/
+/* debugging enabled */
+#define IF_PAR_DEBUG(c,s) { s; }
+/* debugging disabled */
+// #define IF_PAR_DEBUG(c,s) /* nothing */
+
+//@node Variables, Main fct, Macros etc, GUM System Manager Program
+//@subsection Variables
+
+/*
+  The following definitions are included so that SysMan can be linked with
+  the Low Level Communications module (LLComms); they are not used in
+  SysMan itself. */
+
+GlobalTaskId mytid, SysManTask;
+rtsBool IAmMainThread;
+rtsBool GlobalStopPending = rtsFalse;
+ /* Handle unexpected messages correctly */
+
+static GlobalTaskId gtids[MAX_PES];
+static GlobalTaskId sysman_id, sender_id, mainThread_id;
+static unsigned PEsTerminated = 0;
+static rtsBool Finishing = rtsFalse;
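+/* PEbuffer collects the task-ids that are broadcast to all PEs: slot 0 is
+   reserved for the Main Thread (filled in once it reports in), slots
+   1..nPEs-1 hold the ids of the PEs spawned via pvm_spawn below. */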
+static long PEbuffer[MAX_PES];
+nat nPEs = 0;
+
+//@node Main fct, Auxiliary fcts, Variables, GUM System Manager Program
+//@subsection Main fct
+
+//@cindex main
+main (int argc, char **argv) {
+ int rbufid;
+ int opcode, nbytes;
+ char **pargv;
+ int i, cc, spawn_flag = PvmTaskDefault;
+ char *petask, *pvmExecutable;
+ rtsPacket addr;
+
+ setbuf(stdout, NULL); // disable buffering of stdout
+ setbuf(stderr, NULL); // disable buffering of stderr
+
+ if (argc > 1) {
+ if (*argv[1] == '-') {
+ spawn_flag = PvmTaskDebug;
+ argv[1] = argv[0];
+ argv++; argc--;
+ }
+ sysman_id = pvm_mytid(); /* This must be the first PVM call */
+
+ checkerr(sysman_id);
+
+ /*
+ Get the full path and filename of the pvm executable (stashed in some
+ PVM directory), and the number of PEs from the command line.
+ */
+ pvmExecutable = argv[1];
+ nPEs = atoi(argv[2]);
+
+ if ((petask = getenv(PETASK)) == NULL) // PETASK set by driver
+ petask = PETASK;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] nPEs (%s) = %d\n",
+ sysman_id, petask, nPEs));
+
+ /* Check that we can create the number of PE and IMU tasks requested */
+ if (nPEs > MAX_PES) {
+ fprintf(stderr,"SysMan: No more than %d PEs allowed (%d requested)\n",
+ MAX_PES, nPEs);
+ stg_exit(EXIT_FAILURE);
+ }
+ /*
+ Now create the PE Tasks. We spawn (nPEs-1) pvm threads: the Main Thread
+ (which starts execution and performs IO) is created by forking SysMan
+ */
+ nPEs--;
+ if (nPEs > 0) {
+ /* Initialise the PE task arguments from Sysman's arguments */
+ pargv = argv + 2;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "== [%x] Spawning %d PEs(%s) ...\n",
+ sysman_id, nPEs, petask);
+ fprintf(stderr, " args: ");
+ for (i = 0; pargv[i]; ++i)
+ fprintf(stderr, "%s, ", pargv[i]);
+ fprintf(stderr, "\n"));
+
+ checkerr(pvm_spawn(petask, pargv, spawn_flag, "", nPEs, gtids));
+ /*
+ * Stash the task-ids of the PEs away in a buffer, once we know
+ * the Main Thread's task-id, we'll broadcast them all.
+ */
+ for (i = 0; i < nPEs; i++)
+ PEbuffer[i+1] = (long) gtids[i];
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Spawned\n", sysman_id));
+ }
+
+ /*
+ Create the MainThread PE by forking SysMan. This arcane coding
+ is required to allow MainThread to read stdin and write to stdout.
+ PWT 18/1/96
+ */
+    nPEs++; /* restore the count: the forked Main Thread PE counts too */
+ if ((cc = fork())) {
+ checkerr(cc); /* Parent continues as SysMan */
+ IF_PAR_DEBUG(verbose,
+		  fprintf(stderr,"== [%x] SysMan Task is [t%x]\n", sysman_id, sysman_id));
+
+ /*
+ SysMan joins PECTLGROUP, so that it can wait (at the
+	  barrier synchronisation a few instructions later) for the
+ other PE-tasks to start.
+
+ The manager group (MGRGROUP) is vestigial at the moment. It
+ may eventually include a statistics manager, and a (global)
+ garbage collector manager.
+ */
+ checkerr(pvm_joingroup(PECTLGROUP));
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Joined PECTLGROUP \n", sysman_id));
+
+ /* Wait for all the PEs to arrive */
+ checkerr(pvm_barrier(PECTLGROUP, nPEs + 1));
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PECTLGROUP barrier passed \n",
+ sysman_id));
+
+ /* Broadcast SysMan's ID, so Main Thread PE knows it */
+ pvm_initsend(PvmDataDefault);
+ pvm_bcast(PEGROUP, PP_SYSMAN_TID);
+
+ /* Wait for Main Thread to identify itself*/
+ addr = waitForPEOp(PP_MAIN_TASK, ANY_GLOBAL_TASK);
+ pvm_bufinfo(addr, &nbytes, &opcode, &mainThread_id);
+ PEbuffer[0] = mainThread_id;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] SysMan received Main Task = %x\n",
+ sysman_id, mainThread_id));
+
+ /* Now that we have them all, broadcast Global Task Ids of all PEs */
+ pvm_initsend(PvmDataDefault);
+ PutArgs(PEbuffer, nPEs);
+ pvm_bcast(PEGROUP, PP_PETIDS);
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Sysman successfully initialized!\n",
+ sysman_id));
+
+//@cindex message handling loop
+ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+ /* Main message handling loop */
+ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+ /* Process incoming messages */
+ while (1) {
+ if ((rbufid = pvm_recv(ANY_TASK, ANY_OPCODE)) < 0)
+ pvm_perror("Sysman: Receiving Message");
+ else {
+ pvm_bufinfo(rbufid, &nbytes, &opcode, &sender_id);
+
+ /*
+ IF_PAR_DEBUG(trace,
+ fprintf(stderr,"== [%x] SysMan: Message received by SysMan: rbufid=%x, nbytes = %d, opcode = %x, sender_id = %x\n",
+ sysman_id, rbufid, nbytes, opcode, sender_id));
+ */
+ switch (opcode) {
+ case PP_GC_INIT:
+	      /* This function is not yet implemented for GUM */
+ fprintf(stderr,"Global GC from %x Not yet implemented for GUM!\n",
+ sender_id);
+ sync(PECTLGROUP, PP_FULL_SYSTEM);
+ broadcast(PEGROUP, PP_GC_INIT);
+ /* DoGlobalGC(); */
+ /* broadcast(PEGROUP, PP_INIT); */
+ break;
+
+ case PP_STATS_ON:
+ fprintf(stderr,"PP_STATS_ON (from %x) not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_STATS_OFF:
+ fprintf(stderr,"PP_STATS_OFF (from %x) not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_FINISH:
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Finish from %x\n",
+ sysman_id, sender_id));
+ if (!Finishing) {
+ Finishing = rtsTrue;
+ PEsTerminated = 1;
+ pvm_initsend(PvmDataDefault);
+ pvm_bcast(PEGROUP, PP_FINISH);
+ } else {
+ ++PEsTerminated;
+ }
+ if (PEsTerminated >= nPEs) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Global Shutdown, Goodbye!! (SysMan has received FINISHes from all PEs)\n",
+ sysman_id));
+ broadcast(PEGROUP, PP_FINISH);
+ broadcast(MGRGROUP, PP_FINISH);
+ pvm_lvgroup(PECTLGROUP);
+ pvm_lvgroup(MGRGROUP);
+ pvm_exit();
+ exit(EXIT_SUCCESS);
+ /* Qapla'! */
+ }
+ break;
+
+ case PP_FAIL:
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] Fail from %x\n",
+ sysman_id, sender_id));
+ if (!Finishing) {
+ Finishing = rtsTrue;
+ broadcast(PEGROUP, PP_FAIL);
+ }
+ break;
+
+ default:
+ {
+ /*
+ char *opname = GetOpName(opcode);
+ fprintf(stderr,"Sysman: Unrecognised opcode %s (%x)\n",
+ opname,opcode); */
+ fprintf(stderr,"Qagh: Sysman: Unrecognised opcode (%x)\n",
+ opcode);
+ }
+ break;
+ } /* switch */
+ } /* else */
+ } /* while 1 */
+ } /* forked Sysman Process */
+ else {
+ fprintf(stderr, "Main Thread PE has been forked; doing an execv(%s,...)\n",
+ pvmExecutable);
+ pvmendtask(); /* Disconnect from PVM to avoid confusion: */
+ /* executable reconnects */
+ *argv[0] = '-'; /* Flag that this is the Main Thread PE */
+ execv(pvmExecutable,argv); /* Parent task becomes Main Thread PE */
+ }
+ } /* argc > 1 */
+} /* main */
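+
+/*
+   For reference, a hedged sketch of how a PE might take part in the FINISH
+   protocol handled in the main loop above (illustrative only; the real
+   shutdown code lives in the RTS):
+*/
+#if 0
+static void shutdownPE_sketch(void)
+{
+  /* tell SysMan we are done; SysMan counts these FINISHes and, once it has
+     heard from every PE, rebroadcasts PP_FINISH to PEGROUP (see above) */
+  pvm_initsend(PvmDataDefault);
+  pvm_send(SysManTask, PP_FINISH);
+
+  /* wait for the global PP_FINISH before leaving PVM */
+  waitForPEOp(PP_FINISH, SysManTask);
+  pvm_lvgroup(PEGROUP);
+  pvm_lvgroup(PECTLGROUP);
+  pvm_exit();
+}
+#endif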
+
+//@node Auxiliary fcts, Index, Main fct, GUM System Manager Program
+//@subsection Auxiliary fcts
+
+/*
+ * This is reproduced from RtsUtils to save linking with a whole ball of wax
+ */
+/* result-checking malloc wrappers. */
+
+//@cindex stgMallocBytes
+
+void *
+stgMallocBytes (int n, char *msg)
+{
+ char *space;
+
+ if ((space = (char *) malloc((size_t) n)) == NULL) {
+ fflush(stdout);
+    fprintf(stderr, "%s", msg);
+ // MallocFailHook((W_) n, msg); /*msg*/
+ stg_exit(EXIT_FAILURE);
+ }
+ return space;
+}
+
+/* Needed here because it's used in loads of places like LLComms etc */
+
+//@cindex stg_exit
+
+void stg_exit(n)
+I_ n;
+{
+ exit(n);
+}
+
+//@node Index, , Auxiliary fcts, GUM System Manager Program
+//@subsection Index
+
+//@index
+//* main:: @cindex\s-+main
+//* message handling loop:: @cindex\s-+message handling loop
+//* stgMallocBytes:: @cindex\s-+stgMallocBytes
+//* stg_exit:: @cindex\s-+stg_exit
+//@end index