From: hwloidl Date: Thu, 13 Jan 2000 14:34:09 +0000 (+0000) Subject: [project @ 2000-01-13 14:33:57 by hwloidl] X-Git-Tag: Approximately_9120_patches~5299 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=1b28d4e1f43185ad8c8e7407c66413e1b358402b;p=ghc-hetmet.git [project @ 2000-01-13 14:33:57 by hwloidl] Merged GUM-4-04 branch into the main trunk. In particular merged GUM and SMP code. Most of the GranSim code in GUM-4-04 still has to be carried over. --- diff --git a/ghc/compiler/absCSyn/Costs.lhs b/ghc/compiler/absCSyn/Costs.lhs index 24563c7..7bbadff 100644 --- a/ghc/compiler/absCSyn/Costs.lhs +++ b/ghc/compiler/absCSyn/Costs.lhs @@ -1,7 +1,9 @@ % -% (c) The GRASP/AQUA Project, Glasgow University, 1994-1998 -% Hans Wolfgang Loidl +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % +% $Id: Costs.lhs,v 1.20 2000/01/13 14:33:57 hwloidl Exp $ +% +% Only needed in a GranSim setup -- HWL % --------------------------------------------------------------------------- \section[Costs]{Evaluating the costs of computing some abstract C code} @@ -28,9 +30,11 @@ The meaning of the result tuple is: instructions. \end{itemize} -This function is needed in GrAnSim for parallelism. +This function is needed in GranSim for costing pieces of abstract C. -These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!): +These are first suggestions for scaling the costs. But, this scaling should +be done in the RTS rather than the compiler (this really should be +tunable!): \begin{pseudocode} @@ -82,6 +86,7 @@ instance Num CostRes where negate = mapOp negate abs = mapOp abs signum = mapOp signum + fromInteger _ = error "fromInteger not defined" mapOp :: (Int -> Int) -> CostRes -> CostRes mapOp g ( Cost (i, b, l, s, f) ) = Cost (g i, g b, g l, g s, g f) @@ -202,7 +207,10 @@ costs absC = CSimultaneous absC -> costs absC - CCheck _ amodes code -> Cost (2, 1, 0, 0, 0) + CCheck _ amodes code -> Cost (2, 1, 0, 0, 0) -- ToDo: refine this by + -- looking at the first arg + + CRetDirect _ _ _ _ -> nullCosts CMacroStmt macro modes -> stmtMacroCosts macro modes @@ -215,19 +223,28 @@ costs absC = -- *** the next three [or so...] are DATA (those above are CODE) *** -- as they are data rather than code they all have nullCosts -- HWL + CCallTypedef _ _ _ _ -> nullCosts + CStaticClosure _ _ _ _ -> nullCosts - CClosureInfoAndCode _ _ _ _ -> nullCosts + CSRT _ _ -> nullCosts - CRetDirect _ _ _ _ -> nullCosts + CBitmap _ _ -> nullCosts + + CClosureInfoAndCode _ _ _ _ -> nullCosts CRetVector _ _ _ _ -> nullCosts + CClosureTbl _ -> nullCosts + CCostCentreDecl _ _ -> nullCosts + CCostCentreStackDecl _ -> nullCosts CSplitMarker -> nullCosts + _ -> trace ("Costs.costs") nullCosts + -- --------------------------------------------------------------------------- addrModeCosts :: CAddrMode -> Side -> CostRes @@ -242,7 +259,11 @@ addrModeCosts addr_mode side = CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0) else Cost (0, 0, 1, 0, 0) - CReg _ -> nullCosts {- loading from, storing to reg is free ! -} + CAddr (CIndex _ n _ ) -> Cost (1, 0, 1, 0, 0) -- does pointer arithmetic + + CAddr _ -> nullCosts + + CReg _ -> nullCosts {- loading from, storing to reg is free ! 
-} {- for costing CReg->Creg ops see special -} {- case in costs fct -} @@ -277,6 +298,8 @@ addrModeCosts addr_mode side = CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list + _ -> trace ("Costs.addrModeCosts") nullCosts + -- --------------------------------------------------------------------------- exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes @@ -288,10 +311,11 @@ exprMacroCosts side macro mode_list = in arg_costs + case macro of - ENTRY_CODE -> nullCosts - ARG_TAG -> nullCosts -- XXX - GET_TAG -> nullCosts -- XXX - + ENTRY_CODE -> nullCosts -- nothing + ARG_TAG -> nullCosts -- nothing + GET_TAG -> Cost (0, 0, 1, 0, 0) -- indirect load + UPD_FRAME_UPDATEE -> Cost (0, 0, 1, 0, 0) -- indirect load + _ -> trace ("Costs.exprMacroCosts") nullCosts -- --------------------------------------------------------------------------- @@ -309,7 +333,9 @@ stmtMacroCosts macro modes = UPD_CAF -> Cost (7, 0, 1, 3, 0) {- SMupdate.lh -} UPD_BH_UPDATABLE -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -} UPD_BH_SINGLE_ENTRY -> Cost (3, 0, 0, 1, 0) {- SMupdate.lh -} - PUSH_UPD_FRAME -> Cost (3, 0, 0, 4, 0) {- SMupdate.lh -} + PUSH_UPD_FRAME -> Cost (3, 0, 0, 4, 0) {- Updates.h -} + PUSH_SEQ_FRAME -> Cost (2, 0, 0, 3, 0) {- StgMacros.h !-} + UPDATE_SU_FROM_UPD_FRAME -> Cost (1, 0, 1, 0, 0) {- StgMacros.h !-} SET_TAG -> nullCosts {- COptRegs.lh -} GRAN_FETCH -> nullCosts {- GrAnSim bookkeeping -} GRAN_RESCHEDULE -> nullCosts {- GrAnSim bookkeeping -} diff --git a/ghc/compiler/basicTypes/Module.lhs b/ghc/compiler/basicTypes/Module.lhs index 40c25f5..af634fd 100644 --- a/ghc/compiler/basicTypes/Module.lhs +++ b/ghc/compiler/basicTypes/Module.lhs @@ -285,7 +285,8 @@ getAllFilesMatching :: SearchPath -> (ModuleHiMap, ModuleHiMap) -> (FilePath, String) -> IO (ModuleHiMap, ModuleHiMap) -getAllFilesMatching dirs hims (dir_path, suffix) = ( do +getAllFilesMatching dirs hims (dir_path, suffix) = + do -- fpaths entries do not have dir_path prepended fpaths <- getDirectoryContents dir_path is_dll <- catch @@ -297,7 +298,7 @@ getAllFilesMatching dirs hims (dir_path, suffix) = ( do ) (\ _ {-don't care-} -> return NotDll) return (foldl (addModules is_dll) hims fpaths) - ) -- soft failure + -- soft failure `catch` (\ err -> do hPutStrLn stderr diff --git a/ghc/compiler/codeGen/CgCase.lhs b/ghc/compiler/codeGen/CgCase.lhs index 4e755ca..e358b9b 100644 --- a/ghc/compiler/codeGen/CgCase.lhs +++ b/ghc/compiler/codeGen/CgCase.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgCase.lhs,v 1.36 1999/11/01 17:10:06 simonpj Exp $ +% $Id: CgCase.lhs,v 1.37 2000/01/13 14:33:57 hwloidl Exp $ % %******************************************************** %* * @@ -602,9 +602,10 @@ cgAlgDefault gc_flag is_fun uniq cc_slot must_label_branch = -- We have arranged that Node points to the thing restoreCurrentCostCentre cc_slot `thenFC` \restore_cc -> getAbsC (absC restore_cc `thenC` - (if opt_GranMacros && emit_yield - then yield [node] False - else absC AbsCNop) `thenC` + -- HWL: maybe need yield here + --(if emit_yield + -- then yield [node] True + -- else absC AbsCNop) `thenC` possibleHeapCheck gc_flag is_fun [node] [] Nothing (cgExpr rhs) -- Node is live, but doesn't need to point at the thing itself; -- it's ok for Node to point to an indirection or FETCH_ME @@ -633,9 +634,10 @@ cgAlgAlt gc_flag uniq cc_slot must_label_branch = restoreCurrentCostCentre cc_slot `thenFC` \restore_cc -> getAbsC (absC restore_cc `thenC` - (if opt_GranMacros && emit_yield - then yield [node] True 
-- XXX live regs wrong - else absC AbsCNop) `thenC` + -- HWL: maybe need yield here + -- (if emit_yield + -- then yield [node] True -- XXX live regs wrong + -- else absC AbsCNop) `thenC` (case gc_flag of NoGC -> mapFCs bindNewToTemp args `thenFC` \_ -> nopC GCMayHappen -> bindConArgs con args @@ -667,9 +669,10 @@ cgUnboxedTupleAlt lbl cc_slot emit_yield (con,args,use_mask,rhs) restoreCurrentCostCentre cc_slot `thenFC` \restore_cc -> absC restore_cc `thenC` - (if opt_GranMacros && emit_yield - then yield live_regs True -- XXX live regs wrong? - else absC AbsCNop) `thenC` + -- HWL: maybe need yield here + -- (if emit_yield + -- then yield live_regs True -- XXX live regs wrong? + -- else absC AbsCNop) `thenC` let -- ToDo: could maybe use Nothing here if stack_res is False -- since the heap-check can just return to the top of the diff --git a/ghc/compiler/codeGen/CgClosure.lhs b/ghc/compiler/codeGen/CgClosure.lhs index c40320c..1b80bea 100644 --- a/ghc/compiler/codeGen/CgClosure.lhs +++ b/ghc/compiler/codeGen/CgClosure.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgClosure.lhs,v 1.38 1999/11/11 17:50:49 simonpj Exp $ +% $Id: CgClosure.lhs,v 1.39 2000/01/13 14:33:58 hwloidl Exp $ % \section[CgClosure]{Code generation for closures} @@ -40,7 +40,8 @@ import CgUsages ( adjustSpAndHp, setRealAndVirtualSp, getVirtSp, getSpRelOffset, getHpRelOffset ) import CLabel ( CLabel, mkClosureLabel, mkFastEntryLabel, - mkRednCountsLabel, mkInfoTableLabel + mkRednCountsLabel, mkInfoTableLabel, + pprCLabel ) import ClosureInfo -- lots and lots of stuff import CmdLineOpts ( opt_GranMacros, opt_SccProfilingOn, opt_DoTickyProfiling ) @@ -325,7 +326,12 @@ closureCodeBody binder_info closure_info cc all_args body -- arg_regs = case entry_conv of DirectEntry lbl arity regs -> regs - other -> panic "closureCodeBody:arg_regs" + other -> trace ("*** closureCodeBody:arg_regs " ++ (pprHWL entry_conv) ++ "(HWL ignored; no args passed in regs)") [] + + pprHWL :: EntryConvention -> String + pprHWL (ViaNode) = "ViaNode" + pprHWL (StdEntry cl) = "StdEntry" + pprHWL (DirectEntry cl i l) = "DirectEntry" num_arg_regs = length arg_regs @@ -350,7 +356,7 @@ closureCodeBody binder_info closure_info cc all_args body mapCs bindNewToStack arg_offsets `thenC` setRealAndVirtualSp sp_all_args `thenC` - argSatisfactionCheck closure_info `thenC` + argSatisfactionCheck closure_info arg_regs `thenC` -- OK, so there are enough args. Now we need to stuff as -- many of them in registers as the fast-entry code @@ -516,24 +522,24 @@ relative offset of this word tells how many words of arguments are expected. 
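The arg_regs parameter added to argSatisfactionCheck below threads the live argument registers through to the (currently commented-out) GranSim context-switch code; on the RTS side the GRAN_YIELD and GRAN_RESCHEDULE macros receive a register-liveness bitmask, which CgHeapery.lhs further down now derives with mkRegLiveness instead of passing a hard-wired 0. A minimal standalone C sketch of that bitmask convention follows; all names in it are hypothetical and not part of this patch:

#include <stdio.h>

typedef struct { int regno; int holds_pointer; } Reg;

/* Vanilla register i (1-based) sets bit i-1 iff it holds a heap pointer,
   mirroring what mkRegLiveness computes for followable reps. */
static unsigned long reg_liveness(const Reg *regs, int n)
{
    unsigned long mask = 0;
    int i;
    for (i = 0; i < n; i++)
        if (regs[i].holds_pointer)
            mask |= 1UL << (regs[i].regno - 1);
    return mask;
}

int main(void)
{
    Reg live[] = { {1, 1}, {2, 0}, {3, 1} };   /* R1 and R3 hold pointers */
    printf("liveness mask = 0x%lx\n", reg_liveness(live, 3));   /* 0x5 */
    return 0;
}

Bit i-1 is set exactly when vanilla register i carries a pointer, so the simulator knows which registers to treat as roots when a thread yields.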
\begin{code} -argSatisfactionCheck :: ClosureInfo -> Code +argSatisfactionCheck :: ClosureInfo -> [MagicId] {-GRAN-} -> Code -argSatisfactionCheck closure_info +argSatisfactionCheck closure_info arg_regs = nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points -> - let - emit_gran_macros = opt_GranMacros - in +-- let +-- emit_gran_macros = opt_GranMacros +-- in -- HWL ngo' ngoq: -- absC (CMacroStmt GRAN_FETCH []) `thenC` -- forceHeapCheck [] node_points (absC AbsCNop) `thenC` - (if emit_gran_macros - then if node_points - then fetchAndReschedule [] node_points - else yield [] node_points - else absC AbsCNop) `thenC` + --(if opt_GranMacros + -- then if node_points + -- then fetchAndReschedule arg_regs node_points + -- else yield arg_regs node_points + -- else absC AbsCNop) `thenC` getSpRelOffset 0 `thenFC` \ (SpRel sp) -> let @@ -565,16 +571,13 @@ thunkWrapper closure_info lbl thunk_code = -- Stack and heap overflow checks nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points -> - let - emit_gran_macros = opt_GranMacros - in - -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node - -- (we prefer fetchAndReschedule-style context switches to yield ones) - (if emit_gran_macros - then if node_points - then fetchAndReschedule [] node_points - else yield [] node_points - else absC AbsCNop) `thenC` + -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node + -- (we prefer fetchAndReschedule-style context switches to yield ones) + (if opt_GranMacros + then if node_points + then fetchAndReschedule [] node_points + else yield [] node_points + else absC AbsCNop) `thenC` -- stack and/or heap checks thunkChecks lbl node_points ( @@ -597,13 +600,10 @@ funWrapper :: ClosureInfo -- Closure whose code body this is funWrapper closure_info arg_regs stk_tags info_label fun_body = -- Stack overflow check nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points -> - let - emit_gran_macros = opt_GranMacros - in -- HWL chu' ngoq: - (if emit_gran_macros - then yield arg_regs node_points - else absC AbsCNop) `thenC` + (if opt_GranMacros + then yield arg_regs node_points + else absC AbsCNop) `thenC` -- heap and/or stack checks fastEntryChecks arg_regs stk_tags info_label node_points ( diff --git a/ghc/compiler/codeGen/CgHeapery.lhs b/ghc/compiler/codeGen/CgHeapery.lhs index a4f6bc2..566cfcb 100644 --- a/ghc/compiler/codeGen/CgHeapery.lhs +++ b/ghc/compiler/codeGen/CgHeapery.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgHeapery.lhs,v 1.19 1999/10/13 16:39:15 simonmar Exp $ +% $Id: CgHeapery.lhs,v 1.20 2000/01/13 14:33:58 hwloidl Exp $ % \section[CgHeapery]{Heap management functions} @@ -32,7 +32,7 @@ import ClosureInfo ( closureSize, closureGoodStuffSize, ) import PrimRep ( PrimRep(..), isFollowableRep ) import Unique ( Unique ) -import CmdLineOpts ( opt_SccProfilingOn ) +import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros ) import GlaExts import Outputable @@ -78,6 +78,10 @@ fastEntryChecks regs tags ret node_points code getTickyCtrLabel `thenFC` \ ticky_ctr -> ( if all_pointers then -- heap checks are quite easy + -- HWL: gran-yield immediately before heap check proper + --(if node `elem` regs + -- then yield regs True + -- else absC AbsCNop ) `thenC` absC (checking_code stk_words hp_words tag_assts free_reg (length regs) ticky_ctr) @@ -382,22 +386,22 @@ mkRegLiveness (VanillaReg rep i : regs) | isFollowableRep rep = ((int2Word# 1#) `shiftL#` (i -# 1#)) `or#` mkRegLiveness regs 
mkRegLiveness (_ : regs) = mkRegLiveness regs +-- The two functions below are only used in a GranSim setup -- Emit macro for simulating a fetch and then reschedule fetchAndReschedule :: [MagicId] -- Live registers -> Bool -- Node reqd? -> Code -fetchAndReschedule regs node_reqd = +fetchAndReschedule regs node_reqd = if (node `elem` regs || node_reqd) then fetch_code `thenC` reschedule_code else absC AbsCNop where all_regs = if node_reqd then node:regs else regs - liveness_mask = 0 {-XXX: mkLiveRegsMask all_regs-} - + liveness_mask = mkRegLiveness regs reschedule_code = absC (CMacroStmt GRAN_RESCHEDULE [ - mkIntCLit liveness_mask, + mkIntCLit (IBOX(word2Int# liveness_mask)), mkIntCLit (if node_reqd then 1 else 0)]) --HWL: generate GRAN_FETCH macro for GrAnSim @@ -423,15 +427,16 @@ yield :: [MagicId] -- Live registers -> Bool -- Node reqd? -> Code -yield regs node_reqd = - -- NB: node is not alive; that's why we use DO_YIELD rather than - -- GRAN_RESCHEDULE - yield_code - where - all_regs = if node_reqd then node:regs else regs - liveness_mask = 0 {-XXX: mkLiveRegsMask all_regs-} - - yield_code = absC (CMacroStmt GRAN_YIELD [mkIntCLit liveness_mask]) +yield regs node_reqd = + if opt_GranMacros && node_reqd + then yield_code + else absC AbsCNop + where + -- all_regs = if node_reqd then node:regs else regs + liveness_mask = mkRegLiveness regs + yield_code = + absC (CMacroStmt GRAN_YIELD + [mkIntCLit (IBOX(word2Int# liveness_mask))]) \end{code} %************************************************************************ diff --git a/ghc/compiler/codeGen/CgStackery.lhs b/ghc/compiler/codeGen/CgStackery.lhs index d97476e..33a873a 100644 --- a/ghc/compiler/codeGen/CgStackery.lhs +++ b/ghc/compiler/codeGen/CgStackery.lhs @@ -1,7 +1,7 @@ % % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 % -% $Id: CgStackery.lhs,v 1.12 1999/06/24 13:04:20 simonmar Exp $ +% $Id: CgStackery.lhs,v 1.13 2000/01/13 14:33:58 hwloidl Exp $ % \section[CgStackery]{Stack management functions} @@ -25,9 +25,10 @@ import AbsCSyn import CgUsages ( getRealSp ) import AbsCUtils ( mkAbstractCs, mkAbsCStmts, getAmodeRep ) import PrimRep ( getPrimRepSize, PrimRep(..), isFollowableRep ) -import CmdLineOpts ( opt_SccProfilingOn ) +import CmdLineOpts ( opt_SccProfilingOn, opt_GranMacros ) import Panic ( panic ) -import Constants ( uF_SIZE, sCC_UF_SIZE, sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE ) +import Constants ( uF_SIZE, sCC_UF_SIZE, gRAN_UF_SIZE, + sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE, gRAN_SEQ_FRAME_SIZE ) import IOExts ( trace ) \end{code} @@ -224,11 +225,13 @@ getFinalStackHW fcode info_down (MkCgState absC binds usages) = state1 \end{code} \begin{code} -updateFrameSize | opt_SccProfilingOn = sCC_UF_SIZE - | otherwise = uF_SIZE +updateFrameSize | opt_SccProfilingOn = trace ("updateFrameSize = " ++ (show sCC_UF_SIZE)) sCC_UF_SIZE + | opt_GranMacros = trace ("updateFrameSize = " ++ (show gRAN_UF_SIZE))gRAN_UF_SIZE + | otherwise = trace ("updateFrameSize = " ++ (show uF_SIZE)) uF_SIZE seqFrameSize | opt_SccProfilingOn = sCC_SEQ_FRAME_SIZE - | otherwise = sEQ_FRAME_SIZE + | opt_GranMacros = gRAN_SEQ_FRAME_SIZE + | otherwise = sEQ_FRAME_SIZE \end{code} %************************************************************************ diff --git a/ghc/compiler/main/Constants.lhs b/ghc/compiler/main/Constants.lhs index ae358e2..14f4667 100644 --- a/ghc/compiler/main/Constants.lhs +++ b/ghc/compiler/main/Constants.lhs @@ -34,6 +34,7 @@ module Constants ( uF_SIZE, sCC_UF_SIZE, + gRAN_UF_SIZE, -- HWL uF_RET, uF_SU, uF_UPDATEE, @@ -41,6 +42,7 @@ 
module Constants ( sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE, + gRAN_SEQ_FRAME_SIZE, -- HWL mAX_Vanilla_REG, mAX_Float_REG, @@ -157,6 +159,9 @@ uF_SIZE = (NOSCC_UF_SIZE::Int) -- Same again, with profiling sCC_UF_SIZE = (SCC_UF_SIZE::Int) +-- Same again, with gransim +gRAN_UF_SIZE = (GRAN_UF_SIZE::Int) + -- Offsets in an update frame. They don't change with profiling! uF_RET = (UF_RET::Int) uF_SU = (UF_SU::Int) @@ -169,6 +174,7 @@ Seq frame sizes. \begin{code} sEQ_FRAME_SIZE = (NOSCC_SEQ_FRAME_SIZE::Int) sCC_SEQ_FRAME_SIZE = (SCC_SEQ_FRAME_SIZE::Int) +gRAN_SEQ_FRAME_SIZE = (GRAN_SEQ_FRAME_SIZE::Int) \end{code} \begin{code} diff --git a/ghc/compiler/nativeGen/StixMacro.lhs b/ghc/compiler/nativeGen/StixMacro.lhs index 747759e..5ff2ea1 100644 --- a/ghc/compiler/nativeGen/StixMacro.lhs +++ b/ghc/compiler/nativeGen/StixMacro.lhs @@ -122,6 +122,7 @@ macroCode PUSH_UPD_FRAME args frame n = StInd PtrRep (StIndex PtrRep stgSp (StInt (toInteger (n-uF_SIZE)))) + -- HWL: these values are *wrong* in a GranSim setup; ToDo: fix a1 = StAssign PtrRep (frame uF_RET) upd_frame_info a3 = StAssign PtrRep (frame uF_SU) stgSu a4 = StAssign PtrRep (frame uF_UPDATEE) bhptr diff --git a/ghc/driver/ghc-asm.lprl b/ghc/driver/ghc-asm.lprl index 590b3a1..d5adb3f 100644 --- a/ghc/driver/ghc-asm.lprl +++ b/ghc/driver/ghc-asm.lprl @@ -687,6 +687,9 @@ sub mangle_asm { print STDERR "$Pgm: unknown prologue mangling? $TargetPlatform\n"; } + # HWL HACK: dont die, just print a warning + #print stderr "HWL: this should die! Prologue junk?: $p\n" if $p =~ /^\t[^\.]/ + # && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test die "Prologue junk?: $p\n" if $p =~ /^\t[^\.]/ && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test @@ -727,6 +730,9 @@ sub mangle_asm { } else { print STDERR "$Pgm: unknown epilogue mangling? $TargetPlatform\n"; } + # HWL HACK: dont die, just print a warning + #print stderr "HWL: this should die! Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/ + # && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test die "Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/ && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test diff --git a/ghc/driver/ghc.lprl b/ghc/driver/ghc.lprl index dca6d70..bb80a14 100644 --- a/ghc/driver/ghc.lprl +++ b/ghc/driver/ghc.lprl @@ -1911,6 +1911,7 @@ eval 'exec perl -S \$0 \${1+"\$@"}' # =!=!=!=!=!=!=!=!=!=!=! # This script is automatically generated: DO NOT EDIT!!! # Generated by Glasgow Haskell, version ${ProjectVersion} +# ngoqvam choHbogh vaj' vIHoHnISbej !!!! # \$pvm_executable = '$pvm_executable'; \$pvm_executable_base = '$pvm_executable_base'; @@ -1942,7 +1943,9 @@ args: while ($a = shift(@ARGV)) { } if ( $a eq '-d' && $in_RTS_args ) { $debug = '-'; - } elsif ( $a =~ /^-N(\d+)/ && $in_RTS_args ) { + } elsif ( $a =~ /^-qN(\d+)/ && $in_RTS_args ) { + $nprocessors = $1; + } elsif ( $a =~ /^-qp(\d+)/ && $in_RTS_args ) { $nprocessors = $1; } else { push(@nonPVM_args, $a); @@ -2817,9 +2820,24 @@ sub saveIntermediate { local ($final,$suffix,$tmp)= @_ ; local ($to_do); + local ($new_suffix); + # $final -- root of where to park ${final}.${suffix} # $tmp -- temporary file where hsc put the intermediate file. + # HWL: use -odir for .hc and .s files, too + if ( $Specific_output_dir ne '' ) { + $final = "${Specific_output_dir}/${final}"; + } + # HWL: use the same suffix as for $Osuffix in generating intermediate file, + # replacing o with hc or s, respectively. 
+ if ( $Osuffix ne '' ) { + ($new_suffix = $Osuffix) =~ s/o$/hc/ if $suffix eq "hc"; + ($new_suffix = $Osuffix) =~ s/o$/s/ if $suffix eq "s"; + $suffix = $new_suffix; + print stderr "HWL says: suffix for intermediate file is $suffix; ${final}.${suffix} overall\n" if $Verbose; + } + # Delete the old file $to_do = "$Rm ${final}.${suffix}"; &run_something($to_do, "Removing old .${suffix} file"); diff --git a/ghc/driver/test_mangler b/ghc/driver/test_mangler index f24f0e4..96cf31c 100644 --- a/ghc/driver/test_mangler +++ b/ghc/driver/test_mangler @@ -1,7 +1,9 @@ -#! /usr/local/bin/perl +#! /usr/bin/perl # a simple wrapper to test a .s-file mangler # reads stdin, writes stdout +push(@INC,"/net/dazdak/BUILDS/gransim-4.04/i386-unknown-linux/ghc/driver"); + $TargetPlatform = $ARGV[0]; shift; # nice error checking, Will require("ghc-asm.prl") || die "require mangler failed!\n"; diff --git a/ghc/includes/CCall.h b/ghc/includes/CCall.h index 97ff9df..3040c17 100644 --- a/ghc/includes/CCall.h +++ b/ghc/includes/CCall.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: CCall.h,v 1.3 1999/02/05 16:02:19 simonm Exp $ + * $Id: CCall.h,v 1.4 2000/01/13 14:34:00 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -55,6 +55,9 @@ #define STGCALL5(f,a,b,c,d,e) \ CALLER_SAVE_ALL (void) f(a,b,c,d,e); CALLER_RESTORE_ALL +#define STGCALL6(f,a,b,c,d,e,z) \ + CALLER_SAVE_ALL (void) f(a,b,c,d,e,z); CALLER_RESTORE_ALL + #define RET_STGCALL0(t,f) \ ({ t _r; CALLER_SAVE_ALL _r = f(); CALLER_RESTORE_ALL; _r; }) @@ -74,6 +77,9 @@ #define RET_STGCALL5(t,f,a,b,c,d,e) \ ({ t _r; CALLER_SAVE_ALL _r = f(a,b,c,d,e); CALLER_RESTORE_ALL; _r; }) +#define RET_STGCALL6(t,f,a,b,c,d,e,z) \ + ({ t _r; CALLER_SAVE_ALL _r = f(a,b,c,d,e,z); CALLER_RESTORE_ALL; _r; }) + /* * A PRIM_STGCALL is used when we have arranged to save the R, @@ -101,6 +107,9 @@ #define PRIM_STGCALL5(f,a,b,c,d,e) \ CALLER_SAVE_SYSTEM (void) f(a,b,c,d,e); CALLER_RESTORE_SYSTEM +#define PRIM_STGCALL6(f,a,b,c,d,e,z) \ + CALLER_SAVE_SYSTEM (void) f(a,b,c,d,e,z); CALLER_RESTORE_SYSTEM + #define RET_PRIM_STGCALL0(t,f) \ ({ t _r; CALLER_SAVE_SYSTEM _r = f(); CALLER_RESTORE_SYSTEM; _r; }) @@ -120,6 +129,9 @@ #define RET_PRIM_STGCALL5(t,f,a,b,c,d,e) \ ({ t _r; CALLER_SAVE_SYSTEM _r = f(a,b,c,d,e); CALLER_RESTORE_SYSTEM; _r; }) +#define RET_PRIM_STGCALL6(t,f,a,b,c,d,e,z) \ + ({ t _r; CALLER_SAVE_SYSTEM _r = f(a,b,c,d,e,z); CALLER_RESTORE_SYSTEM; _r; }) + /* ToDo: ccalls that might garbage collect - do we need to return to * the scheduler to perform these? Similarly, ccalls that might want * to call Haskell right back, or start a new thread or something. 
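The new STGCALL6, RET_STGCALL6 and PRIM_STGCALL6 variants above extend the existing pattern to six-argument RTS calls (GranSim's newSpark, declared in GranSim.h below, takes six arguments). Here is a minimal standalone model of the save/call/restore idea behind these macros; the names are invented for illustration and do not match the real CALLER_SAVE/CALLER_RESTORE definitions:

#include <stdio.h>

static long reg_table_R1;          /* stands in for the saved-register table */
static long R1 = 42;               /* stands in for an STG register          */

#define SAVE_ALL    do { reg_table_R1 = R1; } while (0);
#define RESTORE_ALL do { R1 = reg_table_R1; } while (0);

/* Same shape as STGCALL6: save, make the C call, restore. */
#define MYCALL6(f,a,b,c,d,e,z) \
    SAVE_ALL (void) f(a,b,c,d,e,z); RESTORE_ALL

static long sum6(long a, long b, long c, long d, long e, long z)
{
    R1 = 0;                        /* a C call may clobber the STG registers */
    return a + b + c + d + e + z;
}

int main(void)
{
    MYCALL6(sum6, 1, 2, 3, 4, 5, 6);
    printf("R1 after the call = %ld\n", R1);    /* 42: restored */
    return 0;
}

The real macros expand to assignments between machine registers and the register table; the RET_ variants additionally wrap the call in a GCC statement expression so the result can be captured after the registers have been saved.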
diff --git a/ghc/includes/ClosureTypes.h b/ghc/includes/ClosureTypes.h index de58fac..e1a9f2c 100644 --- a/ghc/includes/ClosureTypes.h +++ b/ghc/includes/ClosureTypes.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: ClosureTypes.h,v 1.11 1999/05/11 16:47:40 keithw Exp $ + * $Id: ClosureTypes.h,v 1.12 2000/01/13 14:34:00 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -72,10 +72,15 @@ #define WEAK 56 #define FOREIGN 57 #define STABLE_NAME 58 + #define TSO 59 #define BLOCKED_FETCH 60 #define FETCH_ME 61 -#define EVACUATED 62 -#define N_CLOSURE_TYPES 63 +#define FETCH_ME_BQ 62 +#define RBH 63 + +#define EVACUATED 64 + +#define N_CLOSURE_TYPES 65 #endif CLOSURETYPES_H diff --git a/ghc/includes/Closures.h b/ghc/includes/Closures.h index 3ed2809..1de91ef 100644 --- a/ghc/includes/Closures.h +++ b/ghc/includes/Closures.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: Closures.h,v 1.14 1999/12/01 14:34:48 simonmar Exp $ + * $Id: Closures.h,v 1.15 2000/01/13 14:34:00 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -37,21 +37,39 @@ typedef struct { The parallel header -------------------------------------------------------------------------- */ -#ifdef GRAN +#ifdef PAR typedef struct { - W_ procs; -} StgGranHeader; + /* StgWord ga; */ /* nope! global addresses are managed via a hash table */ +} StgParHeader; #else /* !PAR */ typedef struct { /* empty */ -} StgGranHeader; +} StgParHeader; #endif /* PAR */ /* ----------------------------------------------------------------------------- + The GranSim header + -------------------------------------------------------------------------- */ + +#if defined(GRAN) + +typedef struct { + StgWord procs; /* bitmask indicating on which PEs this closure resides */ +} StgGranHeader; + +#else /* !GRAN */ + +typedef struct { + /* empty */ +} StgGranHeader; + +#endif /* GRAN */ + +/* ----------------------------------------------------------------------------- The ticky-ticky header Comment from old Ticky.h: @@ -96,8 +114,11 @@ typedef struct { #ifdef PROFILING StgProfHeader prof; #endif -#ifdef GRAN - StgGranHeader par; +#ifdef PAR + StgParHeader par; +#endif +#if defined(GRAN) + StgGranHeader gran; #endif #ifdef TICKY_TICKY StgTickyHeader ticky; @@ -189,12 +210,6 @@ typedef struct StgCAF_ { typedef struct { StgHeader header; - struct StgTSO_ *blocking_queue; - StgMutClosure *mut_link; -} StgBlockingQueue; - -typedef struct { - StgHeader header; StgWord words; StgWord payload[0]; } StgArrWords; @@ -294,12 +309,71 @@ typedef struct { StgClosure* value; } StgMVar; -/* Parallel FETCH_ME closures */ -#ifdef PAR -typedef struct { +#if defined(PAR) || defined(GRAN) +/* + StgBlockingQueueElement represents the types of closures that can be + found on a blocking queue: StgTSO, StgRBHSave, StgBlockedFetch. + (StgRBHSave can only appear at the end of a blocking queue). + Logically, this is a union type, but defining another struct with a common + layout is easier to handle in the code (same as for StgMutClosures). 
+*/ +typedef struct StgBlockingQueueElement_ { + StgHeader header; + struct StgBlockingQueueElement_ *link; + StgMutClosure *mut_link; + struct StgClosure_ *payload[0]; +} StgBlockingQueueElement; + +typedef struct StgBlockingQueue_ { + StgHeader header; + struct StgBlockingQueueElement_ *blocking_queue; + StgMutClosure *mut_link; +} StgBlockingQueue; + +/* this closure is hanging at the end of a blocking queue in (par setup only) */ +typedef struct StgRBHSave_ { StgHeader header; - void *ga; /* type globalAddr is abstract here */ + StgPtr payload[0]; +} StgRBHSave; + +typedef struct StgRBH_ { + StgHeader header; + struct StgBlockingQueueElement_ *blocking_queue; + StgMutClosure *mut_link; +} StgRBH; + +#else +/* old sequential version of a blocking queue, which can only hold TSOs */ +typedef struct StgBlockingQueue_ { + StgHeader header; + struct StgTSO_ *blocking_queue; + StgMutClosure *mut_link; +} StgBlockingQueue; +#endif + +#if defined(PAR) +/* global indirections aka FETCH_ME closures */ +typedef struct StgFetchMe_ { + StgHeader header; + globalAddr *ga; /* type globalAddr is abstract here */ + StgMutClosure *mut_link; } StgFetchMe; + +/* same contents as an ordinary StgBlockingQueue */ +typedef struct StgFetchMeBlockingQueue_ { + StgHeader header; + struct StgBlockingQueueElement_ *blocking_queue; + StgMutClosure *mut_link; +} StgFetchMeBlockingQueue; + +/* entry in a blocking queue, indicating a request from a TSO on another PE */ +typedef struct StgBlockedFetch_ { + StgHeader header; + struct StgBlockingQueueElement_ *link; + StgMutClosure *mut_link; + StgClosure *node; + globalAddr ga; +} StgBlockedFetch; #endif #endif /* CLOSURES_H */ diff --git a/ghc/includes/Constants.h b/ghc/includes/Constants.h index bf7c83e..3983196 100644 --- a/ghc/includes/Constants.h +++ b/ghc/includes/Constants.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: Constants.h,v 1.7 1999/10/27 09:58:36 simonmar Exp $ + * $Id: Constants.h,v 1.8 2000/01/13 14:34:00 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -18,11 +18,13 @@ Header Sizes NOTE: keep these in line with the real definitions in Closures.h + HWL: checked GRAN_HDR_SIZE; ok -------------------------------------------------------------------------- */ #define STD_HDR_SIZE 1 #define PROF_HDR_SIZE 1 #define GRAN_HDR_SIZE 1 +#define PAR_HDR_SIZE 0 #define TICKY_HDR_SIZE 0 #define ARR_HDR_SIZE 1 @@ -36,11 +38,13 @@ NOTE: keep these in line with the real definitions in InfoTables.h NOTE: the PROF, and GRAN values are *wrong* (ToDo) + HWL: checked GRAN_ITBL_SIZE; ok -------------------------------------------------------------------------- */ #define STD_ITBL_SIZE 3 #define PROF_ITBL_SIZE 1 #define GRAN_ITBL_SIZE 1 +#define PAR_ITBL_SIZE 0 #define TICKY_ITBL_SIZE 0 /* ----------------------------------------------------------------------------- @@ -98,9 +102,13 @@ /* ----------------------------------------------------------------------------- Update Frame Layout + GranSim uses an additional word as bitmask in the update frame; actually, + not really necessary, but uses standard closure layout that way + NB: UF_RET etc are *wrong* in a GranSim setup; should be increased by 1 + if compiling for GranSim (currently not used in compiler) -- HWL -------------------------------------------------------------------------- */ - #define NOSCC_UF_SIZE 3 +#define GRAN_UF_SIZE 4 #define SCC_UF_SIZE 4 #define UF_RET 0 @@ -112,9 +120,11 @@ SEQ frame size I don't think seq frames really need sccs --SDM + They 
don't need a GranSim bitmask either, but who cares anyway -- HWL -------------------------------------------------------------------------- */ #define NOSCC_SEQ_FRAME_SIZE 2 +#define GRAN_SEQ_FRAME_SIZE 3 #define SCC_SEQ_FRAME_SIZE 3 /* ----------------------------------------------------------------------------- diff --git a/ghc/includes/GranSim.h b/ghc/includes/GranSim.h new file mode 100644 index 0000000..88c6ad9 --- /dev/null +++ b/ghc/includes/GranSim.h @@ -0,0 +1,327 @@ +/* + Time-stamp: + $Id: GranSim.h,v 1.2 2000/01/13 14:34:00 hwloidl Exp $ + + Headers for GranSim specific objects. + + Note that in GranSim we have one run-queue and blocking-queue for each + processor. Therefore, this header file redefines variables like + run_queue_hd to be relative to CurrentProc. The main arrays of runnable + and blocking queues are defined in Schedule.c. The important STG-called + GranSim macros (e.g. for fetching nodes) are at the end of this + file. Usually they are just wrappers to proper C functions in GranSim.c. */ + +#ifndef GRANSIM_H +#define GRANSIM_H + +#if !defined(GRAN) + +//Dummy definitions for basic GranSim macros (see GranSim.h) +#define DO_GRAN_ALLOCATE(n) /* nothing */ +#define DO_GRAN_UNALLOCATE(n) /* nothing */ +#define DO_GRAN_FETCH(node) /* nothing */ +#define DO_GRAN_EXEC(arith,branch,load,store,floats) /* nothing */ +#define GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter) /* nothing */ +#define GRAN_RESCHEDULE(liveness_mask,reenter) /* nothing */ + +#endif + +#if defined(GRAN) /* whole file */ + +extern StgTSO *CurrentTSOs[]; + +//@node Headers for GranSim specific objects, , , +//@section Headers for GranSim specific objects + +//@menu +//* Includes:: +//* Externs and prototypes:: +//* Run and blocking queues:: +//* Spark queues:: +//* Processor related stuff:: +//* GranSim costs:: +//* STG called GranSim functions:: +//* STG-called routines:: +//@end menu + +//@node Includes, Externs and prototypes, Headers for GranSim specific objects, Headers for GranSim specific objects +//@subsection Includes + +/* +#include "Closures.h" +#include "TSO.h" +#include "Rts.h" +*/ + +//@node Externs and prototypes, Run and blocking queues, Includes, Headers for GranSim specific objects +//@subsection Externs and prototypes + +/* Global constants */ +extern char *gran_event_names[]; +extern char *proc_status_names[]; +extern char *event_names[]; + +/* Vars checked from within STG land */ +extern rtsBool NeedToReSchedule, IgnoreEvents, IgnoreYields; +; +extern rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; + +/* costs for basic operations (copied from RTS flags) */ +extern nat gran_arith_cost, gran_branch_cost, gran_load_cost, gran_store_cost, gran_float_cost; + +extern nat SparksAvail; /* How many sparks are available */ +extern nat SurplusThreads; /* How many excess threads are there */ +extern nat sparksIgnored, sparksCreated; + +//@node Run and blocking queues, Spark queues, Externs and prototypes, Headers for GranSim specific objects +//@subsection Run and blocking queues + +/* declared in Schedule.c */ +extern StgTSO *run_queue_hds[], *run_queue_tls[]; +extern StgTSO *blocked_queue_hds[], *blocked_queue_tls[]; +extern StgTSO *ccalling_threadss[]; + +#define run_queue_hd run_queue_hds[CurrentProc] +#define run_queue_tl run_queue_tls[CurrentProc] +#define blocked_queue_hd blocked_queue_hds[CurrentProc] +#define blocked_queue_tl blocked_queue_tls[CurrentProc] +#define pending_sparks_hd pending_sparks_hds[CurrentProc] +#define pending_sparks_tl 
pending_sparks_tls[CurrentProc] +#define ccalling_threads ccalling_threadss[CurrentProc] + +//@node Spark queues, Processor related stuff, Run and blocking queues, Headers for GranSim specific objects +//@subsection Spark queues + +/* +In GranSim we use a double linked list to represent spark queues. + +This is more flexible, but slower, than the array of pointers +representation used in GUM. We use the flexibility to define new fields in +the rtsSpark structure, representing e.g. granularity info (see HWL's PhD +thesis), or info about the parent of a spark. +*/ + +/* Sparks and spark queues */ +typedef struct rtsSpark_ +{ + StgClosure *node; + StgInt name, global; + StgInt gran_info; /* for granularity improvement mechanisms */ + PEs creator; /* PE that created this spark (unused) */ + struct rtsSpark_ *prev, *next; +} rtsSpark; +typedef rtsSpark *rtsSparkQ; + +/* The spark queues, proper */ +/* In GranSim this is a globally visible array of spark queues */ +extern rtsSparkQ pending_sparks_hds[]; +extern rtsSparkQ pending_sparks_tls[]; + +/* Prototypes of those spark routines visible to compiler generated .hc */ +/* Routines only used inside the RTS are defined in rts/parallel GranSimRts.h */ +rtsSpark *newSpark(StgClosure *node, + StgInt name, StgInt gran_info, StgInt size_info, + StgInt par_info, StgInt local); +void add_to_spark_queue(rtsSpark *spark); + +//@node Processor related stuff, GranSim costs, Spark queues, Headers for GranSim specific objects +//@subsection Processor related stuff + +extern PEs CurrentProc; +extern rtsTime CurrentTime[]; + +/* Maximum number of PEs that can be simulated */ +#define MAX_PROC 32 /* (BITS_IN(StgWord)) */ // ToDo: fix this!! +//#if MAX_PROC==16 +//#else +//#error MAX_PROC should be 32 on this architecture +//#endif + +#define CurrentTSO CurrentTSOs[CurrentProc] + +/* Processor numbers to bitmasks and vice-versa */ +#define MainProc 0 /* Id of main processor */ +#define NO_PRI 0 /* dummy priority */ +#define MAX_PRI 10000 /* max possible priority */ +#define MAIN_PRI MAX_PRI /* priority of main thread */ + +/* GrAnSim uses IdleProcs as bitmask to indicate which procs are idle */ +#define PE_NUMBER(n) (1l << (long)n) +#define ThisPE PE_NUMBER(CurrentProc) +#define MainPE PE_NUMBER(MainProc) +#define Everywhere (~0l) +#define Nowhere (0l) +#define Now CurrentTime[CurrentProc] + +#define IS_LOCAL_TO(ga,proc) ((1l << (PEs) proc) & ga) + +#define GRAN_TIME_SLICE 1000 /* max time between 2 ReSchedules */ + +//@node GranSim costs, STG called GranSim functions, Processor related stuff, Headers for GranSim specific objects +//@subsection GranSim costs + +/* Default constants for communication (see RtsFlags on how to change them) */ + +#define LATENCY 1000 /* Latency for single packet */ +#define ADDITIONAL_LATENCY 100 /* Latency for additional packets */ +#define BASICBLOCKTIME 10 +#define FETCHTIME (LATENCY*2+MSGUNPACKTIME) +#define LOCALUNBLOCKTIME 10 +#define GLOBALUNBLOCKTIME (LATENCY+MSGUNPACKTIME) + +#define MSGPACKTIME 0 /* Cost of creating a packet */ +#define MSGUNPACKTIME 0 /* Cost of receiving a packet */ +#define MSGTIDYTIME 0 /* Cost of cleaning up after send */ + +/* How much to increase GrAnSims internal packet size if an overflow + occurs. + NB: This is a GrAnSim internal variable and is independent of the + simulated packet buffer size. 
+*/ + +#define GRANSIM_DEFAULT_PACK_BUFFER_SIZE 400 +#define REALLOC_SZ 200 + +/* extern W_ gran_mpacktime, gran_mtidytime, gran_munpacktime; */ + +/* Thread cost model */ +#define THREADCREATETIME (25+THREADSCHEDULETIME) +#define THREADQUEUETIME 12 /* Cost of adding a thread to the running/runnable queue */ +#define THREADDESCHEDULETIME 75 /* Cost of descheduling a thread */ +#define THREADSCHEDULETIME 75 /* Cost of scheduling a thread */ +#define THREADCONTEXTSWITCHTIME (THREADDESCHEDULETIME+THREADSCHEDULETIME) + +/* Instruction Cost model (SPARC, including cache misses) */ +#define ARITH_COST 1 +#define BRANCH_COST 2 +#define LOAD_COST 4 +#define STORE_COST 4 +#define FLOAT_COST 1 /* ? */ + +#define HEAPALLOC_COST 11 + +#define PRI_SPARK_OVERHEAD 5 +#define PRI_SCHED_OVERHEAD 5 + +//@node STG called GranSim functions, STG-called routines, GranSim costs, Headers for GranSim specific objects +//@subsection STG called GranSim functions + +/* STG called GranSim functions */ +void GranSimAllocate(StgInt n); +void GranSimUnallocate(StgInt n); +void GranSimExec(StgWord ariths, StgWord branches, StgWord loads, StgWord stores, StgWord floats); +StgInt GranSimFetch(StgClosure *node); +void GranSimSpark(StgInt local, StgClosure *node); +void GranSimSparkAt(rtsSpark *spark, StgClosure *where,StgInt identifier); +void GranSimSparkAtAbs(rtsSpark *spark, PEs proc, StgInt identifier); +void GranSimBlock(StgTSO *tso, PEs proc, StgClosure *node); + + +//@node STG-called routines, , STG called GranSim functions, Headers for GranSim specific objects +//@subsection STG-called routines + +/* Wrapped version of calls to GranSim-specific STG routines */ + +/* +#define DO_PERFORM_RESCHEDULE(liveness, always_reenter_node) PerformReschedule_wrapper(liveness, always_reenter_node) +*/ +#define DO_GRAN_ALLOCATE(n) STGCALL1(GranSimAllocate, n) +#define DO_GRAN_UNALLOCATE(n) STGCALL1(GranSimUnallocate, n) +#define DO_GRAN_FETCH(node) STGCALL1(GranSimFetch, node) +#define DO_GRAN_EXEC(arith,branch,load,store,floats) GranSimExec(arith,branch,load,store,floats) + +/* + ToDo: Clean up this mess of GRAN macros!!! -- HWL +*/ +// DO_GRAN_FETCH((StgClosure*)R1.p); +#define GRAN_FETCH() /* nothing */ + +#define GRAN_FETCH_AND_RESCHEDULE(liveness,reenter) \ + DO_GRAN_FETCH((StgClosure*)R1.p); \ + DO_GRAN_YIELD(liveness,ENTRY_CODE((D_)(*R1.p))); +// RESTORE_EVERYTHING is done implicitly before entering threaded world agian + +/* + This is the only macro currently enabled; + It should check whether it is time for the current thread to yield + (e.g. if there is a more recent event in the queue) and it should check + whether node is local, via a call to GranSimFetch. + ToDo: split this in 2 routines: + - GRAN_YIELD (as it is below) + - GRAN_FETCH (the rest of this macro) + emit only these 2 macros based on node's liveness + node alive: emit both macros + node not alive: do only a GRAN_YIELD + + replace gran_yield_? with gran_block_? 
(they really block the current + thread) +*/ +#define GRAN_RESCHEDULE(liveness,ptrs) \ + if (RET_STGCALL1(StgInt, GranSimFetch, (StgClosure*)R1.p)) {\ + EXTFUN_RTS(gran_block_##ptrs); \ + JMP_(gran_block_##ptrs); \ + } else { \ + if (TimeOfLastEvent < CurrentTime[CurrentProc] && \ + HEAP_ALLOCED((StgClosure *)R1.p) && \ + LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \ + EXTFUN_RTS(gran_yield_##ptrs); \ + JMP_(gran_yield_##ptrs); \ + } \ + /* GRAN_YIELD(ptrs) */ \ + } + + +// YIELD(liveness,reenter) + +// GRAN_YIELD(liveness_mask); + +// GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter) + +#define THREAD_CONTEXT_SWITCH(liveness_mask,reenter) \ + do { \ + if (context_switch /* OR_INTERVAL_EXPIRED */) { \ + GRAN_RESCHEDULE(liveness_mask,reenter); \ + } }while(0) + +#define GRAN_EXEC(arith,branch,load,store,floats) \ + { \ + W_ cost = gran_arith_cost*arith + \ + gran_branch_cost*branch + \ + gran_load_cost*load + \ + gran_store_cost*store + \ + gran_float_cost*floats; \ + CurrentTSO->gran.exectime += cost; \ + CurrentTime[CurrentProc] += cost; \ + } + +/* In GranSim we first check whether there is an event to handle; only if + this is the case (or the time slice is over in case of fair scheduling) + we do a yield, which is very similar to that in the concurrent world + ToDo: check whether gran_yield_? can be merged with other yielding codes +*/ + +#define DO_GRAN_YIELD(ptrs) if (!IgnoreYields && \ + TimeOfLastEvent < CurrentTime[CurrentProc] && \ + HEAP_ALLOCED((StgClosure *)R1.p) && \ + LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \ + EXTFUN_RTS(gran_yield_##ptrs); \ + JMP_(gran_yield_##ptrs); \ + } + +#define GRAN_YIELD(ptrs) \ + { \ + extern nat context_switch; \ + if ( (CurrentTime[CurrentProc]>=EndOfTimeSlice) || \ + ((CurrentTime[CurrentProc]>=TimeOfNextEvent) && \ + (TimeOfNextEvent!=0) && !IgnoreEvents )) { \ + /* context_switch = 1; */ \ + DO_GRAN_YIELD(ptrs); \ + } \ + } + +#define ADD_TO_SPARK_QUEUE(spark) \ + STGCALL1(add_to_spark_queue,spark) \ + +#endif /* GRAN */ + +#endif /* GRANSIM_H */ diff --git a/ghc/includes/InfoMacros.h b/ghc/includes/InfoMacros.h index 91900d4..a85529b 100644 --- a/ghc/includes/InfoMacros.h +++ b/ghc/includes/InfoMacros.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: InfoMacros.h,v 1.8 1999/11/30 11:44:32 simonmar Exp $ + * $Id: InfoMacros.h,v 1.9 2000/01/13 14:34:00 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -31,8 +31,35 @@ #define INIT_VECTOR #endif +/* + On the GRAN/PAR specific parts of the InfoTables: + + In both GranSim and GUM we use revertible black holes (RBH) when putting + an updatable closure into a packet for communication. The entry code for + an RBH performs standard blocking (as with any kind of BH). The info + table for the RBH resides just before the one for the std info + table. (NB: there is one RBH ITBL for every ITBL of an updatable + closure.) The @rbh_infoptr@ field in the ITBL points from the std ITBL to + the RBH ITBL and vice versa. This is used by the RBH_INFOPTR and + REVERT_INFOPTR macros to turn an updatable node into an RBH and vice + versa. Note, that the only case where we have to revert the RBH in its + original form is when a packet is sent back because of garbage collection + on another PE. In the RTS for GdH we will use this reversion mechanism in + order to deal with faults in the system. + ToDo: Check that RBHs are needed for all the info tables below. 
From a quick + check of the macros generated in the libs it seems that all of them are used + for generating THUNKs. + Possible optimisation: Note that any RBH ITBL is a fixed distance away from + the actual ITBL. We could inline this offset as a constant into the RTS and + avoid the rbh_infoptr fields altogether (Jim did that in the old RTS). + -- HWL +*/ + + /* function/thunk info tables --------------------------------------------- */ +#if defined(GRAN) || defined(PAR) + #define \ INFO_TABLE_SRT(info, /* info-table label */ \ entry, /* entry code label */ \ @@ -41,17 +68,73 @@ INFO_TABLE_SRT(info, /* info-table label */ \ type, /* closure type */ \ info_class, entry_class, /* C storage classes */ \ prof_descr, prof_type) /* profiling info */ \ + entry_class(RBH_##entry); \ entry_class(entry); \ + info_class INFO_TBL_CONST StgInfoTable info; \ + info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \ + layout : { payload : {ptrs,nptrs} }, \ + SRT_INFO(RBH,srt_,srt_off_,srt_len_), \ + INCLUDE_RBH_INFO(info), \ + INIT_ENTRY(RBH_##entry), \ + INIT_VECTOR \ + } ; \ + StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \ info_class INFO_TBL_CONST StgInfoTable info = { \ layout : { payload : {ptrs,nptrs} }, \ SRT_INFO(type,srt_,srt_off_,srt_len_), \ + INCLUDE_RBH_INFO(RBH_##info), \ INIT_ENTRY(entry), \ INIT_VECTOR \ } +#else + +#define \ +INFO_TABLE_SRT(info, /* info-table label */ \ + entry, /* entry code label */ \ + ptrs, nptrs, /* closure layout info */\ + srt_, srt_off_, srt_len_, /* SRT info */ \ + type, /* closure type */ \ + info_class, entry_class, /* C storage classes */ \ + prof_descr, prof_type) /* profiling info */ \ + entry_class(entry); \ + info_class INFO_TBL_CONST StgInfoTable info = { \ + layout : { payload : {ptrs,nptrs} }, \ + SRT_INFO(type,srt_,srt_off_,srt_len_), \ + INIT_ENTRY(entry), \ + INIT_VECTOR \ + } + +#endif /* direct-return address info tables --------------------------------------*/ +#if defined(GRAN) || defined(PAR) + +#define \ +INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_, \ + type, info_class, entry_class, \ + prof_descr, prof_type) \ + entry_class(RBH_##entry); \ + entry_class(entry); \ + info_class INFO_TBL_CONST StgInfoTable info; \ + info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \ + layout : { bitmap : (StgWord32)bitmap_ }, \ + SRT_INFO(RBH,srt_,srt_off_,srt_len_), \ + INCLUDE_RBH_INFO(info), \ + INIT_ENTRY(RBH_##entry), \ + INIT_VECTOR \ + }; \ + StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \ + info_class INFO_TBL_CONST StgInfoTable info = { \ + layout : { bitmap : (StgWord32)bitmap_ }, \ + SRT_INFO(type,srt_,srt_off_,srt_len_), \ + INCLUDE_RBH_INFO(RBH_##info), \ + INIT_ENTRY(entry), \ + INIT_VECTOR \ + } +#else + #define \ INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_, \ type, info_class, entry_class, \ @@ -63,9 +146,36 @@ INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_, \ INIT_ENTRY(entry), \ INIT_VECTOR \ } +#endif /* info-table without an SRT -----------------------------------------------*/ +#if defined(GRAN) || defined(PAR) + +#define \ +INFO_TABLE(info, entry, ptrs, nptrs, type, info_class, \ + entry_class, prof_descr, prof_type) \ + entry_class(RBH_##entry); \ + entry_class(entry); \ + info_class INFO_TBL_CONST StgInfoTable info; \ + info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \ + layout : { payload : {ptrs,nptrs} }, \ + STD_INFO(RBH), \ + INCLUDE_RBH_INFO(info), \ + INIT_ENTRY(RBH_##entry), \ + INIT_VECTOR \ + }; \ + StgFunPtr RBH_##entry (void) { 
JMP_(RBH_entry); } ; \ + info_class INFO_TBL_CONST StgInfoTable info = { \ + layout : { payload : {ptrs,nptrs} }, \ + STD_INFO(type), \ + INCLUDE_RBH_INFO(RBH_##info), \ + INIT_ENTRY(entry), \ + INIT_VECTOR \ + } + +#else + #define \ INFO_TABLE(info, entry, ptrs, nptrs, type, info_class, \ entry_class, prof_descr, prof_type) \ @@ -77,8 +187,36 @@ INFO_TABLE(info, entry, ptrs, nptrs, type, info_class, \ INIT_VECTOR \ } +#endif + /* special selector-thunk info table ---------------------------------------*/ +#if defined(GRAN) || defined(PAR) + +#define \ +INFO_TABLE_SELECTOR(info, entry, offset, info_class, \ + entry_class, prof_descr, prof_type) \ + entry_class(RBH_##entry); \ + entry_class(entry); \ + info_class INFO_TBL_CONST StgInfoTable info; \ + info_class INFO_TBL_CONST StgInfoTable RBH_##info = { \ + layout : { selector_offset : offset }, \ + STD_INFO(RBH), \ + INCLUDE_RBH_INFO(info), \ + INIT_ENTRY(RBH_##entry), \ + INIT_VECTOR \ + }; \ + StgFunPtr RBH_##entry (void) { JMP_(RBH_entry); } ; \ + info_class INFO_TBL_CONST StgInfoTable info = { \ + layout : { selector_offset : offset }, \ + STD_INFO(THUNK_SELECTOR), \ + INCLUDE_RBH_INFO(RBH_##info), \ + INIT_ENTRY(entry), \ + INIT_VECTOR \ + } + +#else + #define \ INFO_TABLE_SELECTOR(info, entry, offset, info_class, \ entry_class, prof_descr, prof_type) \ @@ -90,6 +228,8 @@ INFO_TABLE_SELECTOR(info, entry, offset, info_class, \ INIT_VECTOR \ } +#endif + /* constructor info table --------------------------------------------------*/ #define \ diff --git a/ghc/includes/InfoTables.h b/ghc/includes/InfoTables.h index 0f8a659..b3db5e5 100644 --- a/ghc/includes/InfoTables.h +++ b/ghc/includes/InfoTables.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- - * $Id: InfoTables.h,v 1.17 1999/07/16 09:41:12 panne Exp $ + * $Id: InfoTables.h,v 1.18 2000/01/13 14:34:00 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -36,12 +36,15 @@ typedef struct { Parallelism info -------------------------------------------------------------------------- */ -#ifdef PAR +#if 0 && (defined(PAR) || defined(GRAN)) -#define PAR_INFO_WORDS 0 +// CURRENTLY UNUSED +// ToDo: use this in StgInfoTable (mutually recursive) -- HWL + +#define PAR_INFO_WORDS 1 typedef struct { - /* empty */ + StgInfoTable *rbh_infoptr; /* infoptr to the RBH */ } StgParInfo; #else /* !PAR */ @@ -54,6 +57,54 @@ typedef struct { #endif /* PAR */ +/* + Copied from ghc-0.29; ToDo: check this code -- HWL + + In the parallel system, all updatable closures have corresponding + revertible black holes. When we are assembly-mangling, we guarantee + that the revertible black hole code precedes the normal entry code, so + that the RBH info table resides at a fixed offset from the normal info + table. Otherwise, we add the RBH info table pointer to the end of the + normal info table and vice versa. + + Currently has to use a !RBH_MAGIC_OFFSET setting. + Still todo: init of par.infoptr field in all infotables!! 
+*/ + +#if defined(PAR) || defined(GRAN) +# define RBH_INFO_OFFSET (GEN_INFO_OFFSET+GEN_INFO_WORDS) + +# ifdef RBH_MAGIC_OFFSET + +# error magic offset not yet implemented + +# define RBH_INFO_WORDS 0 +# define INCLUDE_RBH_INFO(infoptr) + +# define RBH_INFOPTR(infoptr) (((P_)infoptr) - RBH_MAGIC_OFFSET) +# define REVERT_INFOPTR(infoptr) (((P_)infoptr) + RBH_MAGIC_OFFSET) + +# else + +# define RBH_INFO_WORDS 1 +# define INCLUDE_RBH_INFO(info) rbh_infoptr : &(info) + +# define RBH_INFOPTR(infoptr) (((StgInfoTable *)(infoptr))->rbh_infoptr) +# define REVERT_INFOPTR(infoptr) (((StgInfoTable *)(infoptr))->rbh_infoptr) + +# endif + +/* see ParallelRts.h */ +// EXTFUN(RBH_entry); +//StgClosure *convertToRBH(StgClosure *closure); +//#if defined(GRAN) +//void convertFromRBH(StgClosure *closure); +//#elif defined(PAR) +//void convertToFetchMe(StgPtr closure, globalAddr *ga); +//#endif + +#endif + /* ----------------------------------------------------------------------------- Debugging info -------------------------------------------------------------------------- */ @@ -98,11 +149,27 @@ extern StgWord16 closure_flags[]; #define closureFlags(c) (closure_flags[get_itbl(c)->type]) -#define closure_STATIC(c) ( closureFlags(c) & _STA) +#define closure_HNF(c) ( closureFlags(c) & _HNF) +#define closure_BITMAP(c) ( closureFlags(c) & _BTM) +#define closure_NON_SPARK(c) ( (closureFlags(c) & _NS)) #define closure_SHOULD_SPARK(c) (!(closureFlags(c) & _NS)) +#define closure_STATIC(c) ( closureFlags(c) & _STA) +#define closure_THUNK(c) ( closureFlags(c) & _THU) #define closure_MUTABLE(c) ( closureFlags(c) & _MUT) #define closure_UNPOINTED(c) ( closureFlags(c) & _UPT) +#define closure_SRT(c) ( closureFlags(c) & _SRT) + +/* same as above but for info-ptr rather than closure */ +#define ipFlags(ip) (closure_flags[ip->type]) +#define ip_HNF(ip) ( ipFlags(ip) & _HNF) +#define ip_BITMAP(ip) ( ipFlags(ip) & _BTM) +#define ip_SHOULD_SPARK(ip) (!(ipFlags(ip) & _NS)) +#define ip_STATIC(ip) ( ipFlags(ip) & _STA) +#define ip_THUNK(ip) ( ipFlags(ip) & _THU) +#define ip_MUTABLE(ip) ( ipFlags(ip) & _MUT) +#define ip_UNPOINTED(ip) ( ipFlags(ip) & _UPT) +#define ip_SRT(ip) ( ipFlags(ip) & _SRT) /* ----------------------------------------------------------------------------- Info Tables @@ -153,8 +220,9 @@ typedef StgClosure* StgSRT[]; typedef struct _StgInfoTable { StgSRT *srt; /* pointer to the SRT table */ -#ifdef PAR - StgParInfo par; +#if defined(PAR) || defined(GRAN) + // StgParInfo par; + struct _StgInfoTable *rbh_infoptr; #endif #ifdef PROFILING /* StgProfInfo prof; */ diff --git a/ghc/includes/Parallel.h b/ghc/includes/Parallel.h new file mode 100644 index 0000000..e9a6ef1 --- /dev/null +++ b/ghc/includes/Parallel.h @@ -0,0 +1,342 @@ +/* + Time-stamp: + + Definitions for parallel machines. + + This section contains definitions applicable only to programs compiled + to run on a parallel machine, i.e. on GUM. Some of these definitions + are also used when simulating parallel execution, i.e. on GranSim. 
+*/ + +/* + ToDo: Check the PAR specfic part of this file + Move stuff into Closures.h and ClosureMacros.h + Clean-up GRAN specific code + -- HWL +*/ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#if defined(PAR) || defined(GRAN) /* whole file */ + +//@node Parallel definitions, End of File +//@section Parallel definitions + +//@menu +//* Basic definitions:: +//* GUM:: +//* GranSim:: +//@end menu + +//@node Basic definitions, GUM, Parallel definitions, Parallel definitions +//@subsection Basic definitions + +/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */ + +/* Needed for dumping routines */ +#if defined(PAR) +# define NODE_STR_LEN 20 +# define TIME_STR_LEN 120 +# define TIME rtsTime +# define CURRENT_TIME msTime() +# define TIME_ON_PROC(p) msTime() +# define CURRENT_PROC thisPE +# define BINARY_STATS RtsFlags.ParFlags.ParStats.Binary +#elif defined(GRAN) +# define NODE_STR_LEN 20 +# define TIME_STR_LEN 120 +# define TIME rtsTime +# define CURRENT_TIME CurrentTime[CurrentProc] +# define TIME_ON_PROC(p) CurrentTime[p] +# define CURRENT_PROC CurrentProc +# define BINARY_STATS RtsFlags.GranFlags.GranSimStats.Binary +#endif + +#if defined(PAR) +# define MAX_PES 256 /* Maximum number of processors */ + /* MAX_PES is enforced by SysMan, which does not + allow more than this many "processors". + This is important because PackGA [GlobAddr.lc] + **assumes** that a PE# can fit in 8+ bits. + */ + +# define SPARK_POOLS 2 /* no. of spark pools */ +# define REQUIRED_POOL 0 /* idx of pool of mandatory sparks (concurrency) */ +# define ADVISORY_POOL 1 /* idx of pool of advisory sparks (parallelism) */ +#endif + +//@menu +//* GUM:: +//* GranSim:: +//@end menu +//*/ + +//@node GUM, GranSim, Basic definitions, Parallel definitions +//@subsection GUM + +#if defined(PAR) +/* +Symbolic constants for the packing code. + +This constant defines how many words of data we can pack into a single +packet in the parallel (GUM) system. 
+*/ + +//@menu +//* Types:: +//* Externs:: +//* Prototypes:: +//* Macros:: +//@end menu +//*/ + +//@node Types, Externs, GUM, GUM +//@subsubsection Types + +/* Sparks and spark queues */ +typedef StgClosure *rtsSpark; +typedef rtsSpark *rtsSparkQ; + +typedef struct rtsPackBuffer_ { + StgInt /* nat */ id; + StgInt /* nat */ size; + StgInt /* nat */ unpacked_size; + StgTSO *tso; + StgWord *buffer[0]; +} rtsPackBuffer; + +#define PACK_BUFFER_HDR_SIZE 4 + +//@node Externs, Prototypes, Types, GUM +//@subsubsection Externs + +// extern rtsBool do_sp_profile; + +extern globalAddr theGlobalFromGA, theGlobalToGA; +extern StgBlockedFetch *PendingFetches; +extern GlobalTaskId *allPEs; + +extern rtsBool IAmMainThread, GlobalStopPending; +//extern rtsBool fishing; +extern rtsTime last_fish_arrived_at; +extern nat outstandingFishes; +extern GlobalTaskId SysManTask; +extern int seed; /* pseudo-random-number generator seed: */ + /* Initialised in ParInit */ +extern StgInt threadId; /* Number of Threads that have existed on a PE */ +extern GlobalTaskId mytid; + +extern GlobalTaskId *allPEs; +extern nat nPEs; +extern nat sparksIgnored, sparksCreated, threadsIgnored, threadsCreated; +extern nat advisory_thread_count; + +extern rtsBool InGlobalGC; /* Are we in the midst of performing global GC */ + +static ullong startTime; /* start of comp; in RtsStartup.c */ + +/* the spark pools proper */ +extern rtsSpark *pending_sparks_hd[]; /* ptr to start of a spark pool */ +extern rtsSpark *pending_sparks_tl[]; /* ptr to end of a spark pool */ +extern rtsSpark *pending_sparks_lim[]; +extern rtsSpark *pending_sparks_base[]; +extern nat spark_limit[]; + +extern rtsPackBuffer *PackBuffer; /* size: can be set via option */ +extern rtsPackBuffer *buffer; /* HWL_ */ +extern rtsPackBuffer *freeBuffer; /* HWL_ */ +extern rtsPackBuffer *packBuffer; /* HWL_ */ +extern rtsPackBuffer *gumPackBuffer; + +extern int thisPE; + +/* From Global.c */ +extern GALA *freeGALAList; +extern GALA *freeIndirections; +extern GALA *liveIndirections; +extern GALA *liveRemoteGAs; + +/* +extern HashTable *taskIDtoPEtable; +extern HashTable *LAtoGALAtable; +extern HashTable *pGAtoGALAtable; +*/ + +//@node Prototypes, Macros, Externs, GUM +//@subsubsection Prototypes + +/* From ParInit.c */ +void initParallelSystem(void); +void SynchroniseSystem(void); +void par_exit(StgInt n); + +PEs taskIDtoPE (GlobalTaskId gtid); +void registerTask (GlobalTaskId gtid); +globalAddr *LAGAlookup (StgClosure *addr); +StgClosure *GALAlookup (globalAddr *ga); +//static GALA *allocIndirection (StgPtr addr); +globalAddr *makeGlobal (StgClosure *addr, rtsBool preferred); +globalAddr *setRemoteGA (StgClosure *addr, globalAddr *ga, rtsBool preferred); +void splitWeight (globalAddr *to, globalAddr *from); +globalAddr *addWeight (globalAddr *ga); +void initGAtables (void); +void RebuildLAGAtable (void); +StgWord PackGA (StgWord pe, int slot); + +//@node Macros, , Prototypes, GUM +//@subsubsection Macros + +/* delay (in us) between dying fish returning and sending out a new fish */ +#define FISH_DELAY 1000 +/* max no. of outstanding spark steals */ +#define MAX_FISHES 1 + +// ToDo: check which of these is actually needed! 
+
+# define PACK_HEAP_REQUIRED ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (MIN_UPD_SIZE + 2))
+
+# define MAX_GAS (RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE)
+
+
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+
+# define PACK_HDR_SIZE 1 /* Words of header in a packet */
+
+# define PACK_PLC_SIZE 2 /* Size of a packed PLC in words */
+
+/*
+ Definitions relating to the entire parallel-only fixed-header field.
+
+ On GUM, the global addresses for each local closure are stored in a
+ separate hash table, rather than with the closure in the heap. We call
+ @getGA@ to look up the global address associated with a local closure (0
+ is returned for local closures that have no global address), and @setGA@
+ to store a new global address for a local closure which did not
+ previously have one. */
+
+# define GA_HDR_SIZE 0
+
+# define GA(closure) getGA(closure)
+
+# define SET_GA(closure, ga) setGA(closure,ga)
+# define SET_STATIC_GA(closure)
+# define SET_GRAN_HDR(closure,pe)
+# define SET_STATIC_PROCS(closure)
+
+# define MAX_GA_WEIGHT 0 /* Treat as 2^n */
+
+/* At the moment, there is no activity profiling for GUM. This may change. */
+# define SET_TASK_ACTIVITY(act) /* nothing */
+
+#endif /* PAR */
+
+//@node GranSim, , GUM, Parallel definitions
+//@subsection GranSim
+
+#if defined(GRAN)
+/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */
+
+//@menu
+//* Types::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Types, Prototypes, GranSim, GranSim
+//@subsubsection Types
+
+typedef struct rtsPackBuffer_ {
+ StgInt /* nat */ id;
+ StgInt /* nat */ size;
+ StgInt /* nat */ unpacked_size;
+ StgTSO *tso;
+ StgClosure **buffer;
+} rtsPackBuffer;
+
+//@node Prototypes, Macros, Types, GranSim
+//@subsubsection Prototypes
+
+
+/* main packing functions */
+/*
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+void PrintPacket(rtsPackBuffer *buffer);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+*/
+/* important auxiliary functions */
+
+/*
+OLD CODE -- HWL
+void InitPackBuffer(void);
+P_ AllocateHeap (W_ size);
+P_ PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize);
+P_ PackOneNode (P_ closure, P_ tso, W_ *packbuffersize);
+P_ UnpackGraph (P_ buffer);
+
+void InitClosureQueue (void);
+P_ DeQueueClosure(void);
+void QueueClosure (P_ closure);
+// rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+*/
+
+// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty);
+// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node) ;
+
+//@node Macros, , Prototypes, GranSim
+//@subsubsection Macros
+
+/* max no. of outstanding spark steals */
+#define MAX_FISHES 1
+
+/* These are needed in the packing code to get the size of the packet
+ right. The closures themselves are never built in GrAnSim.
*/
+# define FETCHME_VHS IND_VHS
+# define FETCHME_HS IND_HS
+
+# define FETCHME_GA_LOCN FETCHME_HS
+
+# define FETCHME_CLOSURE_SIZE(closure) IND_CLOSURE_SIZE(closure)
+# define FETCHME_CLOSURE_NoPTRS(closure) 0L
+# define FETCHME_CLOSURE_NoNONPTRS(closure) (IND_CLOSURE_SIZE(closure)-IND_VHS)
+
+# define MAX_GAS (RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE)
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+# define PACK_HDR_SIZE 4 /* Words of header in a packet */
+
+# define PACK_HEAP_REQUIRED \
+ (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \
+ 2 * sizeofW(StgInt) + sizeofW(StgTSO*))
+
+# define PACK_FLAG_LOCN 0
+# define PACK_TSO_LOCN 1
+# define PACK_UNPACKED_SIZE_LOCN 2
+# define PACK_SIZE_LOCN 3
+# define MAGIC_PACK_FLAG 0xfabc
+
+# define GA_HDR_SIZE 1
+
+# define PROCS_HDR_POSN PAR_HDR_POSN
+# define PROCS_HDR_SIZE 1
+
+/* Accessing components of the field */
+# define PROCS(closure) ((closure)->header.gran.procs)
+/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */
+
+#endif /* GRAN */
+
+//@node End of File, , Parallel definitions
+//@section End of File
+
+#endif /* defined(PAR) || defined(GRAN) whole file */
+
+#endif /* PARALLEL_H */
+
+
diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h
index d11de24..0d97628 100644
--- a/ghc/includes/PrimOps.h
+++ b/ghc/includes/PrimOps.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: PrimOps.h,v 1.43 2000/01/12 15:15:17 simonmar Exp $
+ * $Id: PrimOps.h,v 1.44 2000/01/13 14:34:00 hwloidl Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
@@ -734,6 +734,118 @@ EF_(unblockAsyncExceptionszh_fast);
 
 extern int cmp_thread(const StgTSO *tso1, const StgTSO *tso2);
 
+/* ------------------------------------------------------------------------
+ Parallel PrimOps
+
+ A par in the Haskell code is ultimately translated to a parzh macro
+ (with a case wrapped around it to guarantee that the macro is actually
+ executed; see compiler/prelude/PrimOps.lhs)
+ ---------------------------------------------------------------------- */
+
+#if defined(GRAN)
+// hash coding changed from 2.10 to 4.00
+#define parzh(r,node) parZh(r,node)
+
+#define parZh(r,node) \
+ PARZh(r,node,1,0,0,0,0,0)
+
+#define parAtzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,1)
+
+#define parAtAbszh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,proc,identifier,gran_info,size_info,par_info,rest,2)
+
+#define parAtRelzh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,proc,identifier,gran_info,size_info,par_info,rest,3)
+
+#define parAtForNowzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \
+ parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,0)
+
+#define parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,local) \
+{ \
+ if (closure_SHOULD_SPARK((StgClosure*)node)) { \
+ rtsSpark *result; \
+ result = RET_STGCALL6(rtsSpark*, newSpark, \
+ node,identifier,gran_info,size_info,par_info,local); \
+ if (local==2) { /* special case for parAtAbs */ \
+ STGCALL3(GranSimSparkAtAbs, result,(I_)where,identifier);\
+ } else if (local==3) { /* special case for parAtRel */ \
+ STGCALL3(GranSimSparkAtAbs, result,(I_)(CurrentProc+where),identifier); \
+ } else { \
+ STGCALL3(GranSimSparkAt, result,where,identifier); \
+ } \
+ } \
+}
+
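The trailing `local' argument that the wrappers above hand to parATZh is not a boolean; it encodes the placement variant. Reading the macro bodies back gives this mapping, summarised here as a comment:

    /* placement encoding used by parATZh (derived from the macros above):
       0 ... parAtForNow -> GranSimSparkAt
       1 ... parAt       -> GranSimSparkAt
       2 ... parAtAbs    -> GranSimSparkAtAbs(result, where, ...)
       3 ... parAtRel    -> GranSimSparkAtAbs(result, CurrentProc+where, ...) */

so only the parAtAbs/parAtRel cases take the absolute-placement entry point.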
+#define parLocalzh(r,node,identifier,gran_info,size_info,par_info,rest) \
+ PARZh(r,node,rest,identifier,gran_info,size_info,par_info,1)
+
+#define parGlobalzh(r,node,identifier,gran_info,size_info,par_info,rest) \
+ PARZh(r,node,rest,identifier,gran_info,size_info,par_info,0)
+
+#define PARZh(r,node,rest,identifier,gran_info,size_info,par_info,local) \
+{ \
+ if (closure_SHOULD_SPARK((StgClosure*)node)) { \
+ rtsSpark *result; \
+ result = RET_STGCALL6(rtsSpark*, newSpark, \
+ node,identifier,gran_info,size_info,par_info,local);\
+ STGCALL1(add_to_spark_queue,result); \
+ STGCALL2(GranSimSpark, local,(P_)node); \
+ } \
+}
+
+#define copyablezh(r,node) \
+ /* copyable not yet implemented!! */
+
+#define noFollowzh(r,node) \
+ /* noFollow not yet implemented!! */
+
+#endif /* GRAN */
+
+#if 0
+
+# if defined(GRAN)
+/* ToDo: Use a parallel ticky macro for this */
+# define COUNT_SPARK(node) { (CurrentTSO->gran.globalsparks)++; sparksCreated++; }
+# elif defined(PAR)
+# define COUNT_SPARK(node) { (CurrentTSO->par.globalsparks)++; sparksCreated++; }
+# endif
+
+/*
+ Note that we must bump the required thread count NOW, rather
+ than when the thread is actually created.
+
+ forkzh not needed any more; see ghc/rts/PrimOps.hc
+*/
+#define forkzh(r,liveness,node) \
+{ \
+ extern nat context_switch; \
+ while (pending_sparks_tl[REQUIRED_POOL] == pending_sparks_lim[REQUIRED_POOL]) \
+ DO_YIELD((liveness << 1) | 1); \
+ if (closure_SHOULD_SPARK((StgClosure *)node)) { \
+ *pending_sparks_tl[REQUIRED_POOL]++ = (P_)(node); \
+ } else { \
+ sparksIgnored++; \
+ } \
+ context_switch = 1; \
+}
+
+// old version of par (previously used in GUM)
+
+#define parzh(r,node) \
+{ \
+ extern nat context_switch; \
+ COUNT_SPARK(node); \
+ if (closure_SHOULD_SPARK((StgClosure *)node) && \
+ pending_sparks_tl[ADVISORY_POOL] < pending_sparks_lim[ADVISORY_POOL]) {\
+ *pending_sparks_tl[ADVISORY_POOL]++ = (StgClosure *)(node); \
+ } else { \
+ sparksIgnored++; \
+ } \
+ r = context_switch = 1; \
+}
+#endif /* 0 */
+
 #if defined(SMP) || defined(PAR)
 #define parzh(r,node) \
 { \
diff --git a/ghc/includes/Rts.h b/ghc/includes/Rts.h
index 7d35118..40deb1e 100644
--- a/ghc/includes/Rts.h
+++ b/ghc/includes/Rts.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Rts.h,v 1.11 2000/01/13 12:40:15 simonmar Exp $
+ * $Id: Rts.h,v 1.12 2000/01/13 14:34:01 hwloidl Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
@@ -42,17 +42,10 @@
 #define MAX_RTS_ARGS 32
 
 /* -----------------------------------------------------------------------------
- Useful typedefs
+ Assertions and Debuggery
 -------------------------------------------------------------------------- */
 
-typedef unsigned int nat; /* at least 32 bits (like int) */
-typedef unsigned long lnat; /* at least 32 bits */
-typedef unsigned long long ullong; /* at least 32 bits */
-
-typedef enum {
- rtsFalse = 0,
- rtsTrue
-} rtsBool;
+#define IF_RTSFLAGS(c,s) if (RtsFlags.c) { s; }
 
 /* -----------------------------------------------------------------------------
 Assertions and Debuggery
diff --git a/ghc/includes/RtsTypes.h b/ghc/includes/RtsTypes.h
new file mode 100644
index 0000000..10c4bde
--- /dev/null
+++ b/ghc/includes/RtsTypes.h
@@ -0,0 +1,76 @@
+/*
+ Time-stamp: 
+
+ RTS specific types.
+*/
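Two of the definitions that follow are easy to misread, so here is a hedged usage sketch (none of this code is in the patch; mytid and MAX_GA_WEIGHT come from Parallel.h, and the la/next values are placeholders):

    ullong big = LL(0x100000000);          /* ullong literals need the LL suffix */

    GALA g;                                /* a (global address, local address) pair */
    g.ga.payload.gc.gtid = mytid;          /* owning PE's task id */
    g.ga.payload.gc.slot = 0;
    g.ga.weight = MAX_GA_WEIGHT;           /* 0 is treated as full weight, 2^n */
    g.la = NULL;                           /* local heap address, filled in later */
    g.next = NULL;
    g.preferred = rtsTrue;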
+
+/* -------------------------------------------------------------------------
+ Generally useful typedefs
+ ------------------------------------------------------------------------- */
+
+#ifndef RTS_TYPES_H
+#define RTS_TYPES_H
+
+typedef unsigned int nat; /* at least 32 bits (like int) */
+typedef unsigned long lnat; /* at least 32 bits */
+typedef unsigned long long ullong; /* at least 32 bits */
+
+/* LL(x): literal suffix for ullong constants (long long is not ANSI) */
+#if defined(__GNUC__)
+#define LL(x) (x##LL)
+#else
+#define LL(x) (x##L)
+#endif
+
+typedef enum {
+ rtsFalse = 0,
+ rtsTrue
+} rtsBool;
+
+/*
+ Types specific to the parallel runtime system.
+*/
+
+#if defined(PAR)
+/* types only needed in the parallel system */
+typedef struct hashtable ParHashTable;
+typedef struct hashlist ParHashList;
+
+// typedef double REAL_TIME;
+// typedef W_ TIME;
+// typedef GlobalTaskId Proc;
+typedef int GlobalTaskId;
+typedef ullong rtsTime;
+typedef GlobalTaskId PEs;
+typedef unsigned int rtsWeight;
+typedef int rtsPacket;
+typedef int OpCode;
+
+/* Global addresses i.e. unique ids in a parallel setup; needed in Closures.h */
+typedef struct {
+ union {
+ StgPtr plc;
+ struct {
+ GlobalTaskId gtid;
+ int slot;
+ } gc;
+ } payload;
+ rtsWeight weight;
+} globalAddr;
+
+/* (GA, LA) pairs */
+typedef struct gala {
+ globalAddr ga;
+ StgPtr la;
+ struct gala *next;
+ rtsBool preferred;
+} GALA;
+
+#elif defined(GRAN)
+
+typedef lnat rtsTime;
+typedef StgWord PEs;
+
+#endif
+
+#endif /* RTS_TYPES_H */
diff --git a/ghc/includes/SchedAPI.h b/ghc/includes/SchedAPI.h
index 317a177..18c48f5 100644
--- a/ghc/includes/SchedAPI.h
+++ b/ghc/includes/SchedAPI.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: SchedAPI.h,v 1.8 1999/11/18 12:10:17 sewardj Exp $
+ * $Id: SchedAPI.h,v 1.9 2000/01/13 14:34:01 hwloidl Exp $
 *
 * (c) The GHC Team 1998
 *
@@ -11,6 +11,11 @@
 #ifndef SCHEDAPI_H
 #define SCHEDAPI_H
 
+#if defined(GRAN)
+// NO_PRI: the dummy (default) priority passed to createThread in GranSim
+#define NO_PRI 0
+#endif
+
 /*
 * schedule() plus the thread creation functions are not part
 * part of the external RTS API, so leave them out if we're
@@ -22,8 +27,11 @@ SchedulerStatus waitThread(StgTSO *main_thread, /*out*/StgClosure **ret);
 /*
 * Creating threads
 */
-
+#if defined(GRAN)
+StgTSO *createThread(nat stack_size, StgInt pri);
+#else
 StgTSO *createThread(nat stack_size);
+#endif
 void scheduleThread(StgTSO *tso);
 
 static inline void pushClosure (StgTSO *tso, StgClosure *c) {
@@ -38,7 +46,11 @@ static inline void pushRealWorld (StgTSO *tso) {
 static inline StgTSO *
 createGenThread(nat stack_size, StgClosure *closure) {
 StgTSO *t;
+#if defined(GRAN)
+ t = createThread(stack_size, NO_PRI);
+#else
 t = createThread(stack_size);
+#endif
 pushClosure(t,closure);
 return t;
}
@@ -46,7 +58,11 @@ createGenThread(nat stack_size, StgClosure *closure) {
 static inline StgTSO *
 createIOThread(nat stack_size, StgClosure *closure) {
 StgTSO *t;
+#if defined(GRAN)
+ t = createThread(stack_size, NO_PRI);
+#else
 t = createThread(stack_size);
+#endif
 pushRealWorld(t);
 pushClosure(t,closure);
 return t;
@@ -60,7 +76,11 @@ createIOThread(nat stack_size, StgClosure *closure) {
 static inline StgTSO *
 createStrictIOThread(nat stack_size, StgClosure *closure) {
 StgTSO *t;
+#if defined(GRAN)
+ t = createThread(stack_size, NO_PRI);
+#else
 t = createThread(stack_size);
+#endif
 pushClosure(t,closure);
 pushClosure(t,(StgClosure*)&forceIO_closure);
 return t;
diff --git a/ghc/includes/Stg.h b/ghc/includes/Stg.h
index c8d729d..0ae31a0 100644 --- a/ghc/includes/Stg.h +++ b/ghc/includes/Stg.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Stg.h,v 1.21 1999/11/09 15:57:40 simonmar Exp $ + * $Id: Stg.h,v 1.22 2000/01/13 14:34:01 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -103,6 +103,7 @@ void _stgAssert (char *, unsigned int); /* Global type definitions*/ #include "StgTypes.h" +#include "RtsTypes.h" /* Global constaints */ #include "Constants.h" @@ -116,6 +117,12 @@ void _stgAssert (char *, unsigned int); #include "InfoTables.h" #include "TSO.h" +/* Simulated-parallel information */ +#include "GranSim.h" + +/* Parallel information */ +#include "Parallel.h" + /* STG/Optimised-C related stuff */ #include "SMP.h" #include "MachRegs.h" diff --git a/ghc/includes/StgMacros.h b/ghc/includes/StgMacros.h index bc269ad..ab78687 100644 --- a/ghc/includes/StgMacros.h +++ b/ghc/includes/StgMacros.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMacros.h,v 1.19 1999/11/22 16:44:30 sewardj Exp $ + * $Id: StgMacros.h,v 1.20 2000/01/13 14:34:01 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -138,6 +138,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } } #define HP_CHK(headroom,ret,r,layout,tag_assts) \ + DO_GRAN_ALLOCATE(headroom) \ if ((Hp += headroom) > HpLim) { \ EXTFUN_RTS(stg_chk_##layout); \ tag_assts \ @@ -146,6 +147,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } } #define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \ + DO_GRAN_ALLOCATE(hp_headroom) \ if (Sp - stk_headroom < SpLim || (Hp += hp_headroom) > HpLim) { \ EXTFUN_RTS(stg_chk_##layout); \ tag_assts \ @@ -165,6 +167,10 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } functions. In all these cases, node points to a closure that we can just enter to restart the heap check (the NP stands for 'node points'). + In the NP case GranSim absolutely has to check whether the current node + resides on the current processor. Otherwise a FETCH event has to be + scheduled. All that is done in GranSimFetch. -- HWL + HpLim points to the LAST WORD of valid allocation space. 
-------------------------------------------------------------------------- */ @@ -176,6 +182,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } } #define HP_CHK_NP(headroom,ptrs,tag_assts) \ + DO_GRAN_ALLOCATE(headroom) \ if ((Hp += (headroom)) > HpLim) { \ EXTFUN_RTS(stg_gc_enter_##ptrs); \ tag_assts \ @@ -183,6 +190,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } } #define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts) \ + DO_GRAN_ALLOCATE(headroom) \ if ((Hp += (headroom)) > HpLim) { \ EXTFUN_RTS(stg_gc_seq_##ptrs); \ tag_assts \ @@ -190,6 +198,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } } #define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \ + DO_GRAN_ALLOCATE(hp_headroom) \ if ((Sp - (stk_headroom)) < SpLim || (Hp += (hp_headroom)) > HpLim) { \ EXTFUN_RTS(stg_gc_enter_##ptrs); \ tag_assts \ @@ -200,6 +209,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; } /* Heap checks for branches of a primitive case / unboxed tuple return */ #define GEN_HP_CHK_ALT(headroom,lbl,tag_assts) \ + DO_GRAN_ALLOCATE(headroom) \ if ((Hp += (headroom)) > HpLim) { \ EXTFUN_RTS(lbl); \ tag_assts \ @@ -341,6 +351,25 @@ EF_(stg_gen_block); JMP_(stg_block_##ptrs); \ } +#if defined(PAR) +/* + Similar to BLOCK_NP but separates the saving of the thread state from the + actual jump via an StgReturn +*/ + +#define SAVE_THREAD_STATE(ptrs) \ + ASSERT(ptrs==1); \ + Sp -= 1; \ + Sp[0] = R1.w; \ + SaveThreadState(); + +#define THREAD_RETURN(ptrs) \ + ASSERT(ptrs==1); \ + CurrentTSO->whatNext = ThreadEnterGHC; \ + R1.i = ThreadBlocked; \ + JMP_(StgReturn); +#endif + /* ----------------------------------------------------------------------------- CCall_GC needs to push a dummy stack frame containing the contents of volatile registers and variables. diff --git a/ghc/includes/StgMiscClosures.h b/ghc/includes/StgMiscClosures.h index d9c3489..e0ed424 100644 --- a/ghc/includes/StgMiscClosures.h +++ b/ghc/includes/StgMiscClosures.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.h,v 1.15 1999/11/02 15:05:53 simonmar Exp $ + * $Id: StgMiscClosures.h,v 1.16 2000/01/13 14:34:01 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -31,6 +31,9 @@ STGFUN(WHITEHOLE_entry); STGFUN(SE_BLACKHOLE_entry); STGFUN(SE_CAF_BLACKHOLE_entry); #endif +#if defined(PAR) || defined(GRAN) +STGFUN(RBH_entry); +#endif STGFUN(BCO_entry); STGFUN(EVACUATED_entry); STGFUN(FOREIGN_entry); @@ -50,6 +53,15 @@ STGFUN(MUT_CONS_entry); STGFUN(END_MUT_LIST_entry); STGFUN(dummy_ret_entry); +/* this is the NIL ptr for a TSO queue (e.g. 
runnable queue) */ +#define END_TSO_QUEUE ((StgTSO *)(void*)&END_TSO_QUEUE_closure) +#if defined(PAR) || defined(GRAN) +/* this is the NIL ptr for a blocking queue */ +# define END_BQ_QUEUE ((StgBlockingQueueElement *)(void*)&END_TSO_QUEUE_closure) +/* this is the NIL ptr for a blocked fetch queue (as in PendingFetches in GUM) */ +# define END_BF_QUEUE ((StgBlockedFetch *)(void*)&END_TSO_QUEUE_closure) +#endif + /* info tables */ extern DLL_IMPORT_RTS const StgInfoTable IND_info; @@ -69,6 +81,9 @@ extern DLL_IMPORT_RTS const StgInfoTable WHITEHOLE_info; extern DLL_IMPORT_RTS const StgInfoTable SE_BLACKHOLE_info; extern DLL_IMPORT_RTS const StgInfoTable SE_CAF_BLACKHOLE_info; #endif +#if defined(PAR) || defined(GRAN) +extern DLL_IMPORT_RTS const StgInfoTable RBH_info; +#endif extern DLL_IMPORT_RTS const StgInfoTable BCO_info; extern DLL_IMPORT_RTS const StgInfoTable EVACUATED_info; extern DLL_IMPORT_RTS const StgInfoTable FOREIGN_info; diff --git a/ghc/includes/TSO.h b/ghc/includes/TSO.h index 5cc34be..ce46e00 100644 --- a/ghc/includes/TSO.h +++ b/ghc/includes/TSO.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: TSO.h,v 1.9 1999/12/01 14:34:49 simonmar Exp $ + * $Id: TSO.h,v 1.10 2000/01/13 14:34:01 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -10,6 +10,30 @@ #ifndef TSO_H #define TSO_H +#if defined(GRAN) || defined(PAR) +// magic marker for TSOs; debugging only +#define TSO_MAGIC 4321 + +typedef struct { + StgInt pri; + StgInt magic; + StgInt sparkname; + rtsTime startedat; + rtsBool exported; + StgInt basicblocks; + StgInt allocs; + rtsTime exectime; + rtsTime fetchtime; + rtsTime fetchcount; + rtsTime blocktime; + StgInt blockcount; + rtsTime blockedat; + StgInt globalsparks; + StgInt localsparks; + rtsTime clock; +} StgTSOStatBuf; +#endif + #if defined(PROFILING) typedef struct { CostCentreStack *CCCS; /* thread's current CCS */ @@ -20,14 +44,21 @@ typedef struct { #endif /* PROFILING */ #if defined(PAR) -typedef struct { -} StgTSOParInfo; +typedef StgTSOStatBuf StgTSOParInfo; #else /* !PAR */ typedef struct { } StgTSOParInfo; #endif /* PAR */ -#if defined(TICKY_TICKY) +#if defined(GRAN) +typedef StgTSOStatBuf StgTSOGranInfo; +#else /* !GRAN */ +typedef struct { +} StgTSOGranInfo; +#endif /* GRAN */ + + +#if defined(TICKY) typedef struct { } StgTSOTickyInfo; #else /* !TICKY_TICKY */ @@ -86,6 +117,9 @@ typedef enum { BlockedOnRead, BlockedOnWrite, BlockedOnDelay +#if defined(PAR) + , BlockedOnGA // blocked on a remote closure represented by a Global Address +#endif } StgTSOBlockReason; typedef union { @@ -93,6 +127,9 @@ typedef union { struct StgTSO_ *tso; int fd; unsigned int delay; +#if defined(PAR) + globalAddr ga; +#endif } StgTSOBlockInfo; /* @@ -104,6 +141,7 @@ typedef union { typedef struct StgTSO_ { StgHeader header; struct StgTSO_* link; + /* SDM and HWL agree that it would be cool to have a list of all TSOs */ StgMutClosure * mut_link; /* TSO's are mutable of course! */ StgTSOWhatNext whatNext; StgTSOBlockReason why_blocked; @@ -113,7 +151,7 @@ typedef struct StgTSO_ { StgTSOTickyInfo ticky; StgTSOProfInfo prof; StgTSOParInfo par; - /* GranSim Info? */ + StgTSOGranInfo gran; /* The thread stack... 
*/
 StgWord stack_size; /* stack size in *words* */
diff --git a/ghc/includes/Updates.h b/ghc/includes/Updates.h
index d814c10..5378b6c 100644
--- a/ghc/includes/Updates.h
+++ b/ghc/includes/Updates.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Updates.h,v 1.15 1999/11/09 15:47:09 simonmar Exp $
+ * $Id: Updates.h,v 1.16 2000/01/13 14:34:01 hwloidl Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
@@ -91,18 +91,80 @@
 Awaken any threads waiting on this computation
 -------------------------------------------------------------------------- */
 
+#if defined(PAR)
+
+/*
+ In a parallel setup several types of closures might have a blocking queue:
+ BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
+ reawakened via calling UPD_IND on that closure after
+ having finished the computation of the graph
+ FETCH_ME_BQ ... a global indirection (FETCH_ME) may be entered by a
+ local TSO, turning it into a FETCH_ME_BQ; it will be
+ reawakened via calling processResume
+ RBH ... a revertible black hole may be entered by another
+ local TSO, putting it onto its blocking queue; since
+ RBHs only exist while the corresponding closure is in
+ transit, they will be reawakened via calling
+ convertToFetchMe (upon processing an ACK message)
+
+ In a parallel setup a blocking queue may contain 3 types of closures:
+ TSO ... as in the default concurrent setup
+ BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
+ the result of the current computation
+ CONSTR ... an RBHSave closure (which contains data ripped out of
+ the closure to make room for a blocking queue; since
+ it only contains data we use the existing type of
+ a CONSTR closure); this closure is the end of a
+ blocking queue for an RBH closure; it only exists in
+ this kind of blocking queue and must be at the end
+ of the queue
+*/
+extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#define DO_AWAKEN_BQ(bqe, node) STGCALL2(awakenBlockedQueue, bqe, node);
+
+#define AWAKEN_BQ(info,closure) \
+ if (info == &BLACKHOLE_BQ_info || \
+ info == &FETCH_ME_BQ_info || \
+ get_itbl(closure)->type == RBH) { \
+ StgBlockingQueueElement *bqe = ((StgBlockingQueue *)closure)->blocking_queue;\
+ ASSERT(bqe!=END_BQ_QUEUE); \
+ DO_AWAKEN_BQ(bqe, closure); \
+ }
+
+#elif defined(GRAN)
+
+extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#define DO_AWAKEN_BQ(bq, node) STGCALL2(awakenBlockedQueue, bq, node);
+
+/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
+ not checked. The rest of the code is the same as for GUM.
+*/
+#define AWAKEN_BQ(info,closure) \
+ if (info == &BLACKHOLE_BQ_info || \
+ get_itbl(closure)->type == RBH) { \
+ StgBlockingQueueElement *bqe = ((StgBlockingQueue *)closure)->blocking_queue;\
+ ASSERT(bqe!=END_BQ_QUEUE); \
+ DO_AWAKEN_BQ(bqe, closure); \
+ }
+
+
+#else /* !GRAN && !PAR */
+
 extern void awakenBlockedQueue(StgTSO *q);
+#define DO_AWAKEN_BQ(closure) \
+ STGCALL1(awakenBlockedQueue, \
+ ((StgBlockingQueue *)closure)->blocking_queue);
 
 #define AWAKEN_BQ(info,closure) \
 	if (info == &BLACKHOLE_BQ_info) { \
- STGCALL1(awakenBlockedQueue, \
- ((StgBlockingQueue *)closure)->blocking_queue); \
+ DO_AWAKEN_BQ(closure); \
 	}
 
+#endif /* GRAN || PAR */
 
-/* -----------------------------------------------------------------------------
+/* -------------------------------------------------------------------------
 Push an update frame on the stack.
- -------------------------------------------------------------------------- */ + ------------------------------------------------------------------------- */ #if defined(PROFILING) #define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS diff --git a/ghc/lib/std/Makefile b/ghc/lib/std/Makefile index 51b68d2..853c599 100644 --- a/ghc/lib/std/Makefile +++ b/ghc/lib/std/Makefile @@ -47,6 +47,13 @@ ifneq "$(way)" "" SRC_HC_OPTS += -hisuf $(way_)hi endif +# HWL: for debugging GranSim generate .hc and .s files +SRC_HC_OPTS += -keep-hc-files-too -keep-s-files-too +# # HWL: why isn't that on by default !!???????????? +# ifeq "$(way)" "mg" +# SRC_HC_OPTS += -gransim +# endif + # per-module flags PrelArrExtra_HC_OPTS += -monly-2-regs @@ -55,7 +62,7 @@ PrelArrExtra_HC_OPTS += -monly-2-regs PrelNumExtra_HC_OPTS += -H24m -K2m PrelPack_HC_OPTS += -K4m -PrelBase_HC_OPTS += -H12m +PrelBase_HC_OPTS += -H32m -K32m PrelRead_HC_OPTS += -H20m PrelTup_HC_OPTS += -H12m -K2m PrelNum_HC_OPTS += -H12m -K4m diff --git a/ghc/lib/std/PrelHandle.lhs b/ghc/lib/std/PrelHandle.lhs index 85289ad..d65c234 100644 --- a/ghc/lib/std/PrelHandle.lhs +++ b/ghc/lib/std/PrelHandle.lhs @@ -29,7 +29,9 @@ import PrelShow import PrelAddr ( Addr, nullAddr ) import PrelReal ( toInteger ) import PrelPack ( packString ) +#ifndef __PARALLEL_HASKELL__ import PrelWeak ( addForeignFinalizer ) +#endif import Ix #ifdef __CONCURRENT_HASKELL__ diff --git a/ghc/lib/std/PrelWeak.lhs b/ghc/lib/std/PrelWeak.lhs index 3b09a39..354332b 100644 --- a/ghc/lib/std/PrelWeak.lhs +++ b/ghc/lib/std/PrelWeak.lhs @@ -7,6 +7,8 @@ \begin{code} {-# OPTIONS -fno-implicit-prelude #-} +#ifndef __PARALLEL_HASKELL__ + module PrelWeak where import PrelGHC @@ -43,4 +45,6 @@ instance Eq (Weak v) where (Weak w1) == (Weak w2) = w1 `sameWeak#` w2 -} +#endif + \end{code} diff --git a/ghc/mk/paths.mk b/ghc/mk/paths.mk index be6dd7d..88e39ae 100644 --- a/ghc/mk/paths.mk +++ b/ghc/mk/paths.mk @@ -1,5 +1,5 @@ # ----------------------------------------------------------------------------- -# $Id: paths.mk,v 1.17 2000/01/10 11:59:55 simonmar Exp $ +# $Id: paths.mk,v 1.18 2000/01/13 14:34:02 hwloidl Exp $ # # ghc project specific make variables # @@ -25,7 +25,6 @@ GHC_INCLUDE_DIR := $(TOP)/includes GHC_UTILS_DIR := $(TOP)/utils GHC_INTERPRETER_DIR := $(TOP)/interpreter -GHC_SYSMAN_DIR := $(GHC_RUNTIME_DIR)/gum GHC_HSP_DIR := $(GHC_HSC_DIR) GHC_MKDEPENDHS_DIR := $(GHC_UTILS_DIR)/mkdependHS GHC_HSCPP_DIR := $(GHC_UTILS_DIR)/hscpp @@ -37,11 +36,12 @@ GHC_HSCPP = $(GHC_HSCPP_DIR)/hscpp GHC_MKDEPENDHS = $(GHC_MKDEPENDHS_DIR)/mkdependHS-inplace GHC_HSP = $(GHC_HSP_DIR)/hsp GHC_HSC = $(GHC_HSC_DIR)/hsc -GHC_SYSMAN = $(GHC_RUNTIME_DIR)/gum/SysMan - UNLIT = $(GHC_UNLIT_DIR)/unlit GHC_UNLIT = $(GHC_UNLIT_DIR)/unlit +GHC_SYSMAN = $(GHC_RUNTIME_DIR)/parallel/SysMan +GHC_SYSMAN_DIR = $(GHC_RUNTIME_DIR)/parallel + #----------------------------------------------------------------------------- # Stuff for the C-compiling phase in particular... diff --git a/ghc/rts/ClosureFlags.c b/ghc/rts/ClosureFlags.c index a7fdb0b..89e98e4 100644 --- a/ghc/rts/ClosureFlags.c +++ b/ghc/rts/ClosureFlags.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: ClosureFlags.c,v 1.5 2000/01/12 12:39:20 simonmar Exp $ + * $Id: ClosureFlags.c,v 1.6 2000/01/13 14:34:02 hwloidl Exp $ * * (c) The GHC Team 1998-1999 * @@ -23,6 +23,7 @@ StgWord16 closure_flags[] = { * to thunks.) 
*/ +/* 0 1 2 3 4 5 6 7 */ /* HNF BTM NS STA THU MUT UPT SRT */ /* INVALID_OBJECT */ ( 0 ), @@ -84,8 +85,14 @@ StgWord16 closure_flags[] = { /* WEAK */ (_HNF| _NS| _UPT ), /* FOREIGN */ (_HNF| _NS| _UPT ), /* STABLE_NAME */ (_HNF| _NS| _UPT ), + /* TSO */ (_HNF| _NS| _MUT|_UPT ), -/* BLOCKED_FETCH */ (_HNF| _NS ), -/* FETCH_ME */ (_HNF| _NS ), -/* EVACUATED */ ( 0 ) +/* BLOCKED_FETCH */ (_HNF| _NS| _MUT|_UPT ), +/* FETCH_ME */ (_HNF| _NS| _MUT|_UPT ), +/* FETCH_ME_BQ */ ( _NS| _MUT|_UPT ), +/* RBH */ ( _NS| _MUT|_UPT ), + +/* EVACUATED */ ( 0 ), + +/* N_CLOSURE_TYPES */ ( 0 ) }; diff --git a/ghc/rts/Exception.hc b/ghc/rts/Exception.hc index f19f212..7fdd6fd 100644 --- a/ghc/rts/Exception.hc +++ b/ghc/rts/Exception.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Exception.hc,v 1.2 1999/12/02 09:52:41 simonmar Exp $ + * $Id: Exception.hc,v 1.3 2000/01/13 14:34:02 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -14,6 +14,9 @@ #include "Storage.h" #include "RtsUtils.h" #include "RtsFlags.h" +#if defined(PAR) +# include "FetchMe.h" +#endif /* ----------------------------------------------------------------------------- Exception Primitives @@ -62,7 +65,16 @@ FN_(unblockAsyncExceptionszh_ret_entry) { FB_ ASSERT(CurrentTSO->blocked_exceptions != NULL); +#if defined(GRAN) +# error FixME +#elif defined(PAR) + // is CurrentTSO->block_info.closure always set to the node + // holding the blocking queue !? -- HWL + awakenBlockedQueue(CurrentTSO->blocked_exceptions, + CurrentTSO->block_info.closure); +#else awakenBlockedQueue(CurrentTSO->blocked_exceptions); +#endif CurrentTSO->blocked_exceptions = NULL; Sp++; JMP_(ENTRY_CODE(Sp[0])); @@ -76,7 +88,16 @@ FN_(unblockAsyncExceptionszh_fast) STK_CHK_GEN(2, R1_PTR, unblockAsyncExceptionszh_fast, ); if (CurrentTSO->blocked_exceptions != NULL) { +#if defined(GRAN) +# error FixME +#elif defined(PAR) + // is CurrentTSO->block_info.closure always set to the node + // holding the blocking queue !? 
-- HWL + awakenBlockedQueue(CurrentTSO->blocked_exceptions, + CurrentTSO->block_info.closure); +#else awakenBlockedQueue(CurrentTSO->blocked_exceptions); +#endif CurrentTSO->blocked_exceptions = NULL; Sp--; Sp[0] = (W_)&blockAsyncExceptionszh_ret_info; diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index f3ce4c6..3665034 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: GC.c,v 1.68 1999/12/01 15:07:00 simonmar Exp $ + * $Id: GC.c,v 1.69 2000/01/13 14:34:02 hwloidl Exp $ * * (c) The GHC Team 1998-1999 * @@ -7,6 +7,25 @@ * * ---------------------------------------------------------------------------*/ +//@menu +//* Includes:: +//* STATIC OBJECT LIST:: +//* Static function declarations:: +//* Garbage Collect:: +//* Weak Pointers:: +//* Evacuation:: +//* Scavenging:: +//* Reverting CAFs:: +//* Sanity code for CAF garbage collection:: +//* Lazy black holing:: +//* Stack squeezing:: +//* Pausing a thread:: +//* Index:: +//@end menu + +//@node Includes, STATIC OBJECT LIST +//@subsection Includes + #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" @@ -23,9 +42,21 @@ #include "SchedAPI.h" #include "Weak.h" #include "StablePriv.h" +#if defined(GRAN) || defined(PAR) +# include "GranSimRts.h" +# include "ParallelRts.h" +# include "FetchMe.h" +# if defined(DEBUG) +# include "Printer.h" +# include "ParallelDebug.h" +# endif +#endif StgCAF* enteredCAFs; +//@node STATIC OBJECT LIST, Static function declarations, Includes +//@subsection STATIC OBJECT LIST + /* STATIC OBJECT LIST. * * During GC: @@ -96,6 +127,9 @@ bdescr *old_to_space; lnat new_blocks; /* blocks allocated during this GC */ lnat g0s0_pcnt_kept = 30; /* percentage of g0s0 live at last minor GC */ +//@node Static function declarations, Garbage Collect, STATIC OBJECT LIST +//@subsection Static function declarations + /* ----------------------------------------------------------------------------- Static function declarations -------------------------------------------------------------------------- */ @@ -119,6 +153,9 @@ static void scavenge_mut_once_list ( generation *g ); static void gcCAFs ( void ); #endif +//@node Garbage Collect, Weak Pointers, Static function declarations +//@subsection Garbage Collect + /* ----------------------------------------------------------------------------- GarbageCollect @@ -141,6 +178,7 @@ static void gcCAFs ( void ); - free from-space in each step, and set from-space = to-space. 
-------------------------------------------------------------------------- */ +//@cindex GarbageCollect void GarbageCollect(void (*get_roots)(void)) { @@ -153,6 +191,11 @@ void GarbageCollect(void (*get_roots)(void)) CostCentreStack *prev_CCS; #endif +#if defined(DEBUG) && defined(GRAN) + IF_DEBUG(gc, belch("@@ Starting garbage collection at %ld (%lx)\n", + Now, Now)) +#endif + /* tell the stats department that we've started a GC */ stat_startGC(); @@ -176,8 +219,10 @@ void GarbageCollect(void (*get_roots)(void)) major_gc = (N == RtsFlags.GcFlags.generations-1); /* check stack sanity *before* GC (ToDo: check all threads) */ - /*IF_DEBUG(sanity, checkTSO(MainTSO,0)); */ - IF_DEBUG(sanity, checkFreeListSanity()); +#if defined(GRAN) + // ToDo!: check sanity IF_DEBUG(sanity, checkTSOsSanity()); +#endif + IF_DEBUG(sanity, checkFreeListSanity()); /* Initialise the static object lists */ @@ -296,6 +341,8 @@ void GarbageCollect(void (*get_roots)(void)) /* Do the mut-once lists first */ for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + IF_PAR_DEBUG(verbose, + printMutOnceList(&generations[g])); scavenge_mut_once_list(&generations[g]); evac_gen = g; for (st = generations[g].n_steps-1; st >= 0; st--) { @@ -304,6 +351,8 @@ void GarbageCollect(void (*get_roots)(void)) } for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + IF_PAR_DEBUG(verbose, + printMutableList(&generations[g])); scavenge_mutable_list(&generations[g]); evac_gen = g; for (st = generations[g].n_steps-1; st >= 0; st--) { @@ -317,6 +366,19 @@ void GarbageCollect(void (*get_roots)(void)) evac_gen = 0; get_roots(); +#if defined(PAR) + /* And don't forget to mark the TSO if we got here direct from + * Haskell! */ + /* Not needed in a seq version? + if (CurrentTSO) { + CurrentTSO = (StgTSO *)MarkRoot((StgClosure *)CurrentTSO); + } + */ + + /* Mark the entries in the GALA table of the parallel system */ + markLocalGAs(major_gc); +#endif + /* Mark the weak pointer list, and prepare to detect dead weak * pointers. */ @@ -577,7 +639,7 @@ void GarbageCollect(void (*get_roots)(void)) int pc_free; adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks); - IF_DEBUG(gc, fprintf(stderr, "Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks)); + IF_DEBUG(gc, fprintf(stderr, "@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks)); pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize; if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ { heapOverflow(); @@ -648,6 +710,11 @@ void GarbageCollect(void (*get_roots)(void)) */ resetNurseries(); +#if defined(PAR) + /* Reconstruct the Global Address tables used in GUM */ + RebuildGAtables(major_gc); +#endif + /* start any pending finalizers */ scheduleFinalizers(old_weak_ptr_list); @@ -675,6 +742,9 @@ void GarbageCollect(void (*get_roots)(void)) stat_endGC(allocated, collected, live, copied, N); } +//@node Weak Pointers, Evacuation, Garbage Collect +//@subsection Weak Pointers + /* ----------------------------------------------------------------------------- Weak Pointers @@ -694,6 +764,7 @@ void GarbageCollect(void (*get_roots)(void)) probably be optimised by keeping per-generation lists of weak pointers, but for a few weak pointers this scheme will work. 
-------------------------------------------------------------------------- */ +//@cindex traverse_weak_ptr_list static rtsBool traverse_weak_ptr_list(void) @@ -782,6 +853,8 @@ traverse_weak_ptr_list(void) evacuated need to be evacuated now. -------------------------------------------------------------------------- */ +//@cindex cleanup_weak_ptr_list + static void cleanup_weak_ptr_list ( StgWeak **list ) { @@ -809,6 +882,8 @@ cleanup_weak_ptr_list ( StgWeak **list ) closure if it is alive, or NULL otherwise. -------------------------------------------------------------------------- */ +//@cindex isAlive + StgClosure * isAlive(StgClosure *p) { @@ -823,10 +898,14 @@ isAlive(StgClosure *p) * for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. */ +#if 1 || !defined(PAR) /* ignore closures in generations that we're not collecting. */ + /* In GUM we use this routine when rebuilding GA tables; for some + reason it has problems with the LOOKS_LIKE_STATIC macro -- HWL */ if (LOOKS_LIKE_STATIC(p) || Bdescr((P_)p)->gen->no > N) { return p; } +#endif switch (info->type) { @@ -850,12 +929,24 @@ isAlive(StgClosure *p) } } +//@cindex MarkRoot StgClosure * MarkRoot(StgClosure *root) { + //if (root != END_TSO_QUEUE) return evacuate(root); } +//@cindex MarkRootHWL +StgClosure * +MarkRootHWL(StgClosure *root) +{ + StgClosure *new = evacuate(root); + upd_evacuee(root, new); + return new; +} + +//@cindex addBlock static void addBlock(step *step) { bdescr *bd = allocBlock(); @@ -877,6 +968,8 @@ static void addBlock(step *step) new_blocks++; } +//@cindex upd_evacuee + static __inline__ void upd_evacuee(StgClosure *p, StgClosure *dest) { @@ -884,6 +977,8 @@ upd_evacuee(StgClosure *p, StgClosure *dest) ((StgEvacuated *)p)->evacuee = dest; } +//@cindex copy + static __inline__ StgClosure * copy(StgClosure *src, nat size, step *step) { @@ -925,6 +1020,8 @@ copy(StgClosure *src, nat size, step *step) * used to optimise evacuation of BLACKHOLEs. */ +//@cindex copyPart + static __inline__ StgClosure * copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step) { @@ -953,6 +1050,9 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step) return (StgClosure *)dest; } +//@node Evacuation, Scavenging, Weak Pointers +//@subsection Evacuation + /* ----------------------------------------------------------------------------- Evacuate a large object @@ -964,6 +1064,8 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step) evacuated, or 0 otherwise. -------------------------------------------------------------------------- */ +//@cindex evacuate_large + static inline void evacuate_large(StgPtr p, rtsBool mutable) { @@ -1026,6 +1128,8 @@ evacuate_large(StgPtr p, rtsBool mutable) the promotion until the next GC. -------------------------------------------------------------------------- */ +//@cindex mkMutCons + static StgClosure * mkMutCons(StgClosure *ptr, generation *gen) { @@ -1075,7 +1179,7 @@ mkMutCons(StgClosure *ptr, generation *gen) didn't manage to evacuate this object into evac_gen. 
-------------------------------------------------------------------------- */
-
+//@cindex evacuate
 
 static StgClosure *
 evacuate(StgClosure *q)
@@ -1085,6 +1189,9 @@ evacuate(StgClosure *q)
 step *step;
 const StgInfoTable *info;
 
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
 loop:
 if (HEAP_ALLOCED(q)) {
 bd = Bdescr((P_)q);
@@ -1110,7 +1217,15 @@ loop:
 ASSERT(q && (LOOKS_LIKE_GHC_INFO(GET_INFO(q))
 || IS_HUGS_CONSTR_INFO(GET_INFO(q))));
 info = get_itbl(q);
-
+ /*
+ if (info->type==RBH) {
+ info = REVERT_INFOPTR(info);
+ IF_DEBUG(gc,
+ belch("@_ Trying to evacuate an RBH %p (%s); reverting to IP %p (%s)",
+ q, info_type(q), info, info_type_by_ip(info)));
+ }
+ */
+
 switch (info -> type) {
 
 case BCO:
@@ -1328,7 +1443,7 @@ loop:
 case CATCH_FRAME:
 case SEQ_FRAME:
 /* shouldn't see these */
- barf("evacuate: stack frame\n");
+ barf("evacuate: stack frame at %p\n", q);
 
 case AP_UPD:
 case PAP:
@@ -1347,7 +1462,7 @@ loop:
 if (evac_gen > 0) { /* optimisation */
 StgClosure *p = ((StgEvacuated*)q)->evacuee;
 if (Bdescr((P_)p)->gen->no < evac_gen) {
- /* fprintf(stderr,"evac failed!\n");*/
+ IF_DEBUG(gc, belch("@@ evacuate: evac of EVACUATED node %p failed!", p));
 failed_to_evac = rtsTrue;
 TICK_GC_FAILED_PROMOTION();
 }
@@ -1417,10 +1532,44 @@ loop:
 }
 }
 
+#if defined(PAR)
+ case RBH: // cf. BLACKHOLE_BQ
+ {
+ //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str);
+ to = copy(q,BLACKHOLE_sizeW(),step);
+ //ToDo: derive size etc from reverted IP
+ //to = copy(q,size,step);
+ recordMutable((StgMutClosure *)to);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: RBH %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+ }
+
+ case BLOCKED_FETCH:
+ ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE);
+ to = copy(q,sizeofW(StgBlockedFetch),step);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+
 case FETCH_ME:
- fprintf(stderr,"evacuate: unimplemented/strange closure type\n");
- return q;
+ ASSERT(sizeofW(StgFetchMe) >= MIN_UPD_SIZE);
+ to = copy(q,sizeofW(StgFetchMe),step);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+
+ case FETCH_ME_BQ:
+ ASSERT(sizeofW(StgFetchMeBlockingQueue) >= MIN_UPD_SIZE);
+ to = copy(q,sizeofW(StgFetchMeBlockingQueue),step);
+ IF_DEBUG(gc,
+ belch("@@ evacuate: %p (%s) to %p (%s)",
+ q, info_type(q), to, info_type(to)));
+ return to;
+#endif
 
 default:
 barf("evacuate: strange closure type %d", (int)(info->type));
@@ -1433,6 +1582,7 @@ loop:
 relocate_TSO is called just after a TSO has been copied from src to
 dest. It adjusts the update frame list for the new location.
 -------------------------------------------------------------------------- */
+//@cindex relocate_TSO
 
 StgTSO *
 relocate_TSO(StgTSO *src, StgTSO *dest)
@@ -1481,6 +1631,11 @@ relocate_TSO(StgTSO *src, StgTSO *dest)
 return dest;
}
 
+//@node Scavenging, Reverting CAFs, Evacuation
+//@subsection Scavenging
+
+//@cindex scavenge_srt
+
 static inline void
 scavenge_srt(const StgInfoTable *info)
{
@@ -1548,7 +1703,7 @@ scavengeTSO (StgTSO *tso)
 scavenging a mutable object where early promotion isn't such a good
 idea.
-------------------------------------------------------------------------- */ - +//@cindex scavenge static void scavenge(step *step) @@ -1582,6 +1737,11 @@ scavenge(step *step) || IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)))); info = get_itbl((StgClosure *)p); + /* + if (info->type==RBH) + info = REVERT_INFOPTR(info); + */ + switch (info -> type) { case BCO: @@ -1849,8 +2009,72 @@ scavenge(step *step) break; } +#if defined(PAR) + case RBH: // cf. BLACKHOLE_BQ + { + // nat size, ptrs, nonptrs, vhs; + // char str[80]; + // StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); + StgRBH *rbh = (StgRBH *)p; + (StgClosure *)rbh->blocking_queue = + evacuate((StgClosure *)rbh->blocking_queue); + if (failed_to_evac) { + failed_to_evac = rtsFalse; + recordMutable((StgMutClosure *)rbh); + } + IF_DEBUG(gc, + belch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)", + p, info_type(p), (StgClosure *)rbh->blocking_queue)); + // ToDo: use size of reverted closure here! + p += BLACKHOLE_sizeW(); + break; + } + case BLOCKED_FETCH: + { + StgBlockedFetch *bf = (StgBlockedFetch *)p; + /* follow the pointer to the node which is being demanded */ + (StgClosure *)bf->node = + evacuate((StgClosure *)bf->node); + /* follow the link to the rest of the blocking queue */ + (StgClosure *)bf->link = + evacuate((StgClosure *)bf->link); + if (failed_to_evac) { + failed_to_evac = rtsFalse; + recordMutable((StgMutClosure *)bf); + } + IF_DEBUG(gc, + belch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it", + bf, info_type((StgClosure *)bf), + bf->node, info_type(bf->node))); + p += sizeofW(StgBlockedFetch); + break; + } + case FETCH_ME: + IF_DEBUG(gc, + belch("@@ scavenge: HWL claims nothing to do for %p (%s)", + p, info_type((StgClosure *)p))); + p += sizeofW(StgFetchMe); + break; // nothing to do in this case + + case FETCH_ME_BQ: // cf. BLACKHOLE_BQ + { + StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; + (StgClosure *)fmbq->blocking_queue = + evacuate((StgClosure *)fmbq->blocking_queue); + if (failed_to_evac) { + failed_to_evac = rtsFalse; + recordMutable((StgMutClosure *)fmbq); + } + IF_DEBUG(gc, + belch("@@ scavenge: %p (%s) exciting, isn't it", + p, info_type((StgClosure *)p))); + p += sizeofW(StgFetchMeBlockingQueue); + break; + } +#endif + case EVACUATED: barf("scavenge: unimplemented/strange closure type\n"); @@ -1879,6 +2103,8 @@ scavenge(step *step) because they contain old-to-new generation pointers. Only certain objects can have this property. -------------------------------------------------------------------------- */ +//@cindex scavenge_one + static rtsBool scavenge_one(StgClosure *p) { @@ -1890,6 +2116,11 @@ scavenge_one(StgClosure *p) info = get_itbl(p); + /* ngoq moHqu'! + if (info->type==RBH) + info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure + */ + switch (info -> type) { case FUN: @@ -1976,6 +2207,7 @@ scavenge_one(StgClosure *p) generations older than the one being collected) as roots. We also remove non-mutable objects from the mutable list at this point. 
-------------------------------------------------------------------------- */
+//@cindex scavenge_mut_once_list
 
static void
scavenge_mut_once_list(generation *gen)
@@ -1997,6 +2229,10 @@ scavenge_mut_once_list(generation *gen)
 || IS_HUGS_CONSTR_INFO(GET_INFO(p))));
 
 info = get_itbl(p);
+ /*
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ */
 switch(info->type) {
 
 case IND_OLDGEN:
@@ -2008,7 +2244,8 @@ scavenge_mut_once_list(generation *gen)
 ((StgIndOldGen *)p)->indirectee =
 evacuate(((StgIndOldGen *)p)->indirectee);
 
-#if 0
+#ifdef DEBUG
+ if (RtsFlags.DebugFlags.gc)
 /* Debugging code to print out the size of the thing we just
 * promoted
 */
@@ -2107,6 +2344,7 @@ scavenge_mut_once_list(generation *gen)
 gen->mut_once_list = new_list;
}
 
+//@cindex scavenge_mutable_list
 
static void
scavenge_mutable_list(generation *gen)
@@ -2127,6 +2365,10 @@ scavenge_mutable_list(generation *gen)
 || IS_HUGS_CONSTR_INFO(GET_INFO(p))));
 
 info = get_itbl(p);
+ /*
+ if (info->type==RBH)
+ info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+ */
 switch(info->type) {
 
 case MUT_ARR_PTRS_FROZEN:
@@ -2136,6 +2378,10 @@ scavenge_mutable_list(generation *gen)
 {
 StgPtr end, q;
 
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MUT_ARR_PTRS_FROZEN %p; size: %#x ; next: %p",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p), p->mut_link));
+
 end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
 evac_gen = gen->no;
 for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) {
@@ -2158,6 +2404,10 @@ scavenge_mutable_list(generation *gen)
 {
 StgPtr end, q;
 
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MUT_ARR_PTRS %p; size: %#x ; next: %p",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p), p->mut_link));
+
 end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
 for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) {
 (StgClosure *)*q = evacuate((StgClosure *)*q);
@@ -2170,6 +2420,10 @@ scavenge_mutable_list(generation *gen)
 * it from the mutable list if possible by promoting whatever it
 * points to.
 */
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MUT_VAR %p; var: %p ; next: %p",
+ p, ((StgMutVar *)p)->var, p->mut_link));
+
 ASSERT(p->header.info != &MUT_CONS_info);
 ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
 p->mut_link = gen->mut_list;
@@ -2179,6 +2433,11 @@ scavenge_mutable_list(generation *gen)
 case MVAR:
 {
 StgMVar *mvar = (StgMVar *)p;
+
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging MVAR %p; head: %p; tail: %p; value: %p ; next: %p",
+ mvar, mvar->head, mvar->tail, mvar->value, p->mut_link));
+
 (StgClosure *)mvar->head = evacuate((StgClosure *)mvar->head);
 (StgClosure *)mvar->tail = evacuate((StgClosure *)mvar->tail);
 (StgClosure *)mvar->value = evacuate((StgClosure *)mvar->value);
@@ -2205,6 +2464,11 @@ scavenge_mutable_list(generation *gen)
 case BLACKHOLE_BQ:
 {
 StgBlockingQueue *bh = (StgBlockingQueue *)p;
+
+ IF_DEBUG(gc,
+ belch("@@ scavenge_mut_list: scavenging BLACKHOLE_BQ (%p); next: %p",
+ p, p->mut_link));
+
 (StgClosure *)bh->blocking_queue =
 evacuate((StgClosure *)bh->blocking_queue);
 p->mut_link = gen->mut_list;
@@ -2233,6 +2497,8 @@ scavenge_mutable_list(generation *gen)
 }
 continue;
 
+ // HWL: old PAR code deleted here
+
 default:
 /* shouldn't have anything else on the mutables list */
 barf("scavenge_mutable_list: strange object? 
%d", (int)(info->type)); @@ -2240,6 +2506,8 @@ scavenge_mutable_list(generation *gen) } } +//@cindex scavenge_static + static void scavenge_static(void) { @@ -2255,7 +2523,10 @@ scavenge_static(void) while (p != END_OF_STATIC_LIST) { info = get_itbl(p); - + /* + if (info->type==RBH) + info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure + */ /* make sure the info pointer is into text space */ ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); @@ -2324,6 +2595,7 @@ scavenge_static(void) objects pointed to by it. We can use the same code for walking PAPs, since these are just sections of copied stack. -------------------------------------------------------------------------- */ +//@cindex scavenge_stack static void scavenge_stack(StgPtr p, StgPtr stack_end) @@ -2332,6 +2604,8 @@ scavenge_stack(StgPtr p, StgPtr stack_end) const StgInfoTable* info; StgWord32 bitmap; + IF_DEBUG(sanity, belch(" scavenging stack between %p and %p", p, stack_end)); + /* * Each time around this loop, we are looking at a chunk of stack * that starts with either a pending argument section or an @@ -2380,8 +2654,18 @@ scavenge_stack(StgPtr p, StgPtr stack_end) /* probably a slow-entry point return address: */ case FUN: case FUN_STATIC: - p++; + { +#if 0 + StgPtr old_p = p; + p++; p++; + IF_DEBUG(sanity, + belch("HWL: scavenge_stack: FUN(_STATIC) adjusting p from %p to %p (instead of %p)", + old_p, p, old_p+1)); +#else + p++; /* what if FHS!=1 !? -- HWL */ +#endif goto follow_srt; + } /* Specialised code for update frames, since they're so common. * We *know* the updatee points to a BLACKHOLE, CAF_BLACKHOLE, @@ -2436,14 +2720,29 @@ scavenge_stack(StgPtr p, StgPtr stack_end) } /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */ - case RET_BCO: - case RET_SMALL: - case RET_VEC_SMALL: case STOP_FRAME: case CATCH_FRAME: case SEQ_FRAME: + { + StgPtr old_p = p; // debugging only -- HWL + /* stack frames like these are ordinary closures and therefore may + contain setup-specific fixed-header words (as in GranSim!); + therefore, these cases should not use p++ but &(p->payload) -- HWL */ + IF_DEBUG(gran, IF_DEBUG(sanity, printObj(p))); + bitmap = info->layout.bitmap; + + p = (StgPtr)&(((StgClosure *)p)->payload); + IF_DEBUG(sanity, + belch("HWL: scavenge_stack: (STOP|CATCH|SEQ)_FRAME adjusting p from %p to %p (instead of %p)", + old_p, p, old_p+1)); + goto small_bitmap; + } + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: bitmap = info->layout.bitmap; p++; + /* this assumes that the payload starts immediately after the info-ptr */ small_bitmap: while (bitmap != 0) { if ((bitmap & 1) == 0) { @@ -2504,6 +2803,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end) objects are (repeatedly) mutable, so most of the time evac_gen will be zero. --------------------------------------------------------------------------- */ +//@cindex scavenge_large static void scavenge_large(step *step) @@ -2580,6 +2880,7 @@ scavenge_large(step *step) case TSO: scavengeTSO((StgTSO *)p); + // HWL: old PAR code deleted here continue; default: @@ -2588,6 +2889,8 @@ scavenge_large(step *step) } } +//@cindex zero_static_object_list + static void zero_static_object_list(StgClosure* first_static) { @@ -2610,6 +2913,8 @@ zero_static_object_list(StgClosure* first_static) * It doesn't do any harm to zero all the mutable link fields on the * mutable list. 
*/ +//@cindex zero_mutable_list + static void zero_mutable_list( StgMutClosure *first ) { @@ -2621,9 +2926,13 @@ zero_mutable_list( StgMutClosure *first ) } } +//@node Reverting CAFs, Sanity code for CAF garbage collection, Scavenging +//@subsection Reverting CAFs + /* ----------------------------------------------------------------------------- Reverting CAFs -------------------------------------------------------------------------- */ +//@cindex RevertCAFs void RevertCAFs(void) { @@ -2639,6 +2948,8 @@ void RevertCAFs(void) enteredCAFs = END_CAF_LIST; } +//@cindex revert_dead_CAFs + void revert_dead_CAFs(void) { StgCAF* caf = enteredCAFs; @@ -2660,6 +2971,9 @@ void revert_dead_CAFs(void) } } +//@node Sanity code for CAF garbage collection, Lazy black holing, Reverting CAFs +//@subsection Sanity code for CAF garbage collection + /* ----------------------------------------------------------------------------- Sanity code for CAF garbage collection. @@ -2673,6 +2987,8 @@ void revert_dead_CAFs(void) -------------------------------------------------------------------------- */ #ifdef DEBUG +//@cindex gcCAFs + static void gcCAFs(void) { @@ -2710,6 +3026,9 @@ gcCAFs(void) } #endif +//@node Lazy black holing, Stack squeezing, Sanity code for CAF garbage collection +//@subsection Lazy black holing + /* ----------------------------------------------------------------------------- Lazy black holing. @@ -2717,6 +3036,7 @@ gcCAFs(void) some work, we have to run down the stack and black-hole all the closures referred to by update frames. -------------------------------------------------------------------------- */ +//@cindex threadLazyBlackHole static void threadLazyBlackHole(StgTSO *tso) @@ -2772,6 +3092,9 @@ threadLazyBlackHole(StgTSO *tso) } } +//@node Stack squeezing, Pausing a thread, Lazy black holing +//@subsection Stack squeezing + /* ----------------------------------------------------------------------------- * Stack squeezing * @@ -2779,6 +3102,7 @@ threadLazyBlackHole(StgTSO *tso) * lazy black holing here. 
 *
 * -------------------------------------------------------------------------- */
+//@cindex threadSqueezeStack
 
static void
threadSqueezeStack(StgTSO *tso)
@@ -2789,6 +3113,14 @@ threadSqueezeStack(StgTSO *tso)
 StgUpdateFrame *prev_frame; /* Temporally previous */
 StgPtr bottom;
 rtsBool prev_was_update_frame;
+#if DEBUG
+ StgUpdateFrame *top_frame;
+ nat upd_frames=0, stop_frames=0, catch_frames=0, seq_frames=0,
+ bhs=0, squeezes=0;
+ void printObj( StgClosure *obj ); // from Printer.c
+
+ top_frame = tso->su;
+#endif
 
 bottom = &(tso->stack[tso->stack_size]);
 frame = tso->su;
@@ -2814,6 +3146,30 @@ threadSqueezeStack(StgTSO *tso)
 frame->link = next_frame;
 next_frame = frame;
 frame = prev_frame;
+#if DEBUG
+ IF_DEBUG(sanity,
+ if (!(frame>=top_frame && frame<=bottom)) {
+ printObj((StgClosure *)prev_frame);
+ barf("threadSqueezeStack: current frame is rubbish %p; previous was %p\n",
+ frame, prev_frame);
+ })
+ switch (get_itbl(frame)->type) {
+ case UPDATE_FRAME: upd_frames++;
+ if (frame->updatee->header.info == &BLACKHOLE_info)
+ bhs++;
+ break;
+ case STOP_FRAME: stop_frames++;
+ break;
+ case CATCH_FRAME: catch_frames++;
+ break;
+ case SEQ_FRAME: seq_frames++;
+ break;
+ default:
+ printObj((StgClosure *)prev_frame);
+ barf("Found non-frame during stack squeezing at %p (prev frame was %p)\n",
+ frame, prev_frame);
+ }
+#endif
 if (get_itbl(frame)->type == UPDATE_FRAME
 && frame->updatee->header.info == &BLACKHOLE_info) {
 break;
@@ -2863,8 +3219,9 @@ threadSqueezeStack(StgTSO *tso)
 StgClosure *updatee_keep = prev_frame->updatee;
 StgClosure *updatee_bypass = frame->updatee;
 
-#if 0 /* DEBUG */
- fprintf(stderr, "squeezing frame at %p\n", frame);
+#if DEBUG
+ IF_DEBUG(gc, fprintf(stderr, "@@ squeezing frame at %p\n", frame));
+ squeezes++;
 #endif
 
 /* Deal with blocking queues. If both updatees have blocked
@@ -2949,9 +3306,10 @@ threadSqueezeStack(StgTSO *tso)
 else
 next_frame_bottom = tso->sp - 1;
 
-#if 0 /* DEBUG */
- fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom,
- displacement);
+#if DEBUG
+ IF_DEBUG(gc,
+ fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom,
+ displacement))
 #endif
 
 while (sp >= next_frame_bottom) {
@@ -2965,8 +3323,16 @@ threadSqueezeStack(StgTSO *tso)
 tso->sp += displacement;
 tso->su = prev_frame;
+#if DEBUG
+ IF_DEBUG(gc,
+ fprintf(stderr, "@@ threadSqueezeStack: squeezed %d update-frames; found %d BHs; found %d update-, %d stop-, %d catch, %d seq-frames\n",
+ squeezes, bhs, upd_frames, stop_frames, catch_frames, seq_frames))
+#endif
}
 
+//@node Pausing a thread, Index, Stack squeezing
+//@subsection Pausing a thread
+
 /* -----------------------------------------------------------------------------
 * Pausing a thread
 *
 * We are taking a short break from running Haskell code
 * here. We also take the opportunity to do stack squeezing if it's
 * turned on.
 * -------------------------------------------------------------------------- */
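threadPaused below is the entry point the rest of the RTS is expected to use; a sketch of a typical call site when a thread is descheduled (the surrounding scheduler code is not part of this patch, so this is indicative only):

    /* on a context switch, roughly: */
    SaveThreadState();           /* flush Sp/Su/Hp back into the TSO */
    threadPaused(CurrentTSO);    /* lazy black-holing and/or stack squeezing */

Doing the work here keeps both optimisations off the per-update hot path: they run once per pause instead of once per thunk update.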
* -------------------------------------------------------------------------- */ +//@cindex threadPaused void threadPaused(StgTSO *tso) @@ -2983,3 +3350,83 @@ threadPaused(StgTSO *tso) else threadLazyBlackHole(tso); } + +#if DEBUG +//@cindex printMutOnceList +void +printMutOnceList(generation *gen) +{ + const StgInfoTable *info; + StgMutClosure *p, *next, *new_list; + + p = gen->mut_once_list; + new_list = END_MUT_LIST; + next = p->mut_link; + + evac_gen = gen->no; + failed_to_evac = rtsFalse; + + fprintf(stderr, "@@ Mut once list %p: ", gen->mut_once_list); + for (; p != END_MUT_LIST; p = next, next = p->mut_link) { + fprintf(stderr, "%p (%s), ", + p, info_type((StgClosure *)p)); + } + fputc('\n', stderr); +} + +//@cindex printMutableList +void +printMutableList(generation *gen) +{ + const StgInfoTable *info; + StgMutClosure *p, *next; + + p = gen->saved_mut_list; + next = p->mut_link; + + evac_gen = 0; + failed_to_evac = rtsFalse; + + fprintf(stderr, "@@ Mutable list %p: ", gen->saved_mut_list); + for (; p != END_MUT_LIST; p = next, next = p->mut_link) { + fprintf(stderr, "%p (%s), ", + p, info_type((StgClosure *)p)); + } + fputc('\n', stderr); +} +#endif /* DEBUG */ + +//@node Index, , Pausing a thread +//@subsection Index + +//@index +//* GarbageCollect:: @cindex\s-+GarbageCollect +//* MarkRoot:: @cindex\s-+MarkRoot +//* RevertCAFs:: @cindex\s-+RevertCAFs +//* addBlock:: @cindex\s-+addBlock +//* cleanup_weak_ptr_list:: @cindex\s-+cleanup_weak_ptr_list +//* copy:: @cindex\s-+copy +//* copyPart:: @cindex\s-+copyPart +//* evacuate:: @cindex\s-+evacuate +//* evacuate_large:: @cindex\s-+evacuate_large +//* gcCAFs:: @cindex\s-+gcCAFs +//* isAlive:: @cindex\s-+isAlive +//* mkMutCons:: @cindex\s-+mkMutCons +//* relocate_TSO:: @cindex\s-+relocate_TSO +//* revert_dead_CAFs:: @cindex\s-+revert_dead_CAFs +//* scavenge:: @cindex\s-+scavenge +//* scavenge_large:: @cindex\s-+scavenge_large +//* scavenge_mut_once_list:: @cindex\s-+scavenge_mut_once_list +//* scavenge_mutable_list:: @cindex\s-+scavenge_mutable_list +//* scavenge_one:: @cindex\s-+scavenge_one +//* scavenge_srt:: @cindex\s-+scavenge_srt +//* scavenge_stack:: @cindex\s-+scavenge_stack +//* scavenge_static:: @cindex\s-+scavenge_static +//* threadLazyBlackHole:: @cindex\s-+threadLazyBlackHole +//* threadPaused:: @cindex\s-+threadPaused +//* threadSqueezeStack:: @cindex\s-+threadSqueezeStack +//* traverse_weak_ptr_list:: @cindex\s-+traverse_weak_ptr_list +//* upd_evacuee:: @cindex\s-+upd_evacuee +//* zero_mutable_list:: @cindex\s-+zero_mutable_list +//* zero_static_object_list:: @cindex\s-+zero_static_object_list +//@end index diff --git a/ghc/rts/GC.h b/ghc/rts/GC.h index dc7beb8..212620e 100644 --- a/ghc/rts/GC.h +++ b/ghc/rts/GC.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: GC.h,v 1.4 1999/02/05 16:02:43 simonm Exp $ + * $Id: GC.h,v 1.5 2000/01/13 14:34:03 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -9,3 +9,4 @@ void threadPaused(StgTSO *); StgClosure *isAlive(StgClosure *p); +void GarbageCollect(void (*get_roots)(void)); diff --git a/ghc/rts/Hash.h b/ghc/rts/Hash.h index ac0df5c..74ff321 100644 --- a/ghc/rts/Hash.h +++ b/ghc/rts/Hash.h @@ -1,5 +1,5 @@ /*----------------------------------------------------------------------------- - * $Id: Hash.h,v 1.1 1999/01/27 12:11:26 simonm Exp $ + * $Id: Hash.h,v 1.2 2000/01/13 14:34:03 hwloidl Exp $ * * (c) The GHC Team, 1999 * @@ -14,3 +14,4 @@ void insertHashTable ( HashTable *table, StgWord key, void *data ); void * 
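/* A usage sketch for the GarbageCollect() prototype exported in GC.h
   above (illustration only, not part of the patch): the caller supplies
   a callback that marks its roots, assuming MarkRoot() (listed in the
   GC.c index above) evacuates a root and returns its new address.
   mark_my_roots and my_root are invented names.

     static StgClosure *my_root;

     static void mark_my_roots(void)
     {
         my_root = MarkRoot(my_root);
     }

     ...
     GarbageCollect(mark_my_roots);
*/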
removeHashTable ( HashTable *table, StgWord key, void *data ); void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) ); HashTable * allocHashTable ( void ); + diff --git a/ghc/rts/HeapStackCheck.hc b/ghc/rts/HeapStackCheck.hc index fc29ba7..1a30f44 100644 --- a/ghc/rts/HeapStackCheck.hc +++ b/ghc/rts/HeapStackCheck.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: HeapStackCheck.hc,v 1.10 1999/11/09 15:46:51 simonmar Exp $ + * $Id: HeapStackCheck.hc,v 1.11 2000/01/13 14:34:03 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -276,6 +276,334 @@ EXTFUN(stg_gc_enter_8) FE_ } +#if defined(GRAN) +/* + ToDo: merge the block and yield macros, calling something like BLOCK(N) + at the end; +*/ + +/* + Should we actually ever do a yield in such a case?? -- HWL +*/ +EXTFUN(gran_yield_0) +{ + FB_ + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +EXTFUN(gran_yield_1) +{ + FB_ + Sp -= 1; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 2 Regs--------------------------------------------------------------------*/ + +EXTFUN(gran_yield_2) +{ + FB_ + Sp -= 2; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 3 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_3) +{ + FB_ + Sp -= 3; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 4 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_4) +{ + FB_ + Sp -= 4; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 5 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_5) +{ + FB_ + Sp -= 5; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 6 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_6) +{ + FB_ + Sp -= 6; + Sp[5] = R6.w; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 7 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_7) +{ + FB_ + Sp -= 7; + Sp[6] = R7.w; + Sp[5] = R6.w; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +/*- 8 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_8) +{ + FB_ + Sp -= 8; + Sp[7] = R8.w; + Sp[6] = R7.w; + Sp[5] = R6.w; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadYielding; + JMP_(StgReturn); + FE_ +} + +// the same routines but with a block rather than a yield + +EXTFUN(gran_block_1) +{ + FB_ + Sp -= 1; + Sp[0] = R1.w; + 
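/* A sketch (not in the patch) of the pattern shared by the gran_yield_N
   and gran_block_N entry points in this file: flush N registers to the
   stack, save the thread state, and hand a result code back to the
   scheduler. A hypothetical generator macro, assuming the FB_/FE_
   brackets and register fields behave exactly as in the code above:

     #define GRAN_STOP_BODY(N,SAVE_REGS,RESULT) \
       FB_                                      \
       Sp -= (N);                               \
       SAVE_REGS;                               \
       SaveThreadState();                       \
       CurrentTSO->whatNext = ThreadEnterGHC;   \
       R1.i = (RESULT);                         \
       JMP_(StgReturn);                         \
       FE_

   gran_yield_2 would then pass ThreadYielding and gran_block_2
   ThreadBlocked, with SAVE_REGS = (Sp[1]=R2.w, Sp[0]=R1.w); gran_yield_0
   saves no registers at all. */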
SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 2 Regs--------------------------------------------------------------------*/ + +EXTFUN(gran_block_2) +{ + FB_ + Sp -= 2; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 3 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_3) +{ + FB_ + Sp -= 3; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 4 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_4) +{ + FB_ + Sp -= 4; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 5 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_5) +{ + FB_ + Sp -= 5; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 6 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_6) +{ + FB_ + Sp -= 6; + Sp[5] = R6.w; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 7 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_7) +{ + FB_ + Sp -= 7; + Sp[6] = R7.w; + Sp[5] = R6.w; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +/*- 8 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_8) +{ + FB_ + Sp -= 8; + Sp[7] = R8.w; + Sp[6] = R7.w; + Sp[5] = R6.w; + Sp[4] = R5.w; + Sp[3] = R4.w; + Sp[2] = R3.w; + Sp[1] = R2.w; + Sp[0] = R1.w; + SaveThreadState(); + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +#endif + +#if 0 && defined(PAR) + +/* + Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the + saving of the thread state from the actual jump via an StgReturn. + We need this separation because we call RTS routines in blocking entry codes + before jumping back into the RTS (see parallel/FetchMe.hc). 
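
   A sketch of the intended control flow in such a blocking entry point
   (schematic rather than literal C; sendFetch stands for whatever RTS
   routine needs to run in between):

       par_block_1_no_jump();   -- save R1 and the thread state
       sendFetch(...);          -- arbitrary RTS work is now safe
       JMP_(par_jump);          -- set ThreadBlocked, re-enter the scheduler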
+*/ + +EXTFUN(par_block_1_no_jump) +{ + FB_ + Sp -= 1; + Sp[0] = R1.w; + SaveThreadState(); + FE_ +} + +EXTFUN(par_jump) +{ + FB_ + CurrentTSO->whatNext = ThreadEnterGHC; + R1.i = ThreadBlocked; + JMP_(StgReturn); + FE_ +} + +#endif + /* ----------------------------------------------------------------------------- For a case expression on a polymorphic or function-typed object, if the default branch (there can only be one branch) of the case fails diff --git a/ghc/rts/Main.c b/ghc/rts/Main.c index 09e6e21..1c721b7 100644 --- a/ghc/rts/Main.c +++ b/ghc/rts/Main.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Main.c,v 1.13 2000/01/13 12:40:15 simonmar Exp $ + * $Id: Main.c,v 1.14 2000/01/13 14:34:03 hwloidl Exp $ * * (c) The GHC Team 1998-1999 * @@ -16,21 +16,25 @@ #include "RtsUtils.h" #ifdef DEBUG -#include "Printer.h" /* for printing */ +# include "Printer.h" /* for printing */ #endif #ifdef INTERPRETER -#include "Assembler.h" +# include "Assembler.h" #endif #ifdef PAR -#include "ParInit.h" -#include "Parallel.h" -#include "LLC.h" +# include "ParInit.h" +# include "Parallel.h" +# include "LLC.h" +#endif + +#if defined(GRAN) || defined(PAR) +# include "GranSimRts.h" #endif #ifdef HAVE_WINDOWS_H -#include +# include #endif @@ -41,24 +45,65 @@ int main(int argc, char *argv[]) { int exit_status; - SchedulerStatus status; + /* all GranSim/GUM init is done in startupHaskell; sets IAmMainThread! */ + startupHaskell(argc,argv); -# ifndef PAR - /* ToDo: want to start with a larger stack size */ - status = rts_evalIO((StgClosure *)&mainIO_closure, NULL); -# else + /* kick off the computation by creating the main thread with a pointer + to mainIO_closure representing the computation of the overall program; + then enter the scheduler with this thread and off we go; + + the same for GranSim (we have only one instance of this code) + + in a parallel setup, where we have many instances of this code + running on different PEs, we should do this only for the main PE + (IAmMainThread is set in startupHaskell) + */ + +# if defined(PAR) + +# if DEBUG + { /* a wait loop to allow attachment of gdb to UNIX threads */ + nat i, j, s; + + for (i=0, s=0; ihead != (StgTSO *)&END_TSO_QUEUE_closure) { ASSERT(mvar->head->why_blocked == BlockedOnMVar); +#if defined(GRAN) +# error FixME +#elif defined(PAR) + // ToDo: check 2nd arg (mvar) is right + mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); +#else mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); +#endif if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) { mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure; } diff --git a/ghc/rts/Printer.c b/ghc/rts/Printer.c index cbb20dd..600d0a2 100644 --- a/ghc/rts/Printer.c +++ b/ghc/rts/Printer.c @@ -1,6 +1,5 @@ - /* ----------------------------------------------------------------------------- - * $Id: Printer.c,v 1.18 1999/11/29 18:59:46 sewardj Exp $ + * $Id: Printer.c,v 1.19 2000/01/13 14:34:04 hwloidl Exp $ * * Copyright (c) 1994-1999. 
* @@ -18,6 +17,12 @@ #include "Bytecodes.h" /* for InstrPtr */ #include "Disassembler.h" +#include "Printer.h" + +// HWL: explicit fixed header size to make debugging easier +int fixed_hs = FIXED_HS, itbl_sz = sizeofW(StgInfoTable), + uf_sz=sizeofW(StgUpdateFrame), sf_sz=sizeofW(StgSeqFrame); + /* -------------------------------------------------------------------------- * local function decls * ------------------------------------------------------------------------*/ @@ -198,6 +203,14 @@ void printClosure( StgClosure *obj ) fprintf(stderr,")\n"); break; +#if defined(GRAN) || defined(PAR) + case RBH: + fprintf(stderr,"RBH("); + printPtr((StgPtr)stgCast(StgRBH*,obj)->blocking_queue); + fprintf(stderr,")\n"); + break; +#endif + case CONSTR: case CONSTR_1_0: case CONSTR_0_1: case CONSTR_1_1: case CONSTR_0_2: case CONSTR_2_0: @@ -305,6 +318,13 @@ void printClosure( StgClosure *obj ) } } +/* +void printGraph( StgClosure *obj ) +{ + printClosure(obj); +} +*/ + StgPtr printStackObj( StgPtr sp ) { /*fprintf(stderr,"Stack[%d] = ", &stgStack[STACK_SIZE] - sp); */ @@ -678,7 +698,7 @@ static void printZcoded( const char *raw ) /* Causing linking trouble on Win32 plats, so I'm disabling this for now. */ -#if defined(HAVE_BFD_H) && !defined(_WIN32) +#if defined(HAVE_BFD_H) && !defined(_WIN32) && defined(USE_BSD) #include diff --git a/ghc/rts/RtsFlags.c b/ghc/rts/RtsFlags.c index c3c0515..8324e1a 100644 --- a/ghc/rts/RtsFlags.c +++ b/ghc/rts/RtsFlags.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: RtsFlags.c,v 1.23 2000/01/13 12:40:15 simonmar Exp $ + * $Id: RtsFlags.c,v 1.24 2000/01/13 14:34:04 hwloidl Exp $ * * (c) The AQUA Project, Glasgow University, 1994-1997 * (c) The GHC Team, 1998-1999 @@ -8,6 +8,19 @@ * * ---------------------------------------------------------------------------*/ +//@menu +//* Includes:: +//* Constants:: +//* Static function decls:: +//* Command-line option parsing routines:: +//* GranSim specific options:: +//* Aux fcts:: +//@end menu +//*/ + +//@node Includes, Constants +//@subsection Includes + #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" @@ -36,12 +49,100 @@ char **prog_argv = NULL; int rts_argc; /* ditto */ char *rts_argv[MAX_RTS_ARGS]; +//@node Constants, Static function decls, Includes +//@subsection Constants + /* * constants, used later */ #define RTS 1 #define PGM 0 +#if defined(GRAN) + +char *gran_debug_opts_strs[] = { + "DEBUG (-bDe, -bD1): event_trace; printing event trace.\n", + "DEBUG (-bDE, -bD2): event_stats; printing event statistics.\n", + "DEBUG (-bDb, -bD4): bq; check blocking queues\n", + "DEBUG (-bDG, -bD8): pack; routines for (un-)packing graph structures.\n", + "DEBUG (-bDq, -bD16): checkSparkQ; check consistency of the spark queues.\n", + "DEBUG (-bDf, -bD32): thunkStealing; print forwarding of fetches.\n", + "DEBUG (-bDr, -bD64): randomSteal; stealing sparks/threads from random PEs.\n", + "DEBUG (-bDF, -bD128): findWork; searching spark-pools (local & remote), thread queues for work.\n", + "DEBUG (-bDu, -bD256): unused; currently unused flag.\n", + "DEBUG (-bDS, -bD512): pri; priority sparking or scheduling.\n", + "DEBUG (-bD:, -bD1024): checkLight; check GranSim-Light setup.\n", + "DEBUG (-bDo, -bD2048): sortedQ; check whether spark/thread queues are sorted.\n", + "DEBUG (-bDz, -bD4096): blockOnFetch; check for blocked on fetch.\n", + "DEBUG (-bDP, -bD8192): packBuffer; routines handling pack buffer (GranSim internal!).\n", + "DEBUG (-bDt, -bD16384): blockOnFetch_sanity; 
check for TSO asleep on fetch.\n", +}; + +/* one character codes for the available debug options */ +char gran_debug_opts_flags[] = { + 'e', 'E', 'b', 'G', 'q', 'f', 'r', 'F', 'u', 'S', ':', 'o', 'z', 'P', 't' +}; + +/* prefix strings printed with the debug messages of the corresponding type */ +char *gran_debug_opts_prefix[] = { + "", /* event_trace */ + "", /* event_stats */ + "##", /* bq */ + "**", /* pack */ + "^^", /* checkSparkQ */ + "==", /* thunkStealing */ + "^^", /* randomSteal */ + "+-", /* findWork */ + "", /* unused */ + "++", /* pri */ + "::", /* checkLight */ + "##", /* sortedQ */ + "", /* blockOnFetch */ + "", /* packBuffer */ + "" /* blockOnFetch_sanity */ +}; + +#elif defined(PAR) + +char *par_debug_opts_strs[] = { + "DEBUG (-qDv, -qD1): verbose; be generally verbose with parallel related stuff.\n", + "DEBUG (-qDt, -qD2): trace; trace messages.\n", + "DEBUG (-qDs, -qD4): schedule; scheduling of parallel threads.\n", + "DEBUG (-qDe, -qD8): free; free messages.\n", + "DEBUG (-qDr, -qD16): resume; resume messages.\n", + "DEBUG (-qDw, -qD32): weight; print weights for GC.\n", + "DEBUG (-qDF, -qD64): fetch; fetch messages.\n", + "DEBUG (-qDa, -qD128): ack; ack messages.\n", + "DEBUG (-qDf, -qD256): fish; fish messages.\n", + "DEBUG (-qDo, -qD512): forward; forwarding messages to other PEs.\n", + "DEBUG (-qDp, -qD1024): pack; packing and unpacking graphs.\n" +}; + +/* one character codes for the available debug options */ +char par_debug_opts_flags[] = { + 'v', 't', 's', 'e', 'r', 'w', 'F', 'a', 'f', 'o', 'p' +}; + +/* prefix strings printed with the debug messages of the corresponding type */ +char *par_debug_opts_prefix[] = { + " ", /* verbose */ + "..", /* trace */ + "--", /* schedule */ + "!!", /* free */ + "[]", /* resume */ + ";;", /* weight */ + "%%", /* fetch */ + ",,", /* ack */ + "$$", /* fish */ + "", /* forward */ + "**" /* pack */ +}; + +#endif /* PAR */ + +//@node Static function decls, Command-line option parsing routines, Constants +//@subsection Static function decls + /* ----------------------------------------------------------------------------- Static function decls -------------------------------------------------------------------------- */ @@ -56,6 +157,20 @@ open_stats_file ( static I_ decode(const char *s); static void bad_option(const char *s); +#if defined(GRAN) +static void enable_GranSimLight(void); +static void process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error); +static void set_GranSim_debug_options(nat n); +static void help_GranSim_debug_options(nat n); +#elif defined(PAR) +static void process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error); +static void set_par_debug_options(nat n); +static void help_par_debug_options(nat n); +#endif + +//@node Command-line option parsing routines, GranSim specific options, Static function decls +//@subsection Command-line option parsing routines + /* ----------------------------------------------------------------------------- * Command-line option parsing routines. 
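 *
 * A worked example of the -qD<n>/-bD<n> bitmask encoding handled below
 * (derived from the tables above, no new behaviour): in a GUM setup,
 * -qD5 decodes to n = 5 = binary 101, so bits 0 and 2 are set and
 * set_par_debug_options() switches on Debug.verbose and Debug.schedule;
 * the same effect can be had by name with -qDv -qDs.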
* ---------------------------------------------------------------------------*/ @@ -109,82 +224,92 @@ void initRtsFlagsDefaults(void) #endif #ifdef PAR - RtsFlags.ParFlags.parallelStats = rtsFalse; - RtsFlags.ParFlags.granSimStats = rtsFalse; - RtsFlags.ParFlags.granSimStats_Binary = rtsFalse; + RtsFlags.ParFlags.ParStats.Full = rtsFalse; + RtsFlags.ParFlags.ParStats.Binary = rtsFalse; + RtsFlags.ParFlags.ParStats.Sparks = rtsFalse; + RtsFlags.ParFlags.ParStats.Heap = rtsFalse; + RtsFlags.ParFlags.ParStats.NewLogfile = rtsFalse; + RtsFlags.ParFlags.ParStats.Global = rtsFalse; + RtsFlags.ParFlags.outputDisabled = rtsFalse; RtsFlags.ParFlags.packBufferSize = 1024; + + RtsFlags.ParFlags.maxThreads = 1024; + RtsFlags.ParFlags.maxFishes = MAX_FISHES; + RtsFlags.ParFlags.fishDelay = FISH_DELAY; #endif #if defined(PAR) || defined(SMP) RtsFlags.ParFlags.maxLocalSparks = 4096; -#endif +#endif /* PAR || SMP */ + +#if defined(GRAN) + /* ToDo: check defaults for GranSim and GUM */ + RtsFlags.ConcFlags.ctxtSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */ + RtsFlags.ConcFlags.maxThreads = 65536; // refers to mandatory threads + RtsFlags.GcFlags.maxStkSize = (1024 * 1024) / sizeof(W_); + RtsFlags.GcFlags.initialStkSize = 1024 / sizeof(W_); + + RtsFlags.GranFlags.GranSimStats.Full = rtsFalse; + RtsFlags.GranFlags.GranSimStats.Suppressed = rtsFalse; + RtsFlags.GranFlags.GranSimStats.Binary = rtsFalse; + RtsFlags.GranFlags.GranSimStats.Sparks = rtsFalse; + RtsFlags.GranFlags.GranSimStats.Heap = rtsFalse; + RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsFalse; + RtsFlags.GranFlags.GranSimStats.Global = rtsFalse; -#ifdef GRAN - RtsFlags.GranFlags.granSimStats = rtsFalse; - RtsFlags.GranFlags.granSimStats_suppressed = rtsFalse; - RtsFlags.GranFlags.granSimStats_Binary = rtsFalse; - RtsFlags.GranFlags.granSimStats_Sparks = rtsFalse; - RtsFlags.GranFlags.granSimStats_Heap = rtsFalse; - RtsFlags.GranFlags.labelling = rtsFalse; RtsFlags.GranFlags.packBufferSize = 1024; RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE; - RtsFlags.GranFlags.proc = MAX_PROC; - RtsFlags.GranFlags.max_fishes = MAX_FISHES; - RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE; - RtsFlags.GranFlags.Light = rtsFalse; - - RtsFlags.GranFlags.gran_latency = LATENCY; - RtsFlags.GranFlags.gran_additional_latency = ADDITIONAL_LATENCY; - RtsFlags.GranFlags.gran_fetchtime = FETCHTIME; - RtsFlags.GranFlags.gran_lunblocktime = LOCALUNBLOCKTIME; - RtsFlags.GranFlags.gran_gunblocktime = GLOBALUNBLOCKTIME; - RtsFlags.GranFlags.gran_mpacktime = MSGPACKTIME; - RtsFlags.GranFlags.gran_munpacktime = MSGUNPACKTIME; - RtsFlags.GranFlags.gran_mtidytime = MSGTIDYTIME; - - RtsFlags.GranFlags.gran_threadcreatetime = THREADCREATETIME; - RtsFlags.GranFlags.gran_threadqueuetime = THREADQUEUETIME; - RtsFlags.GranFlags.gran_threaddescheduletime = THREADDESCHEDULETIME; - RtsFlags.GranFlags.gran_threadscheduletime = THREADSCHEDULETIME; - RtsFlags.GranFlags.gran_threadcontextswitchtime = THREADCONTEXTSWITCHTIME; - - RtsFlags.GranFlags.gran_arith_cost = ARITH_COST; - RtsFlags.GranFlags.gran_branch_cost = BRANCH_COST; - RtsFlags.GranFlags.gran_load_cost = LOAD_COST; - RtsFlags.GranFlags.gran_store_cost = STORE_COST; - RtsFlags.GranFlags.gran_float_cost = FLOAT_COST; - - RtsFlags.GranFlags.gran_heapalloc_cost = HEAPALLOC_COST; - - RtsFlags.GranFlags.gran_pri_spark_overhead = PRI_SPARK_OVERHEAD; - RtsFlags.GranFlags.gran_pri_sched_overhead = PRI_SCHED_OVERHEAD; - - RtsFlags.GranFlags.DoFairSchedule = rtsFalse; - 
RtsFlags.GranFlags.DoReScheduleOnFetch = rtsFalse; - RtsFlags.GranFlags.DoStealThreadsFirst = rtsFalse; - RtsFlags.GranFlags.SimplifiedFetch = rtsFalse; - RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsFalse; - RtsFlags.GranFlags.DoGUMMFetching = rtsFalse; - RtsFlags.GranFlags.DoThreadMigration = rtsFalse; - RtsFlags.GranFlags.FetchStrategy = 2; + RtsFlags.GranFlags.proc = MAX_PROC; + RtsFlags.GranFlags.Fishing = rtsFalse; + RtsFlags.GranFlags.maxFishes = MAX_FISHES; + RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE; + RtsFlags.GranFlags.Light = rtsFalse; + + RtsFlags.GranFlags.Costs.latency = LATENCY; + RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY; + RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME; + RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME; + RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME; + RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME; + RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME; + RtsFlags.GranFlags.Costs.mtidytime = MSGTIDYTIME; + + RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME; + RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME; + RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME; + RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME; + RtsFlags.GranFlags.Costs.threadcontextswitchtime = THREADCONTEXTSWITCHTIME; + + RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST; + RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST; + RtsFlags.GranFlags.Costs.load_cost = LOAD_COST; + RtsFlags.GranFlags.Costs.store_cost = STORE_COST; + RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST; + + RtsFlags.GranFlags.Costs.heapalloc_cost = HEAPALLOC_COST; + + RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD; + RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD; + + RtsFlags.GranFlags.DoFairSchedule = rtsFalse; + RtsFlags.GranFlags.DoAsyncFetch = rtsFalse; + RtsFlags.GranFlags.DoStealThreadsFirst = rtsFalse; + RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsFalse; + RtsFlags.GranFlags.DoBulkFetching = rtsFalse; + RtsFlags.GranFlags.DoThreadMigration = rtsFalse; + RtsFlags.GranFlags.FetchStrategy = 2; RtsFlags.GranFlags.PreferSparksOfLocalNodes = rtsFalse; - RtsFlags.GranFlags.DoPrioritySparking = rtsFalse; - RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse; - RtsFlags.GranFlags.SparkPriority = 0; - RtsFlags.GranFlags.SparkPriority2 = 0; - RtsFlags.GranFlags.RandomPriorities = rtsFalse; - RtsFlags.GranFlags.InversePriorities = rtsFalse; - RtsFlags.GranFlags.IgnorePriorities = rtsFalse; - RtsFlags.GranFlags.ThunksToPack = 0; - RtsFlags.GranFlags.RandomSteal = rtsTrue; - RtsFlags.GranFlags.NoForward = rtsFalse; - RtsFlags.GranFlags.PrintFetchMisses = rtsFalse; - - RtsFlags.GranFlags.debug = 0x0; - RtsFlags.GranFlags.event_trace = rtsFalse; - RtsFlags.GranFlags.event_trace_all = rtsFalse; + RtsFlags.GranFlags.DoPrioritySparking = rtsFalse; + RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse; + RtsFlags.GranFlags.SparkPriority = 0; + RtsFlags.GranFlags.SparkPriority2 = 0; + RtsFlags.GranFlags.RandomPriorities = rtsFalse; + RtsFlags.GranFlags.InversePriorities = rtsFalse; + RtsFlags.GranFlags.IgnorePriorities = rtsFalse; + RtsFlags.GranFlags.ThunksToPack = 0; + RtsFlags.GranFlags.RandomSteal = rtsTrue; #endif #ifdef TICKY_TICKY @@ -279,10 +404,15 @@ usage_text[] = { # ifdef SMP " -N Use OS threads (default: 1)", # endif +" -e Size of spark pools (default 100)", +" -t Set maximum number of advisory threads per PE (default 32)", +" -o Set stack chunk size (default 1024)", + 
# ifdef PAR -" -q Enable activity profile (output files in ~/*.gr)", -" -qb Enable binary activity profile (output file /tmp/.gb)", -" -Q Set pack-buffer size (default: 1024)", +" -qP Enable activity profile (output files in ~/*.gr)", +" -qQ Set pack-buffer size (default: 1024)", +" -qd Turn on PVM-ish debugging", +" -qO Disable output for performance measurement", # endif # if defined(SMP) || defined(PAR) " -e Maximum number of outstanding local sparks (default: 4096)", @@ -470,6 +600,8 @@ error = rtsTrue; if ((n>>7)&1) RtsFlags.DebugFlags.sanity = rtsTrue; if ((n>>8)&1) RtsFlags.DebugFlags.stable = rtsTrue; if ((n>>9)&1) RtsFlags.DebugFlags.prof = rtsTrue; + if ((n>>10)&1) RtsFlags.DebugFlags.gran = rtsTrue; + if ((n>>11)&1) RtsFlags.DebugFlags.par = rtsTrue; } break; #endif @@ -546,7 +678,7 @@ error = rtsTrue; RtsFlags.GcFlags.giveStats ++; #ifdef PAR /* Opening all those files would almost certainly fail... */ - RtsFlags.ParFlags.parallelStats = rtsTrue; + RtsFlags.ParFlags.ParStats.Full = rtsTrue; RtsFlags.GcFlags.statsFile = stderr; /* temporary; ToDo: rm */ #else RtsFlags.GcFlags.statsFile @@ -776,37 +908,9 @@ error = rtsTrue; } ) break; - case 'O': + case 'q': PAR_BUILD_ONLY( - RtsFlags.ParFlags.outputDisabled = rtsTrue; - ) break; - - case 'q': /* activity profile option */ - PAR_BUILD_ONLY( - if (rts_argv[arg][2] == 'b') - RtsFlags.ParFlags.granSimStats_Binary = rtsTrue; - else - RtsFlags.ParFlags.granSimStats = rtsTrue; - ) break; - -#if 0 /* or??? */ - case 'q': /* quasi-parallel profile option */ - GRAN_BUILD_ONLY ( - if (rts_argv[arg][2] == 'v') - do_qp_prof = 2; - else - do_qp_prof++; - ) break; -#endif /* 0??? */ - - case 'Q': /* Set pack buffer size */ - PAR_BUILD_ONLY( - if (rts_argv[arg][2] != '\0') { - RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+2); - } else { - prog_belch("missing size of PackBuffer (for -Q)"); - error = rtsTrue; - } + process_par_option(arg, rts_argc, rts_argv, &error); ) break; /* =========== GRAN =============================== */ @@ -870,6 +974,942 @@ error = rtsTrue; } } +#if defined(GRAN) + +//@node GranSim specific options, Aux fcts, Command-line option parsing routines +//@subsection GranSim specific options + +static void +enable_GranSimLight(void) { + + fprintf(stderr,"GrAnSim Light enabled (infinite number of processors; 0 communication costs)\n"); + RtsFlags.GranFlags.Light=rtsTrue; + RtsFlags.GranFlags.Costs.latency = + RtsFlags.GranFlags.Costs.fetchtime = + RtsFlags.GranFlags.Costs.additional_latency = + RtsFlags.GranFlags.Costs.gunblocktime = + RtsFlags.GranFlags.Costs.lunblocktime = + RtsFlags.GranFlags.Costs.threadcreatetime = + RtsFlags.GranFlags.Costs.threadqueuetime = + RtsFlags.GranFlags.Costs.threadscheduletime = + RtsFlags.GranFlags.Costs.threaddescheduletime = + RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0; + + RtsFlags.GranFlags.Costs.mpacktime = + RtsFlags.GranFlags.Costs.munpacktime = 0; + + RtsFlags.GranFlags.DoFairSchedule = rtsTrue; + RtsFlags.GranFlags.DoAsyncFetch = rtsFalse; + RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsTrue; + /* FetchStrategy is irrelevant in GrAnSim-Light */ + + /* GrAnSim Light often creates an abundance of parallel threads, + each with its own stack etc. Therefore, it's in general a good + idea to use small stack chunks (use the -o option to + increase it again). 
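
      For example (hypothetical invocation, consistent with the option
      handling below): running a program as

          prog +RTS -b: -bP -RTS

      enters this mode via the ':' case, so proc is forced to 1, all the
      Costs fields are zeroed, and a full .gr profile is requested.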
+ */ + // RtsFlags.ConcFlags.stkChunkSize = 100; + + RtsFlags.GranFlags.proc = 1; +} + +static void +process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error) +{ + if (rts_argv[arg][1] != 'b') /* All GranSim options start with -b */ + return; + + /* or a ridiculously idealised simulator */ + if(strcmp((rts_argv[arg]+2),"oring")==0) { + RtsFlags.GranFlags.Costs.latency = + RtsFlags.GranFlags.Costs.fetchtime = + RtsFlags.GranFlags.Costs.additional_latency = + RtsFlags.GranFlags.Costs.gunblocktime = + RtsFlags.GranFlags.Costs.lunblocktime = + RtsFlags.GranFlags.Costs.threadcreatetime = + RtsFlags.GranFlags.Costs.threadqueuetime = + RtsFlags.GranFlags.Costs.threadscheduletime = + RtsFlags.GranFlags.Costs.threaddescheduletime = + RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0; + + RtsFlags.GranFlags.Costs.mpacktime = + RtsFlags.GranFlags.Costs.munpacktime = 0; + + RtsFlags.GranFlags.Costs.arith_cost = + RtsFlags.GranFlags.Costs.float_cost = + RtsFlags.GranFlags.Costs.load_cost = + RtsFlags.GranFlags.Costs.store_cost = + RtsFlags.GranFlags.Costs.branch_cost = 0; + + RtsFlags.GranFlags.Costs.heapalloc_cost = 1; + + /* ++RtsFlags.GranFlags.DoFairSchedule; */ + RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue; /* -bZ */ + RtsFlags.GranFlags.DoThreadMigration = rtsTrue; /* -bM */ + RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; /* -bP */ + return; + } + + /* or a somewhat idealised simulator */ + if(strcmp((rts_argv[arg]+2),"onzo")==0) { + RtsFlags.GranFlags.Costs.latency = + RtsFlags.GranFlags.Costs.fetchtime = + RtsFlags.GranFlags.Costs.additional_latency = + RtsFlags.GranFlags.Costs.gunblocktime = + RtsFlags.GranFlags.Costs.lunblocktime = + RtsFlags.GranFlags.Costs.threadcreatetime = + RtsFlags.GranFlags.Costs.threadqueuetime = + RtsFlags.GranFlags.Costs.threadscheduletime = + RtsFlags.GranFlags.Costs.threaddescheduletime = + RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0; + + RtsFlags.GranFlags.Costs.mpacktime = + RtsFlags.GranFlags.Costs.munpacktime = 0; + + RtsFlags.GranFlags.Costs.heapalloc_cost = 1; + + /* RtsFlags.GranFlags.DoFairSchedule = rtsTrue; */ /* -b-R */ + /* RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue; */ /* -b-T */ + RtsFlags.GranFlags.DoAsyncFetch = rtsTrue; /* -bZ */ + RtsFlags.GranFlags.DoThreadMigration = rtsTrue; /* -bM */ + RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; /* -bP */ +# if defined(GRAN_CHECK) && defined(GRAN) + RtsFlags.GranFlags.Debug.event_stats = rtsTrue; /* print event statistics */ +# endif + return; + } + + /* Communication and task creation cost parameters */ + switch(rts_argv[arg][2]) { + case '.': + IgnoreYields = rtsTrue; // HWL HACK + break; + + case ':': + enable_GranSimLight(); /* set flags for GrAnSim-Light mode */ + break; + + case 'l': + if (rts_argv[arg][3] != '\0') + { + RtsFlags.GranFlags.Costs.gunblocktime = + RtsFlags.GranFlags.Costs.latency = decode(rts_argv[arg]+3); + RtsFlags.GranFlags.Costs.fetchtime = 2*RtsFlags.GranFlags.Costs.latency; + } + else + RtsFlags.GranFlags.Costs.latency = LATENCY; + break; + + case 'a': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.additional_latency = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY; + break; + + case 'm': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.mpacktime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME; + break; + + case 'x': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.mtidytime = decode(rts_argv[arg]+3); + else + 
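/* An aside (sketch, not in the patch): the cost cases in this switch
   share one decode pattern -- "-b<c><n>" sets the corresponding Costs
   field to <n>, while a bare "-b<c>" falls back to the compile-time
   default (or to 0 in a few cases, as in the 'x' case here). Worked
   example from case 'l' above: "-bl100" sets Costs.latency = 100,
   Costs.gunblocktime = 100 and, since a fetch is modelled as a round
   trip, Costs.fetchtime = 2*latency = 200. */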
RtsFlags.GranFlags.Costs.mtidytime = 0; + break; + + case 'r': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.munpacktime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME; + break; + + case 'g': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.fetchtime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME; + break; + + case 'n': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.gunblocktime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME; + break; + + case 'u': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.lunblocktime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME; + break; + + /* Thread-related metrics */ + case 't': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.threadcreatetime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME; + break; + + case 'q': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.threadqueuetime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME; + break; + + case 'c': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.threadscheduletime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME; + + RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime + + RtsFlags.GranFlags.Costs.threaddescheduletime; + break; + + case 'd': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.threaddescheduletime = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME; + + RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime + + RtsFlags.GranFlags.Costs.threaddescheduletime; + break; + + /* Instruction Cost Metrics */ + case 'A': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.arith_cost = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST; + break; + + case 'F': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.float_cost = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST; + break; + + case 'B': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.branch_cost = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST; + break; + + case 'L': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.load_cost = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.load_cost = LOAD_COST; + break; + + case 'S': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.store_cost = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.store_cost = STORE_COST; + break; + + case 'H': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.heapalloc_cost = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.heapalloc_cost = 0; + break; + + case 'y': + RtsFlags.GranFlags.DoAsyncFetch = rtsTrue; + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.FetchStrategy = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.FetchStrategy = 2; + if (RtsFlags.GranFlags.FetchStrategy == 0) + RtsFlags.GranFlags.DoAsyncFetch = rtsFalse; + break; + + case 'K': /* sort overhead (per elem in spark list) */ + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.pri_spark_overhead = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.pri_spark_overhead = 
PRI_SPARK_OVERHEAD; + fprintf(stderr,"Overhead for pri spark: %d (per elem).\n", + RtsFlags.GranFlags.Costs.pri_spark_overhead); + break; + + case 'O': /* sort overhead (per elem in spark list) */ + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.Costs.pri_sched_overhead = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD; + fprintf(stderr,"Overhead for pri sched: %d (per elem).\n", + RtsFlags.GranFlags.Costs.pri_sched_overhead); + break; + + /* General Parameters */ + case 'p': + if (rts_argv[arg][3] != '\0') + { + RtsFlags.GranFlags.proc = decode(rts_argv[arg]+3); + if (RtsFlags.GranFlags.proc==0) { + enable_GranSimLight(); /* set flags for GrAnSim-Light mode */ + } else if (RtsFlags.GranFlags.proc > MAX_PROC || + RtsFlags.GranFlags.proc < 1) + { + fprintf(stderr,"setupRtsFlags: no more than %u processors allowed\n", + MAX_PROC); + *error = rtsTrue; + } + } + else + RtsFlags.GranFlags.proc = MAX_PROC; + break; + + case 'f': + RtsFlags.GranFlags.Fishing = rtsTrue; + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.maxFishes = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.maxFishes = MAX_FISHES; + break; + + case 'w': + if (rts_argv[arg][3] != '\0') + RtsFlags.GranFlags.time_slice = decode(rts_argv[arg]+3); + else + RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE; + break; + + case 'C': + RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsTrue; + RtsFlags.GranFlags.DoThreadMigration=rtsTrue; + break; + + case 'G': + fprintf(stderr,"Bulk fetching enabled.\n"); + RtsFlags.GranFlags.DoBulkFetching=rtsTrue; + break; + + case 'M': + fprintf(stderr,"Thread migration enabled.\n"); + RtsFlags.GranFlags.DoThreadMigration=rtsTrue; + break; + + case 'R': + fprintf(stderr,"Fair Scheduling enabled.\n"); + RtsFlags.GranFlags.DoFairSchedule=rtsTrue; + break; + + case 'I': + fprintf(stderr,"Priority Scheduling enabled.\n"); + RtsFlags.GranFlags.DoPriorityScheduling=rtsTrue; + break; + + case 'T': + RtsFlags.GranFlags.DoStealThreadsFirst=rtsTrue; + RtsFlags.GranFlags.DoThreadMigration=rtsTrue; + break; + + case 'Z': + RtsFlags.GranFlags.DoAsyncFetch=rtsTrue; + break; + +/* case 'z': */ +/* RtsFlags.GranFlags.SimplifiedFetch=rtsTrue; */ +/* break; */ + + case 'N': + RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsTrue; + break; + + case 'b': + RtsFlags.GranFlags.GranSimStats.Binary=rtsTrue; + break; + + case 'P': + /* format is -bP<c> where <c> is one char describing kind of profile */ + RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; + switch(rts_argv[arg][3]) { + case '\0': break; // nothing special, just an ordinary profile + case '0': RtsFlags.GranFlags.GranSimStats.Suppressed = rtsTrue; + break; + case 'b': RtsFlags.GranFlags.GranSimStats.Binary = rtsTrue; + break; + case 's': RtsFlags.GranFlags.GranSimStats.Sparks = rtsTrue; + break; + case 'h': RtsFlags.GranFlags.GranSimStats.Heap = rtsTrue; + break; + case 'n': RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsTrue; + break; + case 'g': RtsFlags.GranFlags.GranSimStats.Global = rtsTrue; + break; + default: barf("Unknown option -bP%c", rts_argv[arg][3]); + } + break; + + case 's': + RtsFlags.GranFlags.GranSimStats.Sparks=rtsTrue; + break; + + case 'h': + RtsFlags.GranFlags.GranSimStats.Heap=rtsTrue; + break; + + case 'Y': /* syntax: -bY<n>[,<n>] n ...
pos int */ + if (rts_argv[arg][3] != '\0') { + char *arg0, *tmp; + + arg0 = rts_argv[arg]+3; + if ((tmp = strstr(arg0,","))==NULL) { + RtsFlags.GranFlags.SparkPriority = decode(arg0); + fprintf(stderr,"SparkPriority: %u.\n",RtsFlags.GranFlags.SparkPriority); + } else { + *(tmp++) = '\0'; + RtsFlags.GranFlags.SparkPriority = decode(arg0); + RtsFlags.GranFlags.SparkPriority2 = decode(tmp); + fprintf(stderr,"SparkPriority: %u.\n", + RtsFlags.GranFlags.SparkPriority); + fprintf(stderr,"SparkPriority2:%u.\n", + RtsFlags.GranFlags.SparkPriority2); + if (RtsFlags.GranFlags.SparkPriority2 < + RtsFlags.GranFlags.SparkPriority) { + fprintf(stderr,"WARNING: 2nd pri < main pri (%u<%u); 2nd pri has no effect\n", + RtsFlags.GranFlags.SparkPriority2, + RtsFlags.GranFlags.SparkPriority); + } + } + } else { + /* plain pri spark is now invoked with -bX + RtsFlags.GranFlags.DoPrioritySparking = 1; + fprintf(stderr,"PrioritySparking.\n"); + */ + } + break; + + case 'Q': + if (rts_argv[arg][3] != '\0') { + RtsFlags.GranFlags.ThunksToPack = decode(rts_argv[arg]+3); + } else { + RtsFlags.GranFlags.ThunksToPack = 1; + } + fprintf(stderr,"Thunks To Pack in one packet: %u.\n", + RtsFlags.GranFlags.ThunksToPack); + break; + + case 'e': + RtsFlags.GranFlags.RandomSteal = rtsFalse; + fprintf(stderr,"Deterministic mode (no random stealing)\n"); + break; + + /* The following class of options contains eXperimental */ + /* features in connection with exploiting granularity */ + /* information. I.e. if -bY is chosen these options */ + /* tell the RTS what to do with the supplied info --HWL */ + + case 'W': + if (rts_argv[arg][3] != '\0') { + RtsFlags.GranFlags.packBufferSize_internal = decode(rts_argv[arg]+3); + } else { + RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE; + } + fprintf(stderr,"Size of GranSim internal pack buffer: %u.\n", + RtsFlags.GranFlags.packBufferSize_internal); + break; + + case 'X': + switch(rts_argv[arg][3]) { + + case '\0': + RtsFlags.GranFlags.DoPrioritySparking = 1; + fprintf(stderr,"Priority Sparking with Normal Priorities.\n"); + RtsFlags.GranFlags.InversePriorities = rtsFalse; + RtsFlags.GranFlags.RandomPriorities = rtsFalse; + RtsFlags.GranFlags.IgnorePriorities = rtsFalse; + break; + + case 'I': + RtsFlags.GranFlags.DoPrioritySparking = 1; + fprintf(stderr,"Priority Sparking with Inverse Priorities.\n"); + RtsFlags.GranFlags.InversePriorities++; + break; + + case 'R': + RtsFlags.GranFlags.DoPrioritySparking = 1; + fprintf(stderr,"Priority Sparking with Random Priorities.\n"); + RtsFlags.GranFlags.RandomPriorities++; + break; + + case 'N': + RtsFlags.GranFlags.DoPrioritySparking = 1; + fprintf(stderr,"Priority Sparking with No Priorities.\n"); + RtsFlags.GranFlags.IgnorePriorities++; + break; + + default: + bad_option( rts_argv[arg] ); + break; + } + break; + + case '-': + switch(rts_argv[arg][3]) { + + case 'C': + RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsFalse; + RtsFlags.GranFlags.DoThreadMigration=rtsFalse; + break; + + case 'G': + RtsFlags.GranFlags.DoBulkFetching=rtsFalse; + break; + + case 'M': + RtsFlags.GranFlags.DoThreadMigration=rtsFalse; + break; + + case 'R': + RtsFlags.GranFlags.DoFairSchedule=rtsFalse; + break; + + case 'T': + RtsFlags.GranFlags.DoStealThreadsFirst=rtsFalse; + RtsFlags.GranFlags.DoThreadMigration=rtsFalse; + break; + + case 'Z': + RtsFlags.GranFlags.DoAsyncFetch=rtsFalse; + break; + + case 'N': + RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsFalse; + break; + + case 'P': + RtsFlags.GranFlags.GranSimStats.Suppressed=rtsTrue; + 
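/* Aside (not in the patch): this '-' block gives each option a negative
   form -- "-b-<c>" switches off what "-b<c>" switches on, e.g. -bM
   enables thread migration and -b-M disables it. Note that -b-P does not
   clear GranSimStats.Full; as just above, it sets Suppressed instead. */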
break; + + case 's': + RtsFlags.GranFlags.GranSimStats.Sparks=rtsFalse; + break; + + case 'h': + RtsFlags.GranFlags.GranSimStats.Heap=rtsFalse; + break; + + case 'b': + RtsFlags.GranFlags.GranSimStats.Binary=rtsFalse; + break; + + case 'X': + RtsFlags.GranFlags.DoPrioritySparking = rtsFalse; + break; + + case 'Y': + RtsFlags.GranFlags.DoPrioritySparking = rtsFalse; + RtsFlags.GranFlags.SparkPriority = rtsFalse; + break; + + case 'I': + RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse; + break; + + case 'e': + RtsFlags.GranFlags.RandomSteal = rtsFalse; + break; + + default: + bad_option( rts_argv[arg] ); + break; + } + break; + +# if defined(GRAN_CHECK) && defined(GRAN) + case 'D': + switch(rts_argv[arg][3]) { + case 'Q': /* Set pack buffer size (same as 'Q' in GUM) */ + if (rts_argv[arg][4] != '\0') { + RtsFlags.GranFlags.packBufferSize = decode(rts_argv[arg]+4); + fprintf(stderr,"Pack buffer size: %d\n", + RtsFlags.GranFlags.packBufferSize); + } else { + fprintf(stderr, "setupRtsFlags: missing size of PackBuffer (for -Q)\n"); + *error = rtsTrue; + } + break; + + default: + if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */ + /* hack warning: interpret the flags as a binary number */ + nat n = decode(rts_argv[arg]+3); + set_GranSim_debug_options(n); + } else { + nat i; + for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++) + if (rts_argv[arg][3] == gran_debug_opts_flags[i]) + break; + + if (i==MAX_GRAN_DEBUG_OPTION+1) { + fprintf(stderr, "Valid GranSim debug options are:\n"); + help_GranSim_debug_options(MAX_GRAN_DEBUG_MASK); + bad_option( rts_argv[arg] ); + } else { // flag found; now set it + set_GranSim_debug_options(GRAN_DEBUG_MASK(i)); // 2^i + } + } + break; + +#if 0 + case 'e': /* event trace; also -bD1 */ + fprintf(stderr,"DEBUG: event_trace; printing event trace.\n"); + RtsFlags.GranFlags.Debug.event_trace = rtsTrue; + /* RtsFlags.GranFlags.event_trace=rtsTrue; */ + break; + + case 'E': /* event statistics; also -bD2 */ + fprintf(stderr,"DEBUG: event_stats; printing event statistics.\n"); + RtsFlags.GranFlags.Debug.event_stats = rtsTrue; + /* RtsFlags.GranFlags.Debug |= 0x20; print event statistics */ + break; + + case 'f': /* thunkStealing; also -bD4 */ + fprintf(stderr,"DEBUG: thunkStealing; printing forwarding of FETCHNODES.\n"); + RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue; + /* RtsFlags.GranFlags.Debug |= 0x2; print fwd messages */ + break; + + case 'z': /* blockOnFetch; also -bD8 */ + fprintf(stderr,"DEBUG: blockOnFetch; check for blocked on fetch.\n"); + RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue; + /* RtsFlags.GranFlags.Debug |= 0x4; debug non-reschedule-on-fetch */ + break; + + case 't': /* blockOnFetch_sanity; also -bD16 */ + fprintf(stderr,"DEBUG: blockOnFetch_sanity; check for TSO asleep on fetch.\n"); + RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue; + /* RtsFlags.GranFlags.Debug |= 0x10; debug TSO asleep for fetch */ + break; + + case 'S': /* priSpark; also -bD32 */ + fprintf(stderr,"DEBUG: priSpark; priority sparking.\n"); + RtsFlags.GranFlags.Debug.priSpark = rtsTrue; + break; + + case 's': /* priSched; also -bD64 */ + fprintf(stderr,"DEBUG: priSched; priority scheduling.\n"); + RtsFlags.GranFlags.Debug.priSched = rtsTrue; + break; + + case 'F': /* findWork; also -bD128 */ + fprintf(stderr,"DEBUG: findWork; searching spark-pools (local & remote), thread queues for work.\n"); + RtsFlags.GranFlags.Debug.findWork = rtsTrue; + break; + + case 'g': /* globalBlock; also -bD256 */ + fprintf(stderr,"DEBUG: globalBlock; blocking on remote 
closures (FETCHMEs etc in GUM).\n"); + RtsFlags.GranFlags.Debug.globalBlock = rtsTrue; + break; + + case 'G': /* pack; also -bD512 */ + fprintf(stderr,"DEBUG: pack; routines for (un-)packing graph structures.\n"); + RtsFlags.GranFlags.Debug.pack = rtsTrue; + break; + + case 'P': /* packBuffer; also -bD1024 */ + fprintf(stderr,"DEBUG: packBuffer; routines handling pack buffer (GranSim internal!).\n"); + RtsFlags.GranFlags.Debug.packBuffer = rtsTrue; + break; + + case 'o': /* sortedQ; also -bD2048 */ + fprintf(stderr,"DEBUG: sortedQ; check whether spark/thread queues are sorted.\n"); + RtsFlags.GranFlags.Debug.sortedQ = rtsTrue; + break; + + case 'r': /* randomSteal; also -bD4096 */ + fprintf(stderr,"DEBUG: randomSteal; stealing sparks/threads from random PEs.\n"); + RtsFlags.GranFlags.Debug.randomSteal = rtsTrue; + break; + + case 'q': /* checkSparkQ; also -bD8192 */ + fprintf(stderr,"DEBUG: checkSparkQ; check consistency of the spark queues.\n"); + RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue; + break; + + case ':': /* checkLight; also -bD16384 */ + fprintf(stderr,"DEBUG: checkLight; check GranSim-Light setup.\n"); + RtsFlags.GranFlags.Debug.checkLight = rtsTrue; + break; + + case 'b': /* bq; also -bD32768 */ + fprintf(stderr,"DEBUG: bq; check blocking queues\n"); + RtsFlags.GranFlags.Debug.bq = rtsTrue; + break; + + case 'd': /* all options turned on */ + fprintf(stderr,"DEBUG: all options turned on.\n"); + set_GranSim_debug_options(MAX_GRAN_DEBUG_MASK); + /* RtsFlags.GranFlags.Debug |= 0x40; */ + break; + +/* case '\0': */ +/* RtsFlags.GranFlags.Debug = 1; */ +/* break; */ +#endif + + } + break; +# endif /* GRAN_CHECK */ + default: + bad_option( rts_argv[arg] ); + break; + } +} + +/* + Interpret n as a binary number masking GranSim debug options and set the + corresponding option. See gran_debug_opts_strs for explanations of the flags. +*/ +static void +set_GranSim_debug_options(nat n) { + nat i; + + for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++) + if ((n>>i)&1) { + fprintf(stderr, gran_debug_opts_strs[i]); + switch (i) { + case 0: RtsFlags.GranFlags.Debug.event_trace = rtsTrue; break; + case 1: RtsFlags.GranFlags.Debug.event_stats = rtsTrue; break; + case 2: RtsFlags.GranFlags.Debug.bq = rtsTrue; break; + case 3: RtsFlags.GranFlags.Debug.pack = rtsTrue; break; + case 4: RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue; break; + case 5: RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue; break; + case 6: RtsFlags.GranFlags.Debug.randomSteal = rtsTrue; break; + case 7: RtsFlags.GranFlags.Debug.findWork = rtsTrue; break; + case 8: RtsFlags.GranFlags.Debug.unused = rtsTrue; break; + case 9: RtsFlags.GranFlags.Debug.pri = rtsTrue; break; + case 10: RtsFlags.GranFlags.Debug.checkLight = rtsTrue; break; + case 11: RtsFlags.GranFlags.Debug.sortedQ = rtsTrue; break; + case 12: RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue; break; + case 13: RtsFlags.GranFlags.Debug.packBuffer = rtsTrue; break; + case 14: RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue; break; + default: barf("set_GranSim_debug_options: only %d debug options expected", MAX_GRAN_DEBUG_OPTION+1); + } /* switch */ + } /* if */ +} + +/* + Print one line explanation for each of the GranSim debug options specified + in the bitmask n.
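
   For example, help_GranSim_debug_options(6) prints the explanation
   lines for bits 1 and 2 only (event_stats and bq), since 6 = binary 110.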
+*/ +static void +help_GranSim_debug_options(nat n) { + nat i; + + for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++) + if ((n>>i)&1) + fprintf(stderr, gran_debug_opts_strs[i]); +} + +# elif defined(PAR) + +static void +process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error) +{ + if (rts_argv[arg][1] != 'q') /* All GUM options start with -q */ + return; + + /* Communication and task creation cost parameters */ + switch(rts_argv[arg][2]) { + case 'e': /* -qe<n> ... allow <n> local sparks */ + if (rts_argv[arg][3] != '\0') { /* otherwise, stick w/ the default */ + RtsFlags.ParFlags.maxLocalSparks + = strtol(rts_argv[arg]+3, (char **) NULL, 10); + + if (RtsFlags.ParFlags.maxLocalSparks <= 0) { + belch("setupRtsFlags: bad value for -e\n"); + *error = rtsTrue; + } + } + IF_PAR_DEBUG(verbose, + belch("-qe: max %d local sparks", + RtsFlags.ParFlags.maxLocalSparks)); + break; + + case 't': + if (rts_argv[arg][3] != '\0') { + RtsFlags.ParFlags.maxThreads + = strtol(rts_argv[arg]+3, (char **) NULL, 10); + } else { + belch("setupRtsFlags: missing size for -qt\n"); + *error = rtsTrue; + } + IF_PAR_DEBUG(verbose, + belch("-qt: max %d threads", + RtsFlags.ParFlags.maxThreads)); + break; + + case 'f': + if (rts_argv[arg][3] != '\0') + RtsFlags.ParFlags.maxFishes = decode(rts_argv[arg]+3); + else + RtsFlags.ParFlags.maxFishes = MAX_FISHES; + IF_PAR_DEBUG(verbose, + belch("-qf: max %d fishes sent out at one time", + RtsFlags.ParFlags.maxFishes)); + break; + + case 'd': + if (rts_argv[arg][3] != '\0') { + RtsFlags.ParFlags.fishDelay + = strtol(rts_argv[arg]+3, (char **) NULL, 10); + } else { + belch("setupRtsFlags: missing fish delay time for -qd\n"); + *error = rtsTrue; + } + IF_PAR_DEBUG(verbose, + belch("-qd: fish delay time %d", + RtsFlags.ParFlags.fishDelay)); + break; + + case 'O': + RtsFlags.ParFlags.outputDisabled = rtsTrue; + IF_PAR_DEBUG(verbose, + belch("-qO: output disabled")); + break; + + case 'P': /* -qP for writing a log file */ + RtsFlags.ParFlags.ParStats.Full = rtsTrue; + /* same encoding as in GranSim after -bP */ + switch(rts_argv[arg][3]) { + case '\0': break; // nothing special, just an ordinary profile + //case '0': RtsFlags.ParFlags.ParStats.Suppressed = rtsTrue; + // break; + case 'b': RtsFlags.ParFlags.ParStats.Binary = rtsTrue; + break; + case 's': RtsFlags.ParFlags.ParStats.Sparks = rtsTrue; + break; + //case 'h': RtsFlags.parFlags.ParStats.Heap = rtsTrue; + // break; + case 'n': RtsFlags.ParFlags.ParStats.NewLogfile = rtsTrue; + break; + case 'g': RtsFlags.ParFlags.ParStats.Global = rtsTrue; + break; + default: barf("Unknown option -qP%c", rts_argv[arg][3]); + } + IF_PAR_DEBUG(verbose, + belch("(-qP) writing to log-file (RtsFlags.ParFlags.ParStats.Full=%s)", + (RtsFlags.ParFlags.ParStats.Full ? "rtsTrue" : "rtsFalse"))); + break; + + case 'Q': /* -qQ<n> ... set pack buffer size to <n> */ + if (rts_argv[arg][3] != '\0') { + RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+3); + } else { + belch("setupRtsFlags: missing size of PackBuffer (for -Q)\n"); + *error = rtsTrue; + } + IF_PAR_DEBUG(verbose, + belch("-qQ: pack buffer size set to %d", + RtsFlags.ParFlags.packBufferSize)); + break; + +# if defined(DEBUG) + case 'w': + if (rts_argv[arg][3] != '\0') { + RtsFlags.ParFlags.wait + = strtol(rts_argv[arg]+3, (char **) NULL, 10); + } else { + RtsFlags.ParFlags.wait = 1000; + } + IF_PAR_DEBUG(verbose, + belch("-qw: length of wait loop after synchr before reduction: %d", + RtsFlags.ParFlags.wait)); + break; + + case 'D': /* -qD ...
all the debugging options */ + if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */ + /* hack warning: interpret the flags as a binary number */ + nat n = decode(rts_argv[arg]+3); + set_par_debug_options(n); + } else { + nat i; + for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++) + if (rts_argv[arg][3] == par_debug_opts_flags[i]) + break; + + if (i==MAX_PAR_DEBUG_OPTION+1) { + fprintf(stderr, "Valid GUM debug options are:\n"); + help_par_debug_options(MAX_PAR_DEBUG_MASK); + bad_option( rts_argv[arg] ); + } else { // flag found; now set it + set_par_debug_options(PAR_DEBUG_MASK(i)); // 2^i + } + } + break; +# endif + default: + belch("Unknown option -q%c", rts_argv[arg][2]); + break; + } /* switch */ +} + +/* + Interpret n as a binary number masking Par debug options and set the + corresponding option. See par_debug_opts_strs for explanations of the flags. +*/ +static void +set_par_debug_options(nat n) { + nat i; + + for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++) + if ((n>>i)&1) { + fprintf(stderr, par_debug_opts_strs[i]); + switch (i) { + case 0: RtsFlags.ParFlags.Debug.verbose = rtsTrue; break; + case 1: RtsFlags.ParFlags.Debug.trace = rtsTrue; break; + case 2: RtsFlags.ParFlags.Debug.schedule = rtsTrue; break; + case 3: RtsFlags.ParFlags.Debug.free = rtsTrue; break; + case 4: RtsFlags.ParFlags.Debug.resume = rtsTrue; break; + case 5: RtsFlags.ParFlags.Debug.weight = rtsTrue; break; + case 6: RtsFlags.ParFlags.Debug.fetch = rtsTrue; break; + case 7: RtsFlags.ParFlags.Debug.ack = rtsTrue; break; + case 8: RtsFlags.ParFlags.Debug.fish = rtsTrue; break; + case 9: RtsFlags.ParFlags.Debug.forward = rtsTrue; break; + case 10: RtsFlags.ParFlags.Debug.pack = rtsTrue; break; + default: barf("set_par_debug_options: only %d debug options expected", MAX_PAR_DEBUG_OPTION+1); + } /* switch */ + } /* if */ +} + +/* + Print one line explanation for each of the GUM debug options specified + in the bitmask n. +*/ +static void +help_par_debug_options(nat n) { + nat i; + + for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++) + if ((n>>i)&1) + fprintf(stderr, par_debug_opts_strs[i]); +} + +#endif /* GRAN || PAR */ + +//@node Aux fcts, , GranSim specific options +//@subsection Aux fcts + static FILE * /* return NULL on error */ open_stats_file ( I_ arg, diff --git a/ghc/rts/RtsFlags.h b/ghc/rts/RtsFlags.h index 238e2b6..e3febb3 100644 --- a/ghc/rts/RtsFlags.h +++ b/ghc/rts/RtsFlags.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: RtsFlags.h,v 1.19 2000/01/12 15:15:17 simonmar Exp $ + * $Id: RtsFlags.h,v 1.20 2000/01/13 14:34:04 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -59,6 +59,8 @@ struct DEBUG_FLAGS { rtsBool stable : 1; /* 256 */ rtsBool prof : 1; /* 512 */ + rtsBool gran : 1; /* 1024 */ + rtsBool par : 1; /* 2048 */ }; #if defined(PROFILING) || defined(PAR) @@ -124,15 +126,46 @@ struct CONCURRENT_FLAGS { }; #ifdef PAR +/* currently the same as GRAN_STATS_FLAGS */ +struct PAR_STATS_FLAGS { + rtsBool Full; /* Full .gr profile (rtsTrue) or only END events? */ + // rtsBool Suppressed; /* No .gr profile at all */ + rtsBool Binary; /* Binary profile? (not yet implemented) */ + rtsBool Sparks; /* Info on sparks in profile? */ + rtsBool Heap; /* Info on heap allocs in profile? */ + rtsBool NewLogfile; /* Use new log-file format? (not yet implemented) */ + rtsBool Global; /* Global statistics?
(printed on shutdown; no log file) */ +}; + +struct PAR_DEBUG_FLAGS { + /* flags to control debugging output in various subsystems */ + rtsBool verbose : 1; /* 1 */ + rtsBool trace : 1; /* 2 */ + rtsBool schedule : 1; /* 4 */ + rtsBool free : 1; /* 8 */ + rtsBool resume : 1; /* 16 */ + rtsBool weight : 1; /* 32 */ + rtsBool fetch : 1; /* 64 */ + rtsBool ack : 1; /* 128 */ + rtsBool fish : 1; /* 256 */ + rtsBool forward : 1; /* 512 */ + rtsBool pack : 1; /* 1024 */ +}; + +#define MAX_PAR_DEBUG_OPTION 10 +#define PAR_DEBUG_MASK(n) ((nat)(ldexp(1,n))) +#define MAX_PAR_DEBUG_MASK ((nat)(ldexp(1,(MAX_PAR_DEBUG_OPTION+1))-1)) + struct PAR_FLAGS { - rtsBool parallelStats; /* Gather parallel statistics */ - rtsBool granSimStats; /* Full .gr profile (rtsTrue) or only END events? */ - rtsBool granSimStats_Binary; - - rtsBool outputDisabled; /* Disable output for performance purposes */ - - unsigned int packBufferSize; - unsigned int maxLocalSparks; + struct PAR_STATS_FLAGS ParStats; /* profile and stats output */ + struct PAR_DEBUG_FLAGS Debug; /* debugging options */ + rtsBool outputDisabled; /* Disable output for performance purposes */ + nat packBufferSize; + nat maxLocalSparks; /* spark pool size */ + nat maxThreads; /* thread pool size */ + nat maxFishes; /* max number of active fishes */ + rtsTime fishDelay; /* delay before sending a new fish */ + long wait; }; #endif /* PAR */ @@ -141,53 +174,88 @@ struct PAR_FLAGS { nat nNodes; /* number of threads to run simultaneously */ unsigned int maxLocalSparks; }; -#endif +#endif /* SMP */ #ifdef GRAN +struct GRAN_STATS_FLAGS { + rtsBool Full; /* Full .gr profile (rtsTrue) or only END events? */ + rtsBool Suppressed; /* No .gr profile at all */ + rtsBool Binary; /* Binary profile? (not yet implemented) */ + rtsBool Sparks; /* Info on sparks in profile? */ + rtsBool Heap; /* Info on heap allocs in profile? */ + rtsBool NewLogfile; /* Use new log-file format? (not yet implemented) */ + rtsBool Global; /* Global statistics? 
(printed on shutdown; no log file) */ +}; + +struct GRAN_COST_FLAGS { + /* Communication Cost Variables -- set in main program */ + nat latency; /* Latency for single packet */ + nat additional_latency; /* Latency for additional packets */ + nat fetchtime; + nat lunblocktime; /* Time for local unblock */ + nat gunblocktime; /* Time for global unblock */ + nat mpacktime; /* Cost of creating a packet */ + nat munpacktime; /* Cost of receiving a packet */ + nat mtidytime; /* Cost of cleaning up after send */ + + nat threadcreatetime; /* Thread creation costs */ + nat threadqueuetime; /* Cost of adding a thread to the running/runnable queue */ + nat threaddescheduletime; /* Cost of descheduling a thread */ + nat threadscheduletime; /* Cost of scheduling a thread */ + nat threadcontextswitchtime; /* Cost of context switch */ + + /* Instruction Costs */ + nat arith_cost; /* arithmetic instructions (+,i,< etc) */ + nat branch_cost; /* branch instructions */ + nat load_cost; /* load into register */ + nat store_cost; /* store into memory */ + nat float_cost; /* floating point operations */ + + nat heapalloc_cost; /* heap allocation costs */ + + /* Overhead for granularity control mechanisms */ + /* overhead per elem of spark queue */ + nat pri_spark_overhead; + /* overhead per elem of thread queue */ + nat pri_sched_overhead; +}; + +struct GRAN_DEBUG_FLAGS { + /* flags to control debugging output in various subsystems */ + rtsBool event_trace : 1; /* 1 */ + rtsBool event_stats : 1; /* 2 */ + rtsBool bq : 1; /* 4 */ + rtsBool pack : 1; /* 8 */ + rtsBool checkSparkQ : 1; /* 16 */ + rtsBool thunkStealing : 1; /* 32 */ + rtsBool randomSteal : 1; /* 64 */ + rtsBool findWork : 1; /* 128 */ + rtsBool unused : 1; /* 256 */ + rtsBool pri : 1; /* 512 */ + rtsBool checkLight : 1; /* 1024 */ + rtsBool sortedQ : 1; /* 2048 */ + rtsBool blockOnFetch : 1; /* 4096 */ + rtsBool packBuffer : 1; /* 8192 */ + rtsBool blockOnFetch_sanity : 1; /* 16384 */ +}; + +#define MAX_GRAN_DEBUG_OPTION 14 +#define GRAN_DEBUG_MASK(n) ((nat)(ldexp(1,n))) +#define MAX_GRAN_DEBUG_MASK ((nat)(ldexp(1,(MAX_GRAN_DEBUG_OPTION+1))-1)) + struct GRAN_FLAGS { - rtsBool granSimStats; /* Full .gr profile (rtsTrue) or only END events? 
*/ - rtsBool granSimStats_suppressed; /* No .gr profile at all */ - rtsBool granSimStats_Binary; - rtsBool granSimStats_Sparks; - rtsBool granSimStats_Heap; - rtsBool labelling; - unsigned int packBufferSize; - unsigned int packBufferSize_internal; - - int proc; /* number of processors */ - int max_fishes; /* max number of spark or thread steals */ - TIME time_slice; /* max time slice of one reduction thread */ - - /* Communication Cost Variables -- set in main program */ - unsigned int gran_latency; /* Latency for single packet */ - unsigned int gran_additional_latency; /* Latency for additional packets */ - unsigned int gran_fetchtime; - unsigned int gran_lunblocktime; /* Time for local unblock */ - unsigned int gran_gunblocktime; /* Time for global unblock */ - unsigned int gran_mpacktime; /* Cost of creating a packet */ - unsigned int gran_munpacktime; /* Cost of receiving a packet */ - unsigned int gran_mtidytime; /* Cost of cleaning up after send */ - - unsigned int gran_threadcreatetime; /* Thread creation costs */ - unsigned int gran_threadqueuetime; /* Cost of adding a thread to the running/runnable queue */ - unsigned int gran_threaddescheduletime; /* Cost of descheduling a thread */ - unsigned int gran_threadscheduletime; /* Cost of scheduling a thread */ - unsigned int gran_threadcontextswitchtime; /* Cost of context switch */ - - /* Instruction Costs */ - unsigned int gran_arith_cost; /* arithmetic instructions (+,i,< etc) */ - unsigned int gran_branch_cost; /* branch instructions */ - unsigned int gran_load_cost; /* load into register */ - unsigned int gran_store_cost; /* store into memory */ - unsigned int gran_float_cost; /* floating point operations */ - - unsigned int gran_heapalloc_cost; /* heap allocation costs */ - - /* Overhead for granularity control mechanisms */ - /* overhead per elem of spark queue */ - unsigned int gran_pri_spark_overhead; - /* overhead per elem of thread queue */ - unsigned int gran_pri_sched_overhead; + struct GRAN_STATS_FLAGS GranSimStats; /* profile and stats output */ + struct GRAN_COST_FLAGS Costs; /* cost metric for simulation */ + struct GRAN_DEBUG_FLAGS Debug; /* debugging options */ + + // rtsBool labelling; + nat packBufferSize; + nat packBufferSize_internal; + + PEs proc; /* number of processors */ + rtsBool Fishing; /* Simulate GUM style fishing mechanism? */ + nat maxFishes; /* max number of spark or thread steals */ + rtsTime time_slice; /* max time slice of one reduction thread */ /* GrAnSim-Light: This version puts no bound on the number of processors but in exchange doesn't model communication costs @@ -198,30 +266,27 @@ struct GRAN_FLAGS { rtsBool Light; rtsBool DoFairSchedule ; /* fair scheduling alg? default: unfair */ - rtsBool DoReScheduleOnFetch ; /* async. communication? */ + rtsBool DoAsyncFetch; /* async. communication? 
*/ rtsBool DoStealThreadsFirst; /* prefer threads over sparks when stealing */ - rtsBool SimplifiedFetch; /* fast but inaccurate fetch modelling */ - rtsBool DoAlwaysCreateThreads; /* eager thread creation */ - rtsBool DoGUMMFetching; /* bulk fetching */ - rtsBool DoThreadMigration; /* allow to move threads */ - int FetchStrategy; /* what to do when waiting for data */ - rtsBool PreferSparksOfLocalNodes; /* prefer local over global sparks */ - rtsBool DoPrioritySparking; /* sparks sorted by priorities */ - rtsBool DoPriorityScheduling; /* threads sorted by priorities */ - int SparkPriority; /* threshold for cut-off mechanism */ - int SparkPriority2; - rtsBool RandomPriorities; - rtsBool InversePriorities; - rtsBool IgnorePriorities; - int ThunksToPack; /* number of thunks in packet + 1 */ - rtsBool RandomSteal; /* steal spark/thread from random proc */ - rtsBool NoForward; /* no forwarding of fetch messages */ - rtsBool PrintFetchMisses; /* print number of fetch misses */ - - unsigned int debug; - rtsBool event_trace; - rtsBool event_trace_all; - + rtsBool DoAlwaysCreateThreads; /* eager thread creation */ + rtsBool DoBulkFetching; /* bulk fetching */ + rtsBool DoThreadMigration; /* allow to move threads */ + nat FetchStrategy; /* what to do when waiting for data */ + rtsBool PreferSparksOfLocalNodes; /* prefer local over global sparks */ + rtsBool DoPrioritySparking; /* sparks sorted by priorities */ + rtsBool DoPriorityScheduling; /* threads sorted by priorities */ + nat SparkPriority; /* threshold for cut-off mechanism */ + nat SparkPriority2; + rtsBool RandomPriorities; + rtsBool InversePriorities; + rtsBool IgnorePriorities; + nat ThunksToPack; /* number of thunks in packet + 1 */ + rtsBool RandomSteal; /* steal spark/thread from random proc */ + rtsBool NoForward; /* no forwarding of fetch messages */ + + // unsigned int debug; + // rtsBool event_trace; + // rtsBool event_trace_all; }; #endif /* GRAN */ diff --git a/ghc/rts/RtsStartup.c b/ghc/rts/RtsStartup.c index 0996ba0..a589b18 100644 --- a/ghc/rts/RtsStartup.c +++ b/ghc/rts/RtsStartup.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: RtsStartup.c,v 1.25 1999/12/20 10:34:37 simonpj Exp $ + * $Id: RtsStartup.c,v 1.26 2000/01/13 14:34:04 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -25,7 +25,12 @@ # include "ProfHeap.h" #endif -#ifdef PAR +#if defined(GRAN) +#include "GranSimRts.h" +#include "ParallelRts.h" +#endif + +#if defined(PAR) #include "ParInit.h" #include "Parallel.h" #include "LLC.h" @@ -37,6 +42,9 @@ struct RTS_FLAGS RtsFlags; static int rts_has_started_up = 0; +#if defined(PAR) +static ullong startTime = 0; +#endif void startupHaskell(int argc, char *argv[]) @@ -51,10 +59,6 @@ startupHaskell(int argc, char *argv[]) else rts_has_started_up=1; -#if defined(PAR) - int nPEs = 0; /* Number of PEs */ -#endif - /* The very first thing we do is grab the start time...just in case we're * collecting timing statistics. */ @@ -62,13 +66,15 @@ startupHaskell(int argc, char *argv[]) #ifdef PAR /* - *The parallel system needs to be initialised and synchronised before - *the program is run. + * The parallel system needs to be initialised and synchronised before + * the program is run. 
 */
+  fprintf(stderr, "startupHaskell: argv[0]=%s\n", argv[0]);
     if (*argv[0] == '-') { /* Look to see whether we're the Main Thread */
 	IAmMainThread = rtsTrue;
         argv++; argc--;			/* Strip off flag argument */
-/*	fprintf(stderr, "I am Main Thread\n"); */
+	// IF_PAR_DEBUG(verbose,
+	fprintf(stderr, "[%x] I am Main Thread\n", mytid);
     }
 /*
  * Grab the number of PEs out of the argument vector, and
  *
@@ -78,7 +84,6 @@
     argv[1] = argv[0];
     argv++; argc--;
     initEachPEHook();                  /* HWL: hook to be execed on each PE */
-    SynchroniseSystem();
 #endif
 
     /* Set the RTS flags to default values. */
@@ -92,13 +97,10 @@
     prog_argc = argc;
     prog_argv = argv;
 
-#ifdef PAR
-    /* Initialise the parallel system -- before initHeap! */
-    initParallelSystem();
-    /* And start GranSim profiling if required: omitted for now
-    *if (Rtsflags.ParFlags.granSimStats)
-    *init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv);
-    */
+#if defined(PAR)
+    /* NB: this really must be done after processing the RTS flags */
+    fprintf(stderr, "Synchronising system (%d PEs)\n", nPEs);
+    SynchroniseSystem();             // calls initParallelSystem etc
 #endif	/* PAR */
 
     /* initialise scheduler data structures (needs to be done before
@@ -106,6 +108,16 @@
      */
     initScheduler();
 
+#if defined(GRAN)
+    /* And start GranSim profiling if required: */
+    if (RtsFlags.GranFlags.GranSimStats.Full)
+      init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv);
+#elif defined(PAR)
+    /* And start GUM profiling if required: */
+    if (RtsFlags.ParFlags.ParStats.Full)
+      init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv);
+#endif	/* PAR || GRAN */
+
     /* initialize the storage manager */
     initStorage();
@@ -179,12 +191,14 @@ shutdownHaskell(void)
   /* start timing the shutdown */
   stat_startExit();
 
+#if !defined(GRAN)
   /* Finalize any remaining weak pointers */
   finalizeWeakPointersNow();
+#endif
 
 #if defined(GRAN)
-  #error FixMe.
-  if (!RTSflags.GranFlags.granSimStats_suppressed)
+  /* end_gr_simulation prints global stats if requested -- HWL */
+  if (!RtsFlags.GranFlags.GranSimStats.Suppressed)
     end_gr_simulation();
 #endif
 
@@ -220,8 +234,12 @@
 #endif
 
   rts_has_started_up=0;
-}
 
+#if defined(PAR)
+  shutdownParallelSystem(0);
+#endif
+
+}
 /*
  * called from STG-land to exit the program
@@ -230,7 +248,7 @@
 void
 stg_exit(I_ n)
 {
-#ifdef PAR
+#if 0 /* def PAR */
   par_exit(n);
 #else
   exit(n);
diff --git a/ghc/rts/RtsUtils.c b/ghc/rts/RtsUtils.c
index 28fb2f7..5e53b7d 100644
--- a/ghc/rts/RtsUtils.c
+++ b/ghc/rts/RtsUtils.c
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: RtsUtils.c,v 1.12 2000/01/13 12:40:16 simonmar Exp $
+ * $Id: RtsUtils.c,v 1.13 2000/01/13 14:34:04 hwloidl Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
@@ -8,6 +8,7 @@
 * ---------------------------------------------------------------------------*/
 
 #include "Rts.h"
+#include "RtsTypes.h"
 #include "RtsAPI.h"
 #include "RtsFlags.h"
 #include "Hooks.h"
@@ -23,6 +24,10 @@
 #include <time.h>
 #endif
 
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+#endif
+
 #include <stdarg.h>
 
 /* variable-argument error function. */
@@ -182,7 +187,7 @@ resetGenSymZh(void) /* it's your funeral */
 
 /* -----------------------------------------------------------------------------
    Get the current time as a string.  Used in profiling reports.
-------------------------------------------------------------------------- */ -#if defined(PROFILING) || defined(DEBUG) +#if defined(PROFILING) || defined(DEBUG) || defined(PAR) || defined(GRAN) char * time_str(void) { @@ -219,6 +224,44 @@ resetNonBlockingFd(int fd) #endif } +#if 0 +static ullong startTime = 0; + +/* used in a parallel setup */ +ullong +msTime(void) +{ +# if defined(HAVE_GETCLOCK) && !defined(alpha_TARGET_ARCH) + struct timespec tv; + + if (getclock(TIMEOFDAY, &tv) != 0) { + fflush(stdout); + fprintf(stderr, "Clock failed\n"); + stg_exit(EXIT_FAILURE); + } + return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime; +# elif HAVE_GETTIMEOFDAY && !defined(alpha_TARGET_ARCH) + struct timeval tv; + + if (gettimeofday(&tv, NULL) != 0) { + fflush(stdout); + fprintf(stderr, "Clock failed\n"); + stg_exit(EXIT_FAILURE); + } + return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime; +# else + time_t t; + if ((t = time(NULL)) == (time_t) -1) { + fflush(stdout); + fprintf(stderr, "Clock failed\n"); + stg_exit(EXIT_FAILURE); + } + return t * LL(1000) - startTime; +# endif +} +#endif + + /* ----------------------------------------------------------------------------- Print large numbers, with punctuation. -------------------------------------------------------------------------- */ diff --git a/ghc/rts/RtsUtils.h b/ghc/rts/RtsUtils.h index 8f5581c..79557e8 100644 --- a/ghc/rts/RtsUtils.h +++ b/ghc/rts/RtsUtils.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: RtsUtils.h,v 1.6 2000/01/12 15:15:17 simonmar Exp $ + * $Id: RtsUtils.h,v 1.7 2000/01/13 14:34:04 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -30,7 +30,8 @@ extern nat stg_strlen(char *str); /*Defined in Main.c, but made visible here*/ extern void stg_exit(I_ n) __attribute__((noreturn)); -char * time_str(void); - +char *time_str(void); char *ullong_format_string(ullong, char *, rtsBool); +//ullong msTime(void); + diff --git a/ghc/rts/Sanity.c b/ghc/rts/Sanity.c index 920530a..c0a602a 100644 --- a/ghc/rts/Sanity.c +++ b/ghc/rts/Sanity.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Sanity.c,v 1.14 1999/05/21 14:37:12 sof Exp $ + * $Id: Sanity.c,v 1.15 2000/01/13 14:34:04 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -14,17 +14,36 @@ * * ---------------------------------------------------------------------------*/ +//@menu +//* Includes:: +//* Macros:: +//* Stack sanity:: +//* Heap Sanity:: +//* TSO Sanity:: +//* Thread Queue Sanity:: +//* Blackhole Sanity:: +//@end menu + +//@node Includes, Macros +//@subsection Includes + #include "Rts.h" -#ifdef DEBUG +#ifdef DEBUG /* whole file */ #include "RtsFlags.h" #include "RtsUtils.h" #include "BlockAlloc.h" #include "Sanity.h" +//@node Macros, Stack sanity, Includes +//@subsection Macros + #define LOOKS_LIKE_PTR(r) (LOOKS_LIKE_STATIC_CLOSURE(r) || ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) +//@node Stack sanity, Heap Sanity, Macros +//@subsection Stack sanity + /* ----------------------------------------------------------------------------- Check stack sanity -------------------------------------------------------------------------- */ @@ -42,6 +61,7 @@ static StgOffset checkLargeBitmap( StgPtr payload, void checkClosureShallow( StgClosure* p ); +//@cindex checkSmallBitmap static StgOffset checkSmallBitmap( StgPtr payload, StgWord32 bitmap ) { @@ -56,7 +76,7 @@ checkSmallBitmap( StgPtr payload, StgWord32 bitmap ) return i; } - 
+//@cindex checkLargeBitmap static StgOffset checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap ) { @@ -75,6 +95,7 @@ checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap ) return i; } +//@cindex checkStackClosure StgOffset checkStackClosure( StgClosure* c ) { @@ -91,17 +112,28 @@ checkStackClosure( StgClosure* c ) case RET_BCO: /* small bitmap (<= 32 entries) */ case RET_SMALL: case RET_VEC_SMALL: + return 1 + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap); + case UPDATE_FRAME: case CATCH_FRAME: case STOP_FRAME: case SEQ_FRAME: - return 1 + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap); +#if defined(GRAN) + return 2 + +#else + return 1 + +#endif + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap); case RET_BIG: /* large bitmap (> 32 entries) */ case RET_VEC_BIG: return 1 + checkLargeBitmap((StgPtr)c + 1,info->layout.large_bitmap); case FUN: case FUN_STATIC: /* probably a slow-entry point return address: */ - return 1; +#if 0 && defined(GRAN) + return 2; +#else + return 1; +#endif default: /* if none of the above, maybe it's a closure which looks a * little like an infotable @@ -118,6 +150,7 @@ checkStackClosure( StgClosure* c ) * chunks. */ +//@cindex checkClosureShallow void checkClosureShallow( StgClosure* p ) { @@ -133,6 +166,7 @@ checkClosureShallow( StgClosure* p ) } /* check an individual stack object */ +//@cindex checkStackObject StgOffset checkStackObject( StgPtr sp ) { @@ -151,6 +185,7 @@ checkStackObject( StgPtr sp ) } /* check sections of stack between update frames */ +//@cindex checkStackChunk void checkStackChunk( StgPtr sp, StgPtr stack_end ) { @@ -160,9 +195,10 @@ checkStackChunk( StgPtr sp, StgPtr stack_end ) while (p < stack_end) { p += checkStackObject( p ); } - ASSERT( p == stack_end ); + // ASSERT( p == stack_end ); -- HWL } +//@cindex checkStackChunk StgOffset checkClosure( StgClosure* p ) { @@ -332,13 +368,17 @@ checkClosure( StgClosure* p ) case BLOCKED_FETCH: case FETCH_ME: case EVACUATED: - barf("checkClosure: unimplemented/strange closure type"); + barf("checkClosure: unimplemented/strange closure type %d", + info->type); default: - barf("checkClosure"); + barf("checkClosure (closure type %d)", info->type); } #undef LOOKS_LIKE_PTR } +//@node Heap Sanity, TSO Sanity, Stack sanity +//@subsection Heap Sanity + /* ----------------------------------------------------------------------------- Check Heap Sanity @@ -348,6 +388,7 @@ checkClosure( StgClosure* p ) all the objects in the remainder of the chain. 
   -------------------------------------------------------------------------- */
 
+//@cindex checkHeap
 extern void
 checkHeap(bdescr *bd, StgPtr start)
 {
@@ -377,6 +418,7 @@ checkHeap(bdescr *bd, StgPtr start)
   }
 }
 
+//@cindex checkChain
 extern void
 checkChain(bdescr *bd)
 {
@@ -387,6 +429,7 @@ checkChain(bdescr *bd)
 }
 
 /* check stack - making sure that update frames are linked correctly */
+//@cindex checkStack
 void
 checkStack(StgPtr sp, StgPtr stack_end, StgUpdateFrame* su )
 {
@@ -415,6 +458,10 @@ checkStack(StgPtr sp, StgPtr stack_end, StgUpdateFrame* su )
   ASSERT(stgCast(StgPtr,su) == stack_end);
 }
 
+//@node TSO Sanity, Thread Queue Sanity, Heap Sanity
+//@subsection TSO Sanity
+
+//@cindex checkTSO
 extern void
 checkTSO(StgTSO *tso)
 {
@@ -437,6 +484,69 @@ checkTSO(StgTSO *tso)
   checkStack(sp, stack_end, su);
 }
 
+#if defined(GRAN)
+//@cindex checkTSOsSanity
+extern void
+checkTSOsSanity(void) {
+  nat i, tsos;
+  StgTSO *tso;
+
+  belch("Checking sanity of all runnable TSOs:");
+
+  for (i=0, tsos=0; i<RtsFlags.GranFlags.proc; i++) {
+    for (tso=run_queue_hds[i]; tso!=END_TSO_QUEUE; tso=tso->link) {
+      fprintf(stderr, "TSO %p on PE %d ...", tso, i);
+      checkTSO(tso);
+      fprintf(stderr, "OK, ");
+      tsos++;
+    }
+  }
+
+  belch(" checked %d TSOs on %d PEs; ok\n", tsos, RtsFlags.GranFlags.proc);
+}
+
+//@node Thread Queue Sanity, Blackhole Sanity, TSO Sanity
+//@subsection Thread Queue Sanity
+
+// still GRAN only
+
+//@cindex checkThreadQSanity
+extern rtsBool
+checkThreadQSanity (PEs proc, rtsBool check_TSO_too)
+{
+  StgTSO *tso, *prev;
+
+  /* the NIL value for TSOs is END_TSO_QUEUE; thus, finding NULL is an error */
+  ASSERT(run_queue_hds[proc]!=NULL);
+  ASSERT(run_queue_tls[proc]!=NULL);
+  /* if either head or tail is NIL then the other one must be NIL, too */
+  ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE || run_queue_tls[proc]==END_TSO_QUEUE);
+  ASSERT(run_queue_tls[proc]!=END_TSO_QUEUE || run_queue_hds[proc]==END_TSO_QUEUE);
+  for (tso=run_queue_hds[proc], prev=END_TSO_QUEUE;
+       tso!=END_TSO_QUEUE;
+       prev=tso, tso=tso->link) {
+    ASSERT((prev!=END_TSO_QUEUE || tso==run_queue_hds[proc]) &&
+           (prev==END_TSO_QUEUE || prev->link==tso));
+    if (check_TSO_too)
+      checkTSO(tso);
+  }
+  ASSERT(prev==run_queue_tls[proc]);
+}
+
+//@cindex checkThreadQsSanity
+extern rtsBool
+checkThreadQsSanity (rtsBool check_TSO_too)
+{
+  PEs p;
+
+  for (p=0; p<RtsFlags.GranFlags.proc; p++)
+    checkThreadQSanity(p, check_TSO_too);
+}
+#endif /* GRAN */
+
+//@node Blackhole Sanity, Index, Thread Queue Sanity
+//@subsection Blackhole Sanity
+
 rtsBool
 isBlackhole( StgTSO* tso, StgClosure* p )
 {
   StgUpdateFrame* su = tso->su;
   do {
@@ -474,4 +586,26 @@ rtsBool isBlackhole( StgTSO* tso, StgClosure* p )
 } while (1);
 }
 
+//@node Index,  , Blackhole Sanity
+//@subsection Index
+
+//@index
+//* checkChain::  @cindex\s-+checkChain
+//* checkClosureShallow::  @cindex\s-+checkClosureShallow
+//* checkHeap::  @cindex\s-+checkHeap
+//* checkLargeBitmap::  @cindex\s-+checkLargeBitmap
+//* checkSmallBitmap::  @cindex\s-+checkSmallBitmap
+//* checkStack::  @cindex\s-+checkStack
+//* checkStackChunk::  @cindex\s-+checkStackChunk
+//* checkStackChunk::  @cindex\s-+checkStackChunk
+//* checkStackClosure::  @cindex\s-+checkStackClosure
+//* checkStackObject::  @cindex\s-+checkStackObject
+//* checkTSO::  @cindex\s-+checkTSO
+//* checkTSOsSanity::  @cindex\s-+checkTSOsSanity
+//* checkThreadQSanity::  @cindex\s-+checkThreadQSanity
+//* checkThreadQsSanity::  @cindex\s-+checkThreadQsSanity
+//* isBlackhole::  @cindex\s-+isBlackhole
+//@end index
+
 #endif /* DEBUG */
+
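The queue invariants that checkThreadQSanity asserts (a dedicated sentinel marks the empty queue, NULL always means corruption, and head/tail must agree) can be isolated in a small standalone model. This is an editor's sketch with simplified types, not RTS code:

    #include <assert.h>
    #include <stddef.h>

    /* Toy model of the invariants above: END plays END_TSO_QUEUE. */
    typedef struct Tso { struct Tso *link; } Tso;
    static Tso end_marker;
    #define END (&end_marker)

    static void check_queue(Tso *hd, Tso *tl)
    {
        Tso *t, *prev;
        assert(hd != NULL && tl != NULL);   /* NULL is never a valid queue end */
        assert(hd != END || tl == END);     /* head and tail must agree */
        assert(tl != END || hd == END);     /*   on emptiness            */
        for (t = hd, prev = END; t != END; prev = t, t = t->link)
            assert(prev == END ? t == hd : prev->link == t);
        assert(prev == tl);                 /* tail really is the last element */
    }

    int main(void)
    {
        Tso a, b;
        a.link = &b; b.link = END;
        check_queue(&a, &b);                /* two-element queue is well formed */
        check_queue(END, END);              /* so is the empty queue */
        return 0;
    }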
diff --git a/ghc/rts/Sanity.h b/ghc/rts/Sanity.h
index 6ab9c84..1bd2157 100644
--- a/ghc/rts/Sanity.h
+++ b/ghc/rts/Sanity.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Sanity.h,v 1.4 1999/02/05 16:02:52 simonm Exp $
+ * $Id: Sanity.h,v 1.5 2000/01/13 14:34:04 hwloidl Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
@@ -13,6 +13,11 @@ extern void checkHeap  ( bdescr *bd, StgPtr start );
 extern void checkChain ( bdescr *bd );
 extern void checkStack ( StgPtr sp, StgPtr stack_end, StgUpdateFrame* su );
 extern void checkTSO   ( StgTSO* tso );
+#if defined(GRAN)
+extern void checkTSOsSanity(void);
+extern rtsBool checkThreadQSanity (PEs proc, rtsBool check_TSO_too);
+extern rtsBool checkThreadQsSanity (rtsBool check_TSO_too);
+#endif
 
 extern StgOffset checkClosure( StgClosure* p );
 
diff --git a/ghc/rts/Schedule.c b/ghc/rts/Schedule.c
index 1a96f87..d87f7ab 100644
--- a/ghc/rts/Schedule.c
+++ b/ghc/rts/Schedule.c
@@ -1,11 +1,18 @@
-/* -----------------------------------------------------------------------------
- * $Id: Schedule.c,v 1.40 2000/01/13 10:37:31 simonmar Exp $
+/* ---------------------------------------------------------------------------
+ * $Id: Schedule.c,v 1.41 2000/01/13 14:34:05 hwloidl Exp $
 *
 * (c) The GHC Team, 1998-1999
 *
 * Scheduler
 *
- * ---------------------------------------------------------------------------*/
+ * The main scheduling code in GranSim is quite different from that in std
+ * (concurrent) Haskell: while concurrent Haskell just iterates over the
+ * threads in the runnable queue, GranSim is event driven, i.e. it iterates
+ * over the events in the global event queue.  -- HWL
+ * --------------------------------------------------------------------------*/
+
+//@node Main scheduling code, , ,
+//@section Main scheduling code
 
 /* Version with scheduler monitor support for SMPs.
@@ -27,6 +34,23 @@
   SDM & KH, 10/99
 */
 
+//@menu
+//* Includes::
+//* Variables and Data structures::
+//* Prototypes::
+//* Main scheduling loop::
+//* Suspend and Resume::
+//* Run queue code::
+//* Garbage Collection Routines::
+//* Blocking Queue Routines::
+//* Exception Handling Routines::
+//* Debugging Routines::
+//* Index::
+//@end menu
+
+//@node Includes, Variables and Data structures, Main scheduling code, Main scheduling code
+//@subsection Includes
+
 #include "Rts.h"
 #include "SchedAPI.h"
 #include "RtsUtils.h"
@@ -48,9 +72,21 @@
 #include "Sanity.h"
 #include "Stats.h"
 #include "Sparks.h"
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h"
+# include "GranSim.h"
+# include "ParallelRts.h"
+# include "Parallel.h"
+# include "ParallelDebug.h"
+# include "FetchMe.h"
+# include "HLC.h"
+#endif
 
 #include <stdarg.h>
 
+//@node Variables and Data structures, Prototypes, Includes, Main scheduling code
+//@subsection Variables and Data structures
+
 /* Main threads:
 *
 * These are the threads which clients have requested that we run.
@@ -65,6 +101,7 @@
 *
 * Main threads information is kept in a linked list:
 */
+//@cindex StgMainThread
 typedef struct StgMainThread_ {
   StgTSO *         tso;
   SchedulerStatus  stat;
@@ -83,6 +120,47 @@ static StgMainThread *main_threads;
 
 /* Thread queues.
 * Locks required: sched_mutex.
 */
+
+#if DEBUG
+char *whatNext_strs[] = {
+  "ThreadEnterGHC",
+  "ThreadRunGHC",
+  "ThreadEnterHugs",
+  "ThreadKilled",
+  "ThreadComplete"
+};
+
+char *threadReturnCode_strs[] = {
+  "HeapOverflow",			/* might also be StackOverflow */
+  "StackOverflow",
+  "ThreadYielding",
+  "ThreadBlocked",
+  "ThreadFinished"
+};
+#endif
+
+#if defined(GRAN)
+
+StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
+// rtsTime TimeOfNextEvent, EndOfTimeSlice;            now in GranSim.c
+
+/*
+  In GranSim we have a runnable and a blocked queue for each processor.
+  In order to minimise code changes new arrays run_queue_hds/tls
+  are created.
run_queue_hd is then a short cut (macro) for + run_queue_hds[CurrentProc] (see GranSim.h). + -- HWL +*/ +StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC]; +StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC]; +StgTSO *ccalling_threadss[MAX_PROC]; + +#else /* !GRAN */ + +//@cindex run_queue_hd +//@cindex run_queue_tl +//@cindex blocked_queue_hd +//@cindex blocked_queue_tl StgTSO *run_queue_hd, *run_queue_tl; StgTSO *blocked_queue_hd, *blocked_queue_tl; @@ -93,6 +171,7 @@ static StgTSO *suspended_ccalling_threads; static void GetRoots(void); static StgTSO *threadStackOverflow(StgTSO *tso); +#endif /* KH: The following two flags are shared memory locations. There is no need to lock them, since they are only unset at the end of a scheduler @@ -100,14 +179,17 @@ static StgTSO *threadStackOverflow(StgTSO *tso); */ /* flag set by signal handler to precipitate a context switch */ +//@cindex context_switch nat context_switch; /* if this flag is set as well, give up execution */ +//@cindex interrupted rtsBool interrupted; /* Next thread ID to allocate. * Locks required: sched_mutex */ +//@cindex next_thread_id StgThreadID next_thread_id = 1; /* @@ -132,10 +214,19 @@ StgThreadID next_thread_id = 1; * Locks required: sched_mutex. */ #ifdef SMP -Capability *free_capabilities; /* Available capabilities for running threads */ -nat n_free_capabilities; /* total number of available capabilities */ +//@cindex free_capabilities +//@cindex n_free_capabilities +Capability *free_capabilities; /* Available capabilities for running threads */ +nat n_free_capabilities; /* total number of available capabilities */ +#else +//@cindex MainRegTable +Capability MainRegTable; /* for non-SMP, we have one global capability */ +#endif + +#if defined(GRAN) +StgTSO *CurrentTSOs[MAX_PROC]; #else -Capability MainRegTable; /* for non-SMP, we have one global capability */ +StgTSO *CurrentTSO; #endif rtsBool ready_to_gc; @@ -143,6 +234,7 @@ rtsBool ready_to_gc; /* All our current task ids, saved in case we need to kill them later. */ #ifdef SMP +//@cindex task_ids task_info *task_ids; #endif @@ -157,6 +249,10 @@ static void sched_belch(char *s, ...); #endif #ifdef SMP +//@cindex sched_mutex +//@cindex term_mutex +//@cindex thread_ready_cond +//@cindex gc_pending_cond pthread_mutex_t sched_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t term_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t thread_ready_cond = PTHREAD_COND_INITIALIZER; @@ -165,7 +261,35 @@ pthread_cond_t gc_pending_cond = PTHREAD_COND_INITIALIZER; nat await_death; #endif -/* ----------------------------------------------------------------------------- +#if defined(PAR) +StgTSO *LastTSO; +rtsTime TimeOfLastYield; +#endif + +/* + * The thread state for the main thread. +// ToDo: check whether not needed any more +StgTSO *MainTSO; + */ + + +//@node Prototypes, Main scheduling loop, Variables and Data structures, Main scheduling code +//@subsection Prototypes + +#if 0 && defined(GRAN) +// ToDo: replace these with macros +static /* inline */ void add_to_run_queue(StgTSO* tso); +static /* inline */ void push_on_run_queue(StgTSO* tso); +static /* inline */ StgTSO *take_off_run_queue(StgTSO *tso); + +/* Thread management */ +void initScheduler(void); +#endif + +//@node Main scheduling loop, Suspend and Resume, Prototypes, Main scheduling code +//@subsection Main scheduling loop + +/* --------------------------------------------------------------------------- Main scheduling loop. 
We use round-robin scheduling, each thread returning to the @@ -184,18 +308,35 @@ nat await_death; * waiting for work, or * waiting for a GC to complete. - -------------------------------------------------------------------------- */ - + ------------------------------------------------------------------------ */ +//@cindex schedule static void schedule( void ) { StgTSO *t; Capability *cap; StgThreadReturnCode ret; +#if defined(GRAN) + rtsEvent *event; +#elif defined(PAR) + rtsSpark spark; + StgTSO *tso; + GlobalTaskId pe; +#endif ACQUIRE_LOCK(&sched_mutex); +#if defined(GRAN) +# error ToDo: implement GranSim scheduler +#elif defined(PAR) + while (!GlobalStopPending) { /* GlobalStopPending set in par_exit */ + + if (PendingFetches != END_BF_QUEUE) { + processFetches(); + } +#else while (1) { +#endif /* If we're interrupted (the user pressed ^C, or some other * termination condition occurred), kill all the currently running @@ -267,7 +408,7 @@ schedule( void ) * number of threads in the run queue equal to the number of * free capabilities. */ -#if defined(SMP) || defined(PAR) +#if defined(SMP) { nat n = n_free_capabilities; StgTSO *tso = run_queue_hd; @@ -284,11 +425,12 @@ schedule( void ) if (spark == NULL) { break; /* no more sparks in the pool */ } else { + // I'd prefer this to be done in activateSpark -- HWL StgTSO *tso; tso = createThread_(RtsFlags.GcFlags.initialStkSize, rtsTrue); pushClosure(tso,spark); PUSH_ON_RUN_QUEUE(tso); -#ifdef ToDo +#ifdef PAR advisory_thread_count++; #endif @@ -304,7 +446,7 @@ schedule( void ) pthread_cond_signal(&thread_ready_cond); } } -#endif /* SMP || PAR */ +#endif /* SMP */ /* Check whether any waiting threads need to be woken up. If the * run queue is empty, and there are no other tasks running, we @@ -375,10 +517,114 @@ schedule( void ) IF_DEBUG(scheduler, sched_belch("work now available")); } #endif + +#if defined(GRAN) +# error ToDo: implement GranSim scheduler +#elif defined(PAR) + // ToDo: phps merge with spark activation above + /* check whether we have local work and send requests if we have none */ + if (run_queue_hd == END_TSO_QUEUE) { /* no runnable threads */ + /* :-[ no local threads => look out for local sparks */ + if (advisory_thread_count < RtsFlags.ParFlags.maxThreads && + (pending_sparks_hd[REQUIRED_POOL] < pending_sparks_tl[REQUIRED_POOL] || + pending_sparks_hd[ADVISORY_POOL] < pending_sparks_tl[ADVISORY_POOL])) { + /* + * ToDo: add GC code check that we really have enough heap afterwards!! + * Old comment: + * If we're here (no runnable threads) and we have pending + * sparks, we must have a space problem. Get enough space + * to turn one of those pending sparks into a + * thread... + */ + + spark = findSpark(); /* get a spark */ + if (spark != (rtsSpark) NULL) { + tso = activateSpark(spark); /* turn the spark into a thread */ + IF_PAR_DEBUG(verbose, + belch("== [%x] schedule: Created TSO %p (%d); %d threads active", + mytid, tso, tso->id, advisory_thread_count)); + + if (tso==END_TSO_QUEUE) { // failed to activate spark -> back to loop + belch("^^ failed to activate spark"); + goto next_thread; + } // otherwise fall through & pick-up new tso + } else { + IF_PAR_DEBUG(verbose, + belch("^^ no local sparks (spark pool contains only NFs: %d)", + spark_queue_len(ADVISORY_POOL))); + goto next_thread; + } + } else + /* =8-[ no local sparks => look for work on other PEs */ + { + /* + * We really have absolutely no work. Send out a fish + * (there may be some out there already), and wait for + * something to arrive. 
We clearly can't run any threads + * until a SCHEDULE or RESUME arrives, and so that's what + * we're hoping to see. (Of course, we still have to + * respond to other types of messages.) + */ + if (//!fishing && + outstandingFishes < RtsFlags.ParFlags.maxFishes ) { // && + // (last_fish_arrived_at+FISH_DELAY < CURRENT_TIME)) { + /* fishing set in sendFish, processFish; + avoid flooding system with fishes via delay */ + pe = choosePE(); + sendFish(pe, mytid, NEW_FISH_AGE, NEW_FISH_HISTORY, + NEW_FISH_HUNGER); + } + + processMessages(); + goto next_thread; + // ReSchedule(0); + } + } else if (PacketsWaiting()) { /* Look for incoming messages */ + processMessages(); + } + + /* Now we are sure that we have some work available */ + ASSERT(run_queue_hd != END_TSO_QUEUE); + /* Take a thread from the run queue, if we have work */ + t = take_off_run_queue(END_TSO_QUEUE); + + /* ToDo: write something to the log-file + if (RTSflags.ParFlags.granSimStats && !sameThread) + DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd); + */ + + CurrentTSO = t; + + IF_DEBUG(scheduler, belch("--^^ %d sparks on [%#x] (hd=%x; tl=%x; lim=%x)", + spark_queue_len(ADVISORY_POOL), CURRENT_PROC, + pending_sparks_hd[ADVISORY_POOL], + pending_sparks_tl[ADVISORY_POOL], + pending_sparks_lim[ADVISORY_POOL])); + + IF_DEBUG(scheduler, belch("--== %d threads on [%#x] (hd=%x; tl=%x)", + run_queue_len(), CURRENT_PROC, + run_queue_hd, run_queue_tl)); + + if (t != LastTSO) { + /* + we are running a different TSO, so write a schedule event to log file + NB: If we use fair scheduling we also have to write a deschedule + event for LastTSO; with unfair scheduling we know that the + previous tso has blocked whenever we switch to another tso, so + we don't need it in GUM for now + */ + DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, + GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0); + + } + +#else /* !GRAN && !PAR */ /* grab a thread from the run queue */ t = POP_RUN_QUEUE(); + +#endif /* grab a capability */ @@ -403,6 +649,7 @@ schedule( void ) IF_DEBUG(scheduler,sched_belch("running thread %d", t->id)); + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ /* Run the current thread */ switch (cap->rCurrentTSO->whatNext) { @@ -433,6 +680,7 @@ schedule( void ) default: barf("schedule: invalid whatNext field"); } + /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ /* Costs for the scheduler are assigned to CCS_SYSTEM */ #ifdef PROFILING @@ -487,6 +735,14 @@ schedule( void ) break; case ThreadYielding: +#if defined(GRAN) + IF_DEBUG(gran, + DumpGranEvent(GR_DESCHEDULE, t)); + globalGranStats.tot_yields++; +#elif defined(PAR) + IF_DEBUG(par, + DumpGranEvent(GR_DESCHEDULE, t)); +#endif /* put the thread back on the run queue. Then, if we're ready to * GC, check whether this is the last task to stop. If so, wake * up the GC thread. getThread will block during a GC until the @@ -507,6 +763,13 @@ schedule( void ) break; case ThreadBlocked: +#if defined(GRAN) +# error ToDo: implement GranSim scheduler +#elif defined(PAR) + IF_DEBUG(par, + DumpGranEvent(GR_DESCHEDULE, t)); +#else +#endif /* don't need to do anything. 
Either the thread is blocked on * I/O, in which case we'll have called addToBlockedQueue * previously, or it's blocked on an MVar or Blackhole, in which @@ -527,6 +790,13 @@ schedule( void ) */ IF_DEBUG(scheduler,belch("thread %ld finished", t->id)); t->whatNext = ThreadComplete; +#if defined(GRAN) + // ToDo: endThread(t, CurrentProc); // clean-up the thread +#elif defined(PAR) + advisory_thread_count--; + if (RtsFlags.ParFlags.ParStats.Full) + DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */); +#endif break; default: @@ -540,10 +810,11 @@ schedule( void ) #endif #ifdef SMP - if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) { + if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) #else - if (ready_to_gc) { + if (ready_to_gc) #endif + { /* everybody back, start the GC. * Could do it in this thread, or signal a condition var * to do it in another thread. Either way, we need to @@ -558,10 +829,26 @@ schedule( void ) pthread_cond_broadcast(&gc_pending_cond); #endif } +#if defined(GRAN) + next_thread: + IF_GRAN_DEBUG(unused, + print_eventq(EventHd)); + + event = get_next_event(); + +#elif defined(PAR) + next_thread: + /* ToDo: wait for next message to arrive rather than busy wait */ + +#else /* GRAN */ + /* not any more + next_thread: + t = take_off_run_queue(END_TSO_QUEUE); + */ +#endif /* GRAN */ } /* end of while(1) */ } - /* A hack for Hugs concurrency support. Needs sanitisation (?) */ void deleteAllThreads ( void ) { @@ -577,8 +864,12 @@ void deleteAllThreads ( void ) blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE; } +/* startThread and insertThread are now in GranSim.c -- HWL */ -/* ----------------------------------------------------------------------------- +//@node Suspend and Resume, Run queue code, Main scheduling loop, Main scheduling code +//@subsection Suspend and Resume + +/* --------------------------------------------------------------------------- * Suspending & resuming Haskell threads. * * When making a "safe" call to C (aka _ccall_GC), the task gives back @@ -591,7 +882,7 @@ void deleteAllThreads ( void ) * duration of the call, on the susepended_ccalling_threads queue. We * give out a token to the task, which it can use to resume the thread * on return from the C function. - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------- */ StgInt suspendThread( Capability *cap ) @@ -660,17 +951,18 @@ resumeThread( StgInt tok ) return cap; } -/* ----------------------------------------------------------------------------- + +/* --------------------------------------------------------------------------- * Static functions - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */ static void unblockThread(StgTSO *tso); -/* ----------------------------------------------------------------------------- +/* --------------------------------------------------------------------------- * Comparing Thread ids. * * This is used from STG land in the implementation of the * instances of Eq/Ord for ThreadIds. 
- * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */ int cmp_thread(const StgTSO *tso1, const StgTSO *tso2) { @@ -682,7 +974,7 @@ int cmp_thread(const StgTSO *tso1, const StgTSO *tso2) return 0; } -/* ----------------------------------------------------------------------------- +/* --------------------------------------------------------------------------- Create a new thread. The new thread starts with the given stack size. Before the @@ -692,19 +984,50 @@ int cmp_thread(const StgTSO *tso1, const StgTSO *tso2) createGenThread() and createIOThread() (in SchedAPI.h) are convenient packaged versions of this function. - -------------------------------------------------------------------------- */ + ------------------------------------------------------------------------ */ +//@cindex createThread +#if defined(GRAN) +/* currently pri (priority) is only used in a GRAN setup -- HWL */ +StgTSO * +createThread(nat stack_size, StgInt pri) +{ + return createThread_(stack_size, rtsFalse, pri); +} +static StgTSO * +createThread_(nat size, rtsBool have_lock, StgInt pri) +{ +#else StgTSO * -createThread(nat size) +createThread(nat stack_size) { - return createThread_(size, rtsFalse); + return createThread_(stack_size, rtsFalse); } static StgTSO * createThread_(nat size, rtsBool have_lock) { - StgTSO *tso; - nat stack_size; +#endif + StgTSO *tso; + nat stack_size; + + /* First check whether we should create a thread at all */ +#if defined(PAR) + /* check that no more than RtsFlags.ParFlags.maxThreads threads are created */ + if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads) { + threadsIgnored++; + belch("{createThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)", + RtsFlags.ParFlags.maxThreads, advisory_thread_count); + return END_TSO_QUEUE; + } + threadsCreated++; +#endif + +#if defined(GRAN) + ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0); +#endif + + // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW /* catch ridiculously small stack sizes */ if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) { @@ -716,9 +1039,13 @@ createThread_(nat size, rtsBool have_lock) stack_size = size - TSO_STRUCT_SIZEW; + // Hmm, this CCS_MAIN is not protected by a PROFILING cpp var; SET_HDR(tso, &TSO_info, CCS_MAIN); - tso->whatNext = ThreadEnterGHC; - +#if defined(GRAN) + SET_GRAN_HDR(tso, ThisPE); +#endif + tso->whatNext = ThreadEnterGHC; + /* tso->id needs to be unique. For now we use a heavyweight mutex to protect the increment operation on next_thread_id. In future, we could use an atomic increment instead. 
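The comment above leaves the atomic increment as future work; the patch itself keeps the heavyweight mutex around next_thread_id++. Purely as an illustration of the alternative, a standalone sketch using the GCC/Clang builtin __sync_fetch_and_add (which postdates this code; the type is a stand-in):

    #include <stdio.h>

    /* Standalone sketch only; StgThreadID is a stand-in typedef here. */
    typedef unsigned int StgThreadID;
    static volatile StgThreadID next_thread_id = 1;

    static StgThreadID
    allocThreadId(void)
    {
        /* atomically returns the old counter value and bumps it by one,
           so two concurrent callers can never obtain the same id */
        return __sync_fetch_and_add(&next_thread_id, 1);
    }

    int main(void)
    {
        printf("%u %u\n", allocThreadId(), allocThreadId());  /* 1 2 */
        return 0;
    }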
@@ -746,13 +1073,69 @@ createThread_(nat size, rtsBool have_lock) SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_MAIN); tso->su = (StgUpdateFrame*)tso->sp; + IF_DEBUG(scheduler,belch("---- Initialised TSO %ld (%p), stack size = %lx words", + tso->id, tso, tso->stack_size)); + + // ToDo: check this +#if defined(GRAN) + tso->link = END_TSO_QUEUE; + /* uses more flexible routine in GranSim */ + insertThread(tso, CurrentProc); +#else + add_to_run_queue(tso); +#endif + +#if defined(GRAN) + tso->gran.pri = pri; + tso->gran.magic = TSO_MAGIC; // debugging only + tso->gran.sparkname = 0; + tso->gran.startedat = CURRENT_TIME; + tso->gran.exported = 0; + tso->gran.basicblocks = 0; + tso->gran.allocs = 0; + tso->gran.exectime = 0; + tso->gran.fetchtime = 0; + tso->gran.fetchcount = 0; + tso->gran.blocktime = 0; + tso->gran.blockcount = 0; + tso->gran.blockedat = 0; + tso->gran.globalsparks = 0; + tso->gran.localsparks = 0; + if (RtsFlags.GranFlags.Light) + tso->gran.clock = Now; /* local clock */ + else + tso->gran.clock = 0; + + IF_DEBUG(gran,printTSO(tso)); +#elif defined(PAR) + tso->par.sparkname = 0; + tso->par.startedat = CURRENT_TIME; + tso->par.exported = 0; + tso->par.basicblocks = 0; + tso->par.allocs = 0; + tso->par.exectime = 0; + tso->par.fetchtime = 0; + tso->par.fetchcount = 0; + tso->par.blocktime = 0; + tso->par.blockcount = 0; + tso->par.blockedat = 0; + tso->par.globalsparks = 0; + tso->par.localsparks = 0; +#endif + +#if defined(GRAN) + globalGranStats.tot_threads_created++; + globalGranStats.threads_created_on_PE[CurrentProc]++; + globalGranStats.tot_sq_len += spark_queue_len(CurrentProc); + globalGranStats.tot_sq_probes++; +#endif + IF_DEBUG(scheduler,sched_belch("created thread %ld, stack size = %lx words", tso->id, tso->stack_size)); return tso; } - -/* ----------------------------------------------------------------------------- +/* --------------------------------------------------------------------------- * scheduleThread() * * scheduleThread puts a thread on the head of the runnable queue. @@ -760,7 +1143,7 @@ createThread_(nat size, rtsBool have_lock) * The caller of scheduleThread must create the thread using e.g. * createThread and push an appropriate closure * on this thread's stack before the scheduler is invoked. - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */ void scheduleThread(StgTSO *tso) @@ -779,14 +1162,13 @@ scheduleThread(StgTSO *tso) RELEASE_LOCK(&sched_mutex); } - -/* ----------------------------------------------------------------------------- +/* --------------------------------------------------------------------------- * startTasks() * * Start up Posix threads to run each of the scheduler tasks. * I believe the task ids are not needed in the system as defined. - * KH @ 25/10/99 - * -------------------------------------------------------------------------- */ + * KH @ 25/10/99 + * ------------------------------------------------------------------------ */ #ifdef SMP static void * @@ -797,7 +1179,7 @@ taskStart( void *arg STG_UNUSED ) } #endif -/* ----------------------------------------------------------------------------- +/* --------------------------------------------------------------------------- * initScheduler() * * Initialise the scheduler. This resets all the queues - if the @@ -805,7 +1187,7 @@ taskStart( void *arg STG_UNUSED ) * next pass. * * This now calls startTasks(), so should only be called once! 
 KH @ 25/10/99
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------ */
 
 #ifdef SMP
 static void
@@ -819,12 +1201,26 @@ term_handler(int sig STG_UNUSED)
 }
 #endif
 
-void initScheduler(void)
+//@cindex initScheduler
+void
+initScheduler(void)
 {
+#if defined(GRAN)
+  nat i;
+
+  for (i=0; i<MAX_PROC; i++) {
+    run_queue_hds[i]      = END_TSO_QUEUE;
+    run_queue_tls[i]      = END_TSO_QUEUE;
+    blocked_queue_hds[i]  = END_TSO_QUEUE;
+    blocked_queue_tls[i]  = END_TSO_QUEUE;
+    ccalling_threadss[i]  = END_TSO_QUEUE;
+  }
+#else
   run_queue_hd      = END_TSO_QUEUE;
   run_queue_tl      = END_TSO_QUEUE;
   blocked_queue_hd  = END_TSO_QUEUE;
   blocked_queue_tl  = END_TSO_QUEUE;
+#endif
 
   suspended_ccalling_threads  = END_TSO_QUEUE;
 
@@ -1009,42 +1405,127 @@ waitThread(StgTSO *tso, /*out*/StgClosure **ret)
   return stat;
 }
 
-
-/* -----------------------------------------------------------------------------
-   Debugging: why is a thread blocked
-   -------------------------------------------------------------------------- */
-
-#ifdef DEBUG
-void printThreadBlockage(StgTSO *tso)
+//@node Run queue code, Garbage Collection Routines, Suspend and Resume, Main scheduling code
+//@subsection Run queue code
+
+#if 0
+/*
+  NB: In GranSim we have many run queues; run_queue_hd is actually a macro
+      unfolding to run_queue_hds[CurrentProc], thus CurrentProc is an
+      implicit global variable that has to be correct when calling these
+      fcts -- HWL
+*/
+
+/* Put the new thread on the head of the runnable queue.
+ * The caller of createThread better push an appropriate closure
+ * on this thread's stack before the scheduler is invoked.
+ */
+static /* inline */ void
+add_to_run_queue(tso)
+StgTSO* tso;
 {
-  switch (tso->why_blocked) {
-  case BlockedOnRead:
-    fprintf(stderr,"blocked on read from fd %d", tso->block_info.fd);
-    break;
-  case BlockedOnWrite:
-    fprintf(stderr,"blocked on write to fd %d", tso->block_info.fd);
-    break;
-  case BlockedOnDelay:
-    fprintf(stderr,"blocked on delay of %d ms", tso->block_info.delay);
-    break;
-  case BlockedOnMVar:
-    fprintf(stderr,"blocked on an MVar");
-    break;
-  case BlockedOnException:
-    fprintf(stderr,"blocked on delivering an exception to thread %d",
-	    tso->block_info.tso->id);
-    break;
-  case BlockedOnBlackHole:
-    fprintf(stderr,"blocked on a black hole");
-    break;
-  case NotBlocked:
-    fprintf(stderr,"not blocked");
-    break;
+  ASSERT(tso!=run_queue_hd && tso!=run_queue_tl);
+  tso->link = run_queue_hd;
+  run_queue_hd = tso;
+  if (run_queue_tl == END_TSO_QUEUE) {
+    run_queue_tl = tso;
   }
 }
-#endif
 
+/* Put the new thread at the end of the runnable queue. */
+static /* inline */ void
+push_on_run_queue(tso)
+StgTSO* tso;
+{
+  ASSERT(get_itbl((StgClosure *)tso)->type == TSO);
+  ASSERT(run_queue_hd!=NULL && run_queue_tl!=NULL);
+  ASSERT(tso!=run_queue_hd && tso!=run_queue_tl);
+  if (run_queue_hd == END_TSO_QUEUE) {
+    run_queue_hd = tso;
+  } else {
+    run_queue_tl->link = tso;
+  }
+  run_queue_tl = tso;
+}
+
+/*
+  Should be inlined because it's used very often in schedule.  The tso
+  argument is actually only needed in GranSim, where we want to have the
+  possibility to schedule *any* TSO on the run queue, irrespective of the
+  actual ordering. Therefore, if tso is not the nil TSO then we traverse
+  the run queue and dequeue the tso, adjusting the links in the queue.
+*/
+//@cindex take_off_run_queue
+static /* inline */ StgTSO*
+take_off_run_queue(StgTSO *tso) {
+  StgTSO *t, *prev;
+
+  /*
+    qetlaHbogh Qu' ngaSbogh ghomDaQ {tso} yIteq!
+
+    if tso is specified, unlink that tso from the run_queue (doesn't have
+    to be at the beginning of the queue); GranSim only
+  */
+  if (tso!=END_TSO_QUEUE) {
+    /* find tso in queue */
+    for (t=run_queue_hd, prev=END_TSO_QUEUE;
+	 t!=END_TSO_QUEUE && t!=tso;
+	 prev=t, t=t->link)
+      /* nothing */ ;
+    ASSERT(t==tso);
+    /* now actually dequeue the tso */
+    if (prev!=END_TSO_QUEUE) {
+      ASSERT(run_queue_hd!=t);
+      prev->link = t->link;
+    } else {
+      /* t is at beginning of thread queue */
+      ASSERT(run_queue_hd==t);
+      run_queue_hd = t->link;
+    }
+    /* t is at end of thread queue */
+    if (t->link==END_TSO_QUEUE) {
+      ASSERT(t==run_queue_tl);
+      run_queue_tl = prev;
+    } else {
+      ASSERT(run_queue_tl!=t);
+    }
+    t->link = END_TSO_QUEUE;
+  } else {
+    /* take tso from the beginning of the queue; std concurrent code */
+    t = run_queue_hd;
+    if (t != END_TSO_QUEUE) {
+      run_queue_hd = t->link;
+      t->link = END_TSO_QUEUE;
+      if (run_queue_hd == END_TSO_QUEUE) {
+	run_queue_tl = END_TSO_QUEUE;
+      }
+    }
+  }
+  return t;
+}
+
+#endif /* 0 */
+
+nat
+run_queue_len(void)
+{
+  nat i;
+  StgTSO *tso;
+
+  for (i=0, tso=run_queue_hd;
+       tso != END_TSO_QUEUE;
+       i++, tso=tso->link)
+    /* nothing */ ;
+
+  return i;
+}
+
+
+//@node Garbage Collection Routines, Blocking Queue Routines, Run queue code, Main scheduling code
+//@subsection Garbage Collection Routines
+
+/* ---------------------------------------------------------------------------
   Where are the roots that we know about?
 
        - all the threads on the runnable queue
@@ -1052,7 +1533,7 @@
       - all the threads on the blocked queue
       - all the thread currently executing a _ccall_GC
       - all the "main threads"
 
-   -------------------------------------------------------------------------- */
+   ------------------------------------------------------------------------ */
 
 /* This has to be protected either by the scheduler monitor, or by the
    garbage collection monitor (probably the latter).
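take_off_run_queue above has to keep run_queue_hd and run_queue_tl consistent whether the TSO is unlinked from the head, the middle, or the tail of the queue. A toy standalone model of exactly those invariants (simplified types, plain asserts in place of the RTS ASSERTs; an editor's sketch, not RTS code):

    #include <assert.h>
    #include <stddef.h>

    /* Toy model: NULL plays END_TSO_QUEUE. */
    typedef struct Tso { struct Tso *link; } Tso;
    static Tso *hd = NULL, *tl = NULL;

    static void take_off(Tso *t)   /* mirrors take_off_run_queue's unlinking */
    {
        Tso *p, *prev = NULL;
        for (p = hd; p != NULL && p != t; prev = p, p = p->link)
            /* nothing */ ;
        assert(p == t);                         /* t must be in the queue */
        if (prev != NULL) prev->link = t->link; else hd = t->link;
        if (t->link == NULL) tl = prev;         /* t was the tail */
        t->link = NULL;
    }

    int main(void)
    {
        Tso a = { NULL }, b = { NULL }, c = { NULL };
        hd = &a; a.link = &b; b.link = &c; tl = &c;
        take_off(&b);   /* middle: head and tail untouched */
        assert(hd == &a && a.link == &c && tl == &c);
        take_off(&c);   /* tail: tl retreats to the predecessor */
        assert(tl == &a && a.link == NULL);
        take_off(&a);   /* head: queue becomes empty */
        assert(hd == NULL && tl == NULL);
        return 0;
    }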
@@ -1062,12 +1543,36 @@
 static void GetRoots(void)
 {
   StgMainThread *m;
+  nat i;
+
+#if defined(GRAN)
+  for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
+    if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
+      run_queue_hds[i]    = (StgTSO *)MarkRoot((StgClosure *)run_queue_hds[i]);
+    if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
+      run_queue_tls[i]    = (StgTSO *)MarkRoot((StgClosure *)run_queue_tls[i]);
+
+    if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
+      blocked_queue_hds[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hds[i]);
+    if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
+      blocked_queue_tls[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tls[i]);
+    if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
+      ccalling_threadss[i] = (StgTSO *)MarkRoot((StgClosure *)ccalling_threadss[i]);
+  }
+
+  markEventQueue();
+
+#elif defined(PAR)
+  run_queue_hd      = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd);
+  run_queue_tl      = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl);
+
+  blocked_queue_hd  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd);
+  blocked_queue_tl  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl);
+#else
   run_queue_hd      = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd);
   run_queue_tl      = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl);
 
   blocked_queue_hd  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd);
   blocked_queue_tl  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl);
+#endif
 
   for (m = main_threads; m != NULL; m = m->link) {
     m->tso = (StgTSO *)MarkRoot((StgClosure *)m->tso);
@@ -1205,10 +1710,93 @@ threadStackOverflow(StgTSO *tso)
   return dest;
 }
 
+//@node Blocking Queue Routines, Exception Handling Routines, Garbage Collection Routines, Main scheduling code
+//@subsection Blocking Queue Routines
+
 /* ---------------------------------------------------------------------------
   Wake up a queue that was blocked on some resource.
-   -------------------------------------------------------------------------- */
+   ------------------------------------------------------------------------ */
+
+// ToDo: check push_on_run_queue vs.
PUSH_ON_RUN_QUEUE + +#if defined(GRAN) +# error FixME +#elif defined(PAR) +static inline void +unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node ) +{ + /* write RESUME events to log file and + update blocked and fetch time (depending on type of the orig closure) */ + if (RtsFlags.ParFlags.ParStats.Full) { + DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, + GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, + 0, 0 /* spark_queue_len(ADVISORY_POOL) */); + + switch (get_itbl(node)->type) { + case FETCH_ME_BQ: + ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; + break; + case RBH: + case FETCH_ME: + case BLACKHOLE_BQ: + ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; + break; + default: + barf("{unblockOneLocked}Daq Qagh: unexpected closure in blocking queue"); + } + } +} +#endif + +#if defined(GRAN) +# error FixME +#elif defined(PAR) +static StgBlockingQueueElement * +unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node) +{ + StgBlockingQueueElement *next; + + switch (get_itbl(bqe)->type) { + case TSO: + ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked); + /* if it's a TSO just push it onto the run_queue */ + next = bqe->link; + // ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging? + PUSH_ON_RUN_QUEUE((StgTSO *)bqe); + THREAD_RUNNABLE(); + unblockCount(bqe, node); + /* reset blocking status after dumping event */ + ((StgTSO *)bqe)->why_blocked = NotBlocked; + break; + + case BLOCKED_FETCH: + /* if it's a BLOCKED_FETCH put it on the PendingFetches list */ + next = bqe->link; + bqe->link = PendingFetches; + PendingFetches = bqe; + break; +# if defined(DEBUG) + /* can ignore this case in a non-debugging setup; + see comments on RBHSave closures above */ + case CONSTR: + /* check that the closure is an RBHSave closure */ + ASSERT(get_itbl((StgClosure *)bqe) == &RBH_Save_0_info || + get_itbl((StgClosure *)bqe) == &RBH_Save_1_info || + get_itbl((StgClosure *)bqe) == &RBH_Save_2_info); + break; + + default: + barf("{unblockOneLocked}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n", + get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe), + (StgClosure *)bqe); +# endif + } + // IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id)); + return next; +} + +#else /* !GRAN && !PAR */ static StgTSO * unblockOneLocked(StgTSO *tso) { @@ -1223,7 +1811,20 @@ unblockOneLocked(StgTSO *tso) IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id)); return next; } +#endif +#if defined(GRAN) +# error FixME +#elif defined(PAR) +inline StgTSO * +unblockOne(StgTSO *tso, StgClosure *node) +{ + ACQUIRE_LOCK(&sched_mutex); + tso = unblockOneLocked(tso, node); + RELEASE_LOCK(&sched_mutex); + return tso; +} +#else inline StgTSO * unblockOne(StgTSO *tso) { @@ -1232,7 +1833,35 @@ unblockOne(StgTSO *tso) RELEASE_LOCK(&sched_mutex); return tso; } +#endif +#if defined(GRAN) +# error FixME +#elif defined(PAR) +void +awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node) +{ + StgBlockingQueueElement *bqe, *next; + + ACQUIRE_LOCK(&sched_mutex); + + IF_PAR_DEBUG(verbose, + belch("## AwBQ for node %p on [%x]: ", + node, mytid)); + + ASSERT(get_itbl(q)->type == TSO || + get_itbl(q)->type == BLOCKED_FETCH || + get_itbl(q)->type == CONSTR); + + bqe = q; + while (get_itbl(bqe)->type==TSO || + get_itbl(bqe)->type==BLOCKED_FETCH) { + bqe = unblockOneLocked(bqe, node); + } + RELEASE_LOCK(&sched_mutex); +} + +#else /* !GRAN && !PAR */ void awakenBlockedQueue(StgTSO *tso) { @@ -1242,11 +1871,275 @@ 
awakenBlockedQueue(StgTSO *tso) } RELEASE_LOCK(&sched_mutex); } +#endif -/* ----------------------------------------------------------------------------- +#if 0 +// ngoq ngo' + +#if defined(GRAN) +/* + Awakening a blocking queue in GranSim means checking for each of the + TSOs in the queue whether they are local or not, issuing a ResumeThread + or an UnblockThread event, respectively. The basic iteration over the + blocking queue is the same as in the standard setup. +*/ +void +awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node) +{ + StgBlockingQueueElement *bqe, *next; + StgTSO *tso; + PEs node_loc, tso_loc; + rtsTime bq_processing_time = 0; + nat len = 0, len_local = 0; + + IF_GRAN_DEBUG(bq, + belch("## AwBQ for node %p on PE %d @ %ld by TSO %d (%p): ", \ + node, CurrentProc, CurrentTime[CurrentProc], + CurrentTSO->id, CurrentTSO)); + + node_loc = where_is(node); + + ASSERT(get_itbl(q)->type == TSO || // q is either a TSO or an RBHSave + get_itbl(q)->type == CONSTR); // closure (type constructor) + ASSERT(is_unique(node)); + + /* FAKE FETCH: magically copy the node to the tso's proc; + no Fetch necessary because in reality the node should not have been + moved to the other PE in the first place + */ + if (CurrentProc!=node_loc) { + IF_GRAN_DEBUG(bq, + belch("## node %p is on PE %d but CurrentProc is %d (TSO %d); assuming fake fetch and adjusting bitmask (old: %#x)", + node, node_loc, CurrentProc, CurrentTSO->id, + // CurrentTSO, where_is(CurrentTSO), + node->header.gran.procs)); + node->header.gran.procs = (node->header.gran.procs) | PE_NUMBER(CurrentProc); + IF_GRAN_DEBUG(bq, + belch("## new bitmask of node %p is %#x", + node, node->header.gran.procs)); + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.tot_fake_fetches++; + } + } + + next = q; + // ToDo: check: ASSERT(CurrentProc==node_loc); + while (get_itbl(next)->type==TSO) { // q != END_TSO_QUEUE) { + bqe = next; + next = bqe->link; + /* + bqe points to the current element in the queue + next points to the next element in the queue + */ + tso = (StgTSO *)bqe; // wastes an assignment to get the type right + tso_loc = where_is(tso); + if (IS_LOCAL_TO(PROCS(node),tso_loc)) { // TSO is local + /* !fake_fetch => TSO is on CurrentProc is same as IS_LOCAL_TO */ + ASSERT(CurrentProc!=node_loc || tso_loc==CurrentProc); + bq_processing_time += RtsFlags.GranFlags.Costs.lunblocktime; + // insertThread(tso, node_loc); + new_event(tso_loc, tso_loc, + CurrentTime[CurrentProc]+bq_processing_time, + ResumeThread, + tso, node, (rtsSpark*)NULL); + tso->link = END_TSO_QUEUE; // overwrite link just to be sure + len_local++; + len++; + } else { // TSO is remote (actually should be FMBQ) + bq_processing_time += RtsFlags.GranFlags.Costs.mpacktime; + bq_processing_time += RtsFlags.GranFlags.Costs.gunblocktime; + new_event(tso_loc, CurrentProc, + CurrentTime[CurrentProc]+bq_processing_time+ + RtsFlags.GranFlags.Costs.latency, + UnblockThread, + tso, node, (rtsSpark*)NULL); + tso->link = END_TSO_QUEUE; // overwrite link just to be sure + bq_processing_time += RtsFlags.GranFlags.Costs.mtidytime; + len++; + } + /* the thread-queue-overhead is accounted for in either Resume or UnblockThread */ + IF_GRAN_DEBUG(bq, + fprintf(stderr," %s TSO %d (%p) [PE %d] (blocked_on=%p) (next=%p) ,", + (node_loc==tso_loc ? 
"Local" : "Global"), + tso->id, tso, CurrentProc, tso->block_info.closure, tso->link)) + tso->block_info.closure = NULL; + IF_DEBUG(scheduler,belch("-- Waking up thread %ld (%p)", + tso->id, tso)); + } + + /* if this is the BQ of an RBH, we have to put back the info ripped out of + the closure to make room for the anchor of the BQ */ + if (next!=END_BQ_QUEUE) { + ASSERT(get_itbl(node)->type == RBH && get_itbl(next)->type == CONSTR); + /* + ASSERT((info_ptr==&RBH_Save_0_info) || + (info_ptr==&RBH_Save_1_info) || + (info_ptr==&RBH_Save_2_info)); + */ + /* cf. convertToRBH in RBH.c for writing the RBHSave closure */ + ((StgRBH *)node)->blocking_queue = ((StgRBHSave *)next)->payload[0]; + ((StgRBH *)node)->mut_link = ((StgRBHSave *)next)->payload[1]; + + IF_GRAN_DEBUG(bq, + belch("## Filled in RBH_Save for %p (%s) at end of AwBQ", + node, info_type(node))); + } + + /* statistics gathering */ + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.tot_bq_processing_time += bq_processing_time; + globalGranStats.tot_bq_len += len; // total length of all bqs awakened + globalGranStats.tot_bq_len_local += len_local; // same for local TSOs only + globalGranStats.tot_awbq++; // total no. of bqs awakened + } + IF_GRAN_DEBUG(bq, + fprintf(stderr,"## BQ Stats of %p: [%d entries, %d local] %s\n", + node, len, len_local, (next!=END_TSO_QUEUE) ? "RBH" : "")); +} + +#elif defined(PAR) + +/* + Awakening a blocking queue in GUM has to check whether an entry in the + queue is a normal TSO or a BLOCKED_FETCH. The later indicates that a TSO is + waiting for the result of this computation on another PE. Thus, when + finding a BLOCKED_FETCH we have to send off a message to that PE. + Actually, we defer sending off a message, by just putting the BLOCKED_FETCH + onto the PendingFetches queue, which will be later traversed by + processFetches, sending off a RESUME message for each BLOCKED_FETCH. + + NB: There is no check for an RBHSave closure (type CONSTR) in the code + below. The reason is, if we awaken the BQ of an RBH closure (RBHSaves + only exist at the end of such BQs) we know that the closure has been + unpacked successfully on the other PE, and we can discard the info + contained in the RBHSave closure. The current closure will be turned + into a FetchMe closure anyway. 
+*/ +void +awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node) +{ + StgBlockingQueueElement *bqe, *next; + + IF_PAR_DEBUG(verbose, + belch("## AwBQ for node %p on [%x]: ", + node, mytid)); + + ASSERT(get_itbl(q)->type == TSO || + get_itbl(q)->type == BLOCKED_FETCH || + get_itbl(q)->type == CONSTR); + + next = q; + while (get_itbl(next)->type==TSO || + get_itbl(next)->type==BLOCKED_FETCH) { + bqe = next; + switch (get_itbl(bqe)->type) { + case TSO: + /* if it's a TSO just push it onto the run_queue */ + next = bqe->link; +#if defined(DEBUG) + ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging only +#endif + push_on_run_queue((StgTSO *)bqe); // HWL: was: PUSH_ON_RUN_QUEUE(tso); + + /* write RESUME events to log file and + update blocked and fetch time (depending on type of the orig closure) */ + if (RtsFlags.ParFlags.ParStats.Full) { + DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, + GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, + 0, spark_queue_len(ADVISORY_POOL)); + + switch (get_itbl(node)->type) { + case FETCH_ME_BQ: + ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; + break; + case RBH: + case FETCH_ME: + case BLACKHOLE_BQ: + ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; + break; + default: + barf("{awaken_blocked_queue}Daq Qagh: unexpected closure %p (%s) with blocking queue", + node, info_type(node)); + } + } + /* reset block_info.closure field after dumping event */ + ((StgTSO *)bqe)->block_info.closure = NULL; + + /* rest of this branch is debugging only */ + IF_PAR_DEBUG(verbose, + fprintf(stderr," TSO %d (%p) [PE %lx] (block_info.closure=%p) (next=%p) ,", + ((StgTSO *)bqe)->id, (StgTSO *)bqe, + mytid, ((StgTSO *)bqe)->block_info.closure, ((StgTSO *)bqe)->link)); + + IF_DEBUG(scheduler, + if (!RtsFlags.ParFlags.Debug.verbose) + belch("-- Waking up thread %ld (%p)", + ((StgTSO *)bqe)->id, (StgTSO *)bqe)); + break; + + case BLOCKED_FETCH: + /* if it's a BLOCKED_FETCH put it on the PendingFetches list */ + next = bqe->link; + bqe->link = PendingFetches; + PendingFetches = bqe; + // bqe.tso->block_info.closure = NULL; + + /* rest of this branch is debugging only */ + IF_PAR_DEBUG(verbose, + fprintf(stderr," BLOCKED_FETCH (%p) on node %p [PE %lx] (next=%p) ,", + ((StgBlockedFetch *)bqe), + ((StgBlockedFetch *)bqe)->node, + mytid, ((StgBlockedFetch *)bqe)->link)); + break; + +# if defined(DEBUG) + /* can ignore this case in a non-debugging setup; + see comments on RBHSave closures above */ + case CONSTR: + /* check that the closure is an RBHSave closure */ + ASSERT(get_itbl((StgClosure *)bqe) == &RBH_Save_0_info || + get_itbl((StgClosure *)bqe) == &RBH_Save_1_info || + get_itbl((StgClosure *)bqe) == &RBH_Save_2_info); + break; + + default: + barf("{awaken_blocked_queue}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n", + get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe), + (StgClosure *)bqe); +# endif + } + } +} + +#else /* !GRAN && !PAR */ + +void +awaken_blocked_queue(StgTSO *q) { awakenBlockedQueue(q); } + +/* +{ + StgTSO *tso; + + while (q != END_TSO_QUEUE) { + ASSERT(get_itbl(q)->type == TSO); + tso = q; + q = tso->link; + push_on_run_queue(tso); // HWL: was: PUSH_ON_RUN_QUEUE(tso); + //tso->block_info.closure = NULL; + IF_DEBUG(scheduler, belch("-- Waking up thread %ld (%p)", tso->id, tso)); + } +} +*/ +#endif /* GRAN */ +#endif /* 0 */ + +//@node Exception Handling Routines, Debugging Routines, Blocking Queue Routines, Main scheduling code +//@subsection Exception 
Handling Routines + +/* --------------------------------------------------------------------------- Interrupt execution - usually called inside a signal handler so it mustn't do anything fancy. - -------------------------------------------------------------------------- */ + ------------------------------------------------------------------------ */ void interruptStgRts(void) @@ -1260,6 +2153,7 @@ interruptStgRts(void) This is for use when we raise an exception in another thread, which may be blocked. + This has nothing to do with the UnblockThread event in GranSim. -- HWL -------------------------------------------------------------------------- */ static void @@ -1593,11 +2487,202 @@ raiseAsync(StgTSO *tso, StgClosure *exception) barf("raiseAsync"); } +//@node Debugging Routines, Index, Exception Handling Routines, Main scheduling code +//@subsection Debugging Routines + /* ----------------------------------------------------------------------------- - Debuggery... + Debugging: why is a thread blocked -------------------------------------------------------------------------- */ #ifdef DEBUG + +void printThreadBlockage(StgTSO *tso) +{ + switch (tso->why_blocked) { + case BlockedOnRead: + fprintf(stderr,"blocked on read from fd %d", tso->block_info.fd); + break; + case BlockedOnWrite: + fprintf(stderr,"blocked on write to fd %d", tso->block_info.fd); + break; + case BlockedOnDelay: + fprintf(stderr,"blocked on delay of %d ms", tso->block_info.delay); + break; + case BlockedOnMVar: + fprintf(stderr,"blocked on an MVar"); + break; + case BlockedOnException: + fprintf(stderr,"blocked on delivering an exception to thread %d", + tso->block_info.tso->id); + break; + case BlockedOnBlackHole: + fprintf(stderr,"blocked on a black hole"); + break; + case NotBlocked: + fprintf(stderr,"not blocked"); + break; +#if defined(PAR) + case BlockedOnGA: + fprintf(stderr,"blocked on global address"); + break; +#endif + } +} + +/* + Print a whole blocking queue attached to node (debugging only). +*/ +//@cindex print_bq +# if defined(PAR) +void +print_bq (StgClosure *node) +{ + StgBlockingQueueElement *bqe; + StgTSO *tso; + rtsBool end; + + fprintf(stderr,"## BQ of closure %p (%s): ", + node, info_type(node)); + + /* should cover all closures that may have a blocking queue */ + ASSERT(get_itbl(node)->type == BLACKHOLE_BQ || + get_itbl(node)->type == FETCH_ME_BQ || + get_itbl(node)->type == RBH); + + ASSERT(node!=(StgClosure*)NULL); // sanity check + /* + NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure; + */ + for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE); + !end; // iterate until bqe points to a CONSTR + end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? 
END_BQ_QUEUE : bqe->link) { + ASSERT(bqe != END_BQ_QUEUE); // sanity check + ASSERT(bqe != (StgTSO*)NULL); // sanity check + /* types of closures that may appear in a blocking queue */ + ASSERT(get_itbl(bqe)->type == TSO || + get_itbl(bqe)->type == BLOCKED_FETCH || + get_itbl(bqe)->type == CONSTR); + /* only BQs of an RBH end with an RBH_Save closure */ + ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH); + + switch (get_itbl(bqe)->type) { + case TSO: + fprintf(stderr," TSO %d (%x),", + ((StgTSO *)bqe)->id, ((StgTSO *)bqe)); + break; + case BLOCKED_FETCH: + fprintf(stderr," BF (node=%p, ga=((%x, %d, %x)),", + ((StgBlockedFetch *)bqe)->node, + ((StgBlockedFetch *)bqe)->ga.payload.gc.gtid, + ((StgBlockedFetch *)bqe)->ga.payload.gc.slot, + ((StgBlockedFetch *)bqe)->ga.weight); + break; + case CONSTR: + fprintf(stderr," %s (IP %p),", + (get_itbl(bqe) == &RBH_Save_0_info ? "RBH_Save_0" : + get_itbl(bqe) == &RBH_Save_1_info ? "RBH_Save_1" : + get_itbl(bqe) == &RBH_Save_2_info ? "RBH_Save_2" : + "RBH_Save_?"), get_itbl(bqe)); + break; + default: + barf("Unexpected closure type %s in blocking queue of %p (%s)", + info_type(bqe), node, info_type(node)); + break; + } + } /* for */ + fputc('\n', stderr); +} +# elif defined(GRAN) +void +print_bq (StgClosure *node) +{ + StgBlockingQueueElement *bqe; + StgTSO *tso; + PEs node_loc, tso_loc; + rtsBool end; + + /* should cover all closures that may have a blocking queue */ + ASSERT(get_itbl(node)->type == BLACKHOLE_BQ || + get_itbl(node)->type == FETCH_ME_BQ || + get_itbl(node)->type == RBH); + + ASSERT(node!=(StgClosure*)NULL); // sanity check + node_loc = where_is(node); + + fprintf(stderr,"## BQ of closure %p (%s) on [PE %d]: ", + node, info_type(node), node_loc); + + /* + NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure; + */ + for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE); + !end; // iterate until bqe points to a CONSTR + end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) { + ASSERT(bqe != END_BQ_QUEUE); // sanity check + ASSERT(bqe != (StgTSO*)NULL); // sanity check + /* types of closures that may appear in a blocking queue */ + ASSERT(get_itbl(bqe)->type == TSO || + get_itbl(bqe)->type == CONSTR); + /* only BQs of an RBH end with an RBH_Save closure */ + ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH); + + tso_loc = where_is((StgClosure *)bqe); + switch (get_itbl(bqe)->type) { + case TSO: + fprintf(stderr," TSO %d (%x) on [PE %d],", + ((StgTSO *)bqe)->id, ((StgTSO *)bqe), tso_loc); + break; + case CONSTR: + fprintf(stderr," %s (IP %p),", + (get_itbl(bqe) == &RBH_Save_0_info ? "RBH_Save_0" : + get_itbl(bqe) == &RBH_Save_1_info ? "RBH_Save_1" : + get_itbl(bqe) == &RBH_Save_2_info ? 
"RBH_Save_2" : + "RBH_Save_?"), get_itbl(bqe)); + break; + default: + barf("Unexpected closure type %s in blocking queue of %p (%s)", + info_type(bqe), node, info_type(node)); + break; + } + } /* for */ + fputc('\n', stderr); +} +#else +/* + Nice and easy: only TSOs on the blocking queue +*/ +void +print_bq (StgClosure *node) +{ + StgTSO *tso; + + ASSERT(node!=(StgClosure*)NULL); // sanity check + for (tso = ((StgBlockingQueue*)node)->blocking_queue; + tso != END_TSO_QUEUE; + tso=tso->link) { + ASSERT(tso!=(StgTSO*)NULL && tso!=END_TSO_QUEUE); // sanity check + ASSERT(get_itbl(tso)->type == TSO); // guess what, sanity check + fprintf(stderr," TSO %d (%x),", tso->id, tso); + } + fputc('\n', stderr); +} +# endif + +/* A debugging function used all over the place in GranSim and GUM. + Dummy function in other setups. +*/ +# if !defined(GRAN) && !defined(PAR) +char * +info_type(StgClosure *closure){ + return "petaQ"; +} + +char * +info_type_by_ip(StgInfoTable *ip){ + return "petaQ"; +} +#endif + static void sched_belch(char *s, ...) { @@ -1611,4 +2696,33 @@ sched_belch(char *s, ...) vfprintf(stderr, s, ap); fprintf(stderr, "\n"); } -#endif + +#endif /* DEBUG */ + +//@node Index, , Debugging Routines, Main scheduling code +//@subsection Index + +//@index +//* MainRegTable:: @cindex\s-+MainRegTable +//* StgMainThread:: @cindex\s-+StgMainThread +//* awaken_blocked_queue:: @cindex\s-+awaken_blocked_queue +//* blocked_queue_hd:: @cindex\s-+blocked_queue_hd +//* blocked_queue_tl:: @cindex\s-+blocked_queue_tl +//* context_switch:: @cindex\s-+context_switch +//* createThread:: @cindex\s-+createThread +//* free_capabilities:: @cindex\s-+free_capabilities +//* gc_pending_cond:: @cindex\s-+gc_pending_cond +//* initScheduler:: @cindex\s-+initScheduler +//* interrupted:: @cindex\s-+interrupted +//* n_free_capabilities:: @cindex\s-+n_free_capabilities +//* next_thread_id:: @cindex\s-+next_thread_id +//* print_bq:: @cindex\s-+print_bq +//* run_queue_hd:: @cindex\s-+run_queue_hd +//* run_queue_tl:: @cindex\s-+run_queue_tl +//* sched_mutex:: @cindex\s-+sched_mutex +//* schedule:: @cindex\s-+schedule +//* take_off_run_queue:: @cindex\s-+take_off_run_queue +//* task_ids:: @cindex\s-+task_ids +//* term_mutex:: @cindex\s-+term_mutex +//* thread_ready_cond:: @cindex\s-+thread_ready_cond +//@end index diff --git a/ghc/rts/Schedule.h b/ghc/rts/Schedule.h index f559efc..1c93099 100644 --- a/ghc/rts/Schedule.h +++ b/ghc/rts/Schedule.h @@ -1,13 +1,26 @@ /* ----------------------------------------------------------------------------- - * $Id: Schedule.h,v 1.12 2000/01/12 15:15:18 simonmar Exp $ + * $Id: Schedule.h,v 1.13 2000/01/13 14:34:05 hwloidl Exp $ * * (c) The GHC Team 1998-1999 * * Prototypes for functions in Schedule.c * (RTS internal scheduler interface) * - * ---------------------------------------------------------------------------*/ + * -------------------------------------------------------------------------*/ +//@menu +//* Scheduler Functions:: +//* Scheduler Vars and Data Types:: +//* Some convenient macros:: +//* Index:: +//@end menu + +//@node Scheduler Functions, Scheduler Vars and Data Types +//@subsection Scheduler Functions + +//@cindex initScheduler +//@cindex exitScheduler +//@cindex startTasks /* initScheduler(), exitScheduler(), startTasks() * * Called from STG : no @@ -19,6 +32,7 @@ void exitScheduler( void ); void startTasks( void ); #endif +//@cindex awakenBlockedQueue /* awakenBlockedQueue() * * Takes a pointer to the beginning of a blocked TSO queue, and @@ -27,8 +41,15 @@ void startTasks( 
void ); * Called from STG : yes * Locks assumed : none */ +#if defined(GRAN) +# error FixME +#elif defined(PAR) +void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node); +#else void awakenBlockedQueue(StgTSO *tso); +#endif +//@cindex unblockOne /* unblockOne() * * Takes a pointer to the beginning of a blocked TSO queue, and @@ -37,8 +58,15 @@ void awakenBlockedQueue(StgTSO *tso); * Called from STG : yes * Locks assumed : none */ +#if defined(GRAN) +# error FixME +#elif defined(PAR) +StgTSO *unblockOne(StgTSO *tso, StgClosure *node); +#else StgTSO *unblockOne(StgTSO *tso); +#endif +//@cindex raiseAsync /* raiseAsync() * * Raises an exception asynchronously in the specified thread. @@ -48,6 +76,7 @@ StgTSO *unblockOne(StgTSO *tso); */ void raiseAsync(StgTSO *tso, StgClosure *exception); +//@cindex awaitEvent /* awaitEvent() * * Raises an exception asynchronously in the specified thread. @@ -57,6 +86,33 @@ void raiseAsync(StgTSO *tso, StgClosure *exception); */ void awaitEvent(rtsBool wait); /* In Select.c */ +// ToDo: check whether all fcts below are used in the SMP version, too +//@cindex awaken_blocked_queue +#if defined(GRAN) +void awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node); +void unlink_from_bq(StgTSO* tso, StgClosure* node); +void initThread(StgTSO *tso, nat stack_size, StgInt pri); +#elif defined(PAR) +nat run_queue_len(void); +void awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node); +void initThread(StgTSO *tso, nat stack_size); +#else +char *info_type(StgClosure *closure); // dummy +char *info_type_by_ip(StgInfoTable *ip); // dummy +void awaken_blocked_queue(StgTSO *q); +void initThread(StgTSO *tso, nat stack_size); +#endif + +// debugging only +#ifdef DEBUG +extern void printThreadBlockage(StgTSO *tso); +#endif +void print_bq (StgClosure *node); + +//@node Scheduler Vars and Data Types, Some convenient macros, Scheduler Functions +//@subsection Scheduler Vars and Data Types + +//@cindex context_switch /* Context switch flag. * Locks required : sched_mutex */ @@ -65,6 +121,7 @@ extern rtsBool interrupted; extern nat ticks_since_select; +//@cindex Capability /* Capability type */ typedef StgRegTable Capability; @@ -85,16 +142,16 @@ extern Capability MainRegTable; extern StgTSO *run_queue_hd, *run_queue_tl; extern StgTSO *blocked_queue_hd, *blocked_queue_tl; -#ifdef DEBUG -extern void printThreadBlockage(StgTSO *tso); -#endif - #ifdef SMP +//@cindex sched_mutex +//@cindex thread_ready_cond +//@cindex gc_pending_cond extern pthread_mutex_t sched_mutex; extern pthread_cond_t thread_ready_cond; extern pthread_cond_t gc_pending_cond; #endif +//@cindex task_info #ifdef SMP typedef struct { pthread_t id; @@ -108,9 +165,19 @@ typedef struct { extern task_info *task_ids; #endif +#if !defined(GRAN) +extern StgTSO *run_queue_hd, *run_queue_tl; +extern StgTSO *blocked_queue_hd, *blocked_queue_tl; +#endif + /* Needed by Hugs. */ void interruptStgRts ( void ); +// ?? needed -- HWL +void raiseAsync(StgTSO *tso, StgClosure *exception); + +//@node Some convenient macros, Index, Scheduler Vars and Data Types +//@subsection Some convenient macros /* ----------------------------------------------------------------------------- * Some convenient macros... @@ -119,6 +186,7 @@ void interruptStgRts ( void ); #define END_TSO_QUEUE ((StgTSO *)(void*)&END_TSO_QUEUE_closure) #define END_CAF_LIST ((StgCAF *)(void*)&END_TSO_QUEUE_closure) +//@cindex APPEND_TO_RUN_QUEUE /* Add a thread to the end of the run queue. 
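+ *
+ * A minimal usage sketch (assuming tso is a runnable thread; see the
+ * NOTE below for the link invariant):
+ *
+ *   tso->link = END_TSO_QUEUE;
+ *   APPEND_TO_RUN_QUEUE(tso);
+ *   THREAD_RUNNABLE();   // wake a waiting task in the SMP build
+ *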
* NOTE: tso->link should be END_TSO_QUEUE before calling this macro. */ @@ -131,6 +199,7 @@ void interruptStgRts ( void ); } \ run_queue_tl = tso; +//@cindex PUSH_ON_RUN_QUEUE /* Push a thread on the beginning of the run queue. Used for * newly awakened threads, so they get run as soon as possible. */ @@ -140,7 +209,8 @@ void interruptStgRts ( void ); if (run_queue_tl == END_TSO_QUEUE) { \ run_queue_tl = tso; \ } - + +//@cindex POP_RUN_QUEUE /* Pop the first thread off the runnable queue. */ #define POP_RUN_QUEUE() \ @@ -155,6 +225,7 @@ void interruptStgRts ( void ); t; \ }) +//@cindex APPEND_TO_BLOCKED_QUEUE /* Add a thread to the end of the blocked queue. */ #define APPEND_TO_BLOCKED_QUEUE(tso) \ @@ -166,6 +237,7 @@ void interruptStgRts ( void ); } \ blocked_queue_tl = tso; +//@cindex THREAD_RUNNABLE /* Signal that a runnable thread has become available, in * case there are any waiting tasks to execute it. */ @@ -179,3 +251,27 @@ void interruptStgRts ( void ); #define THREAD_RUNNABLE() /* nothing */ #endif +//@node Index, , Some convenient macros +//@subsection Index + +//@index +//* APPEND_TO_BLOCKED_QUEUE:: @cindex\s-+APPEND_TO_BLOCKED_QUEUE +//* APPEND_TO_RUN_QUEUE:: @cindex\s-+APPEND_TO_RUN_QUEUE +//* Capability:: @cindex\s-+Capability +//* POP_RUN_QUEUE :: @cindex\s-+POP_RUN_QUEUE +//* PUSH_ON_RUN_QUEUE:: @cindex\s-+PUSH_ON_RUN_QUEUE +//* THREAD_RUNNABLE:: @cindex\s-+THREAD_RUNNABLE +//* awaitEvent:: @cindex\s-+awaitEvent +//* awakenBlockedQueue:: @cindex\s-+awakenBlockedQueue +//* awaken_blocked_queue:: @cindex\s-+awaken_blocked_queue +//* context_switch:: @cindex\s-+context_switch +//* exitScheduler:: @cindex\s-+exitScheduler +//* gc_pending_cond:: @cindex\s-+gc_pending_cond +//* initScheduler:: @cindex\s-+initScheduler +//* raiseAsync:: @cindex\s-+raiseAsync +//* sched_mutex:: @cindex\s-+sched_mutex +//* startTasks:: @cindex\s-+startTasks +//* task_info:: @cindex\s-+task_info +//* thread_ready_cond:: @cindex\s-+thread_ready_cond +//* unblockOne:: @cindex\s-+unblockOne +//@end index diff --git a/ghc/rts/StgMiscClosures.hc b/ghc/rts/StgMiscClosures.hc index 8c436d8..4809be7 100644 --- a/ghc/rts/StgMiscClosures.hc +++ b/ghc/rts/StgMiscClosures.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.hc,v 1.30 1999/11/30 11:43:26 simonmar Exp $ + * $Id: StgMiscClosures.hc,v 1.31 2000/01/13 14:34:05 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -9,12 +9,17 @@ #include "Rts.h" #include "RtsUtils.h" +#include "RtsFlags.h" #include "StgMiscClosures.h" #include "HeapStackCheck.h" /* for stg_gen_yield */ #include "Storage.h" #include "StoragePriv.h" #include "ProfRts.h" #include "SMP.h" +#if defined(GRAN) || defined(PAR) +# include "GranSimRts.h" /* for DumpRawGranEvent */ +# include "StgRun.h" /* for StgReturn and register saving */ +#endif #ifdef HAVE_STDIO_H #include @@ -25,6 +30,20 @@ */ #define DUMP_ERRMSG(msg) STGCALL2(fprintf,stderr,msg) +/* + Template for the entry code of non-enterable closures. +*/ + +#define NON_ENTERABLE_ENTRY_CODE(type) \ +STGFUN(type##_entry) \ +{ \ + FB_ \ + DUMP_ERRMSG(#type " object entered!\n"); \ + STGCALL1(raiseError, errorHandler); \ + stg_exit(EXIT_FAILURE); /* not executed */ \ + FE_ \ +} + /* ----------------------------------------------------------------------------- Entry code for an indirection. 
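+
+/* For illustration: NON_ENTERABLE_ENTRY_CODE (defined at the top of this
+   file) expands mechanically, so NON_ENTERABLE_ENTRY_CODE(TSO), used
+   further down, generates the following entry code:
+
+     STGFUN(TSO_entry)
+     {
+       FB_
+       DUMP_ERRMSG("TSO object entered!\n");
+       STGCALL1(raiseError, errorHandler);
+       stg_exit(EXIT_FAILURE);  // not executed
+       FE_
+     }
+*/
+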
@@ -185,6 +204,11 @@ INFO_TABLE(BLACKHOLE_info, BLACKHOLE_entry,0,2,BLACKHOLE,,EF_,0,0); STGFUN(BLACKHOLE_entry) { FB_ +#if defined(GRAN) + /* Before overwriting TSO_LINK */ + STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +#endif + #ifdef SMP CMPXCHG(R1.cl->header.info, &BLACKHOLE_info, &WHITEHOLE_info); #endif @@ -192,15 +216,43 @@ STGFUN(BLACKHOLE_entry) TICK_ENT_BH(); /* Put ourselves on the blocking queue for this black hole */ - CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure; +#if defined(GRAN) || defined(PAR) + /* in fact, only difference is the type of the end-of-queue marker! */ + CurrentTSO->link = END_BQ_QUEUE; + ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO; +#else + CurrentTSO->link = END_TSO_QUEUE; ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; +#endif + /* jot down why and on what closure we are blocked */ CurrentTSO->why_blocked = BlockedOnBlackHole; CurrentTSO->block_info.closure = R1.cl; + /* closure is mutable since something has just been added to its BQ */ recordMutable((StgMutClosure *)R1.cl); /* Change the BLACKHOLE into a BLACKHOLE_BQ */ ((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info; + +#if defined(PAR) + /* Save the Thread State here, before calling RTS routines below! */ + SAVE_THREAD_STATE(1); + + /* if collecting stats update the execution time etc */ + if (RtsFlags.ParFlags.ParStats.Full) { + /* Note that CURRENT_TIME may perform an unsafe call */ + //rtsTime now = CURRENT_TIME; /* Now */ + CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; + CurrentTSO->par.blockcount++; + CurrentTSO->par.blockedat = CURRENT_TIME; + DumpRawGranEvent(CURRENT_PROC, thisPE, + GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); + } + + THREAD_RETURN(1); /* back to the scheduler */ +#else /* stg_gen_block is too heavyweight, use a specialised one */ BLOCK_NP(1); +#endif + FE_ } @@ -208,6 +260,11 @@ INFO_TABLE(BLACKHOLE_BQ_info, BLACKHOLE_BQ_entry,1,1,BLACKHOLE_BQ,,EF_,0,0); STGFUN(BLACKHOLE_BQ_entry) { FB_ +#if defined(GRAN) + /* Before overwriting TSO_LINK */ + STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +#endif + #ifdef SMP CMPXCHG(R1.cl->header.info, &BLACKHOLE_BQ_info, &WHITEHOLE_info); #endif @@ -215,42 +272,156 @@ STGFUN(BLACKHOLE_BQ_entry) TICK_ENT_BH(); /* Put ourselves on the blocking queue for this black hole */ - CurrentTSO->why_blocked = BlockedOnBlackHole; - CurrentTSO->block_info.closure = R1.cl; CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue; ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; + /* jot down why and on what closure we are blocked */ + CurrentTSO->why_blocked = BlockedOnBlackHole; + CurrentTSO->block_info.closure = R1.cl; #ifdef SMP ((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info; #endif +#if defined(PAR) + /* Save the Thread State here, before calling RTS routines below! 
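+     (Presumably because the RTS calls below, e.g. DumpRawGranEvent, are
+     ordinary C calls that must see a consistent TSO, and because
+     THREAD_RETURN(1) hands control back to the scheduler rather than
+     falling through to the FE_.)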
*/ + SAVE_THREAD_STATE(1); + + /* if collecting stats update the execution time etc */ + if (RtsFlags.ParFlags.ParStats.Full) { + /* Note that CURRENT_TIME may perform an unsafe call */ + //rtsTime now = CURRENT_TIME; /* Now */ + CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; + CurrentTSO->par.blockcount++; + CurrentTSO->par.blockedat = CURRENT_TIME; + DumpRawGranEvent(CURRENT_PROC, thisPE, + GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); + } + + THREAD_RETURN(1); /* back to the scheduler */ +#else /* stg_gen_block is too heavyweight, use a specialised one */ BLOCK_NP(1); +#endif + FE_ +} + +/* + Revertible black holes are needed in the parallel world, to handle + negative acknowledgements of messages containing updatable closures. + The idea is that when the original message is transmitted, the closure + is turned into a revertible black hole...an object which acts like a + black hole when local threads try to enter it, but which can be reverted + back to the original closure if necessary. + + It's actually a lot like a blocking queue (BQ) entry, because revertible + black holes are initially set up with an empty blocking queue. +*/ + +#if defined(PAR) || defined(GRAN) + +INFO_TABLE(RBH_info, RBH_entry,1,1,RBH,,EF_,0,0); +STGFUN(RBH_entry) +{ + FB_ +# if defined(GRAN) + /* mainly statistics gathering for GranSim simulation */ + STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +# endif + + /* exactly the same as a BLACKHOLE_BQ_entry -- HWL */ + /* Put ourselves on the blocking queue for this black hole */ + CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue; + ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; + /* jot down why and on what closure we are blocked */ + CurrentTSO->why_blocked = BlockedOnBlackHole; + CurrentTSO->block_info.closure = R1.cl; + +#if defined(PAR) + /* Save the Thread State here, before calling RTS routines below! 
*/ + SAVE_THREAD_STATE(1); + + /* if collecting stats update the execution time etc */ + if (RtsFlags.ParFlags.ParStats.Full) { + /* Note that CURRENT_TIME may perform an unsafe call */ + //rtsTime now = CURRENT_TIME; /* Now */ + CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; + CurrentTSO->par.blockcount++; + CurrentTSO->par.blockedat = CURRENT_TIME; + DumpRawGranEvent(CURRENT_PROC, thisPE, + GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); + } + + THREAD_RETURN(1); /* back to the scheduler */ +#else + /* saves thread state and leaves thread in ThreadEnterGHC state; */ + /* stg_gen_block is too heavyweight, use a specialised one */ + BLOCK_NP(1); +#endif + FE_ } +INFO_TABLE(RBH_Save_0_info, RBH_Save_0_entry,0,2,CONSTR,,EF_,0,0); +NON_ENTERABLE_ENTRY_CODE(RBH_Save_0); + +INFO_TABLE(RBH_Save_1_info, RBH_Save_1_entry,1,1,CONSTR,,EF_,0,0); +NON_ENTERABLE_ENTRY_CODE(RBH_Save_1); + +INFO_TABLE(RBH_Save_2_info, RBH_Save_2_entry,2,0,CONSTR,,EF_,0,0); +NON_ENTERABLE_ENTRY_CODE(RBH_Save_2); +#endif /* defined(PAR) || defined(GRAN) */ + /* identical to BLACKHOLEs except for the infotag */ INFO_TABLE(CAF_BLACKHOLE_info, CAF_BLACKHOLE_entry,0,2,CAF_BLACKHOLE,,EF_,0,0); STGFUN(CAF_BLACKHOLE_entry) { FB_ +#if defined(GRAN) + /* mainly statistics gathering for GranSim simulation */ + STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +#endif + #ifdef SMP CMPXCHG(R1.cl->header.info, &CAF_BLACKHOLE_info, &WHITEHOLE_info); +#endif TICK_ENT_BH(); /* Put ourselves on the blocking queue for this black hole */ - CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure; +#if defined(GRAN) || defined(PAR) + /* in fact, only difference is the type of the end-of-queue marker! */ + CurrentTSO->link = END_BQ_QUEUE; + ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO; +#else + CurrentTSO->link = END_TSO_QUEUE; ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; +#endif + /* jot down why and on what closure we are blocked */ CurrentTSO->why_blocked = BlockedOnBlackHole; CurrentTSO->block_info.closure = R1.cl; + /* closure is mutable since something has just been added to its BQ */ recordMutable((StgMutClosure *)R1.cl); /* Change the CAF_BLACKHOLE into a BLACKHOLE_BQ */ ((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info; - /* stg_gen_block is too heavyweight, use a specialised one */ - BLOCK_NP(1); +#if defined(PAR) + /* Save the Thread State here, before calling RTS routines below! */ + SAVE_THREAD_STATE(1); + + /* if collecting stats update the execution time etc */ + if (RtsFlags.ParFlags.ParStats.Full) { + /* Note that CURRENT_TIME may perform an unsafe call */ + //rtsTime now = CURRENT_TIME; /* Now */ + CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; + CurrentTSO->par.blockcount++; + CurrentTSO->par.blockedat = CURRENT_TIME; + DumpRawGranEvent(CURRENT_PROC, thisPE, + GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); + } + + THREAD_RETURN(1); /* back to the scheduler */ #else - JMP_(BLACKHOLE_entry); + /* stg_gen_block is too heavyweight, use a specialised one */ + BLOCK_NP(1); #endif FE_ @@ -301,17 +472,9 @@ EF_(BCO_entry) { /* ----------------------------------------------------------------------------- Some static info tables for things that don't get entered, and therefore don't need entry code (i.e. 
boxed but unpointed objects) + NON_ENTERABLE_ENTRY_CODE now defined at the beginning of the file -------------------------------------------------------------------------- */ -#define NON_ENTERABLE_ENTRY_CODE(type) \ -STGFUN(type##_entry) \ -{ \ - FB_ \ - DUMP_ERRMSG(#type " object entered!\n"); \ - STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE); \ - FE_ \ -} - INFO_TABLE(TSO_info, TSO_entry, 0,0,TSO,,EF_,0,0); NON_ENTERABLE_ENTRY_CODE(TSO); diff --git a/ghc/rts/Storage.h b/ghc/rts/Storage.h index c996edf..17076bf 100644 --- a/ghc/rts/Storage.h +++ b/ghc/rts/Storage.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Storage.h,v 1.13 1999/11/11 11:49:26 simonmar Exp $ + * $Id: Storage.h,v 1.14 2000/01/13 14:34:05 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -178,5 +178,10 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure * extern StgCAF* enteredCAFs; +#if defined(DEBUG) +void printMutOnceList(generation *gen); +void printMutableList(generation *gen); +#endif DEBUG + #endif STORAGE_H diff --git a/ghc/rts/Updates.hc b/ghc/rts/Updates.hc index 53f2476..38e69e8 100644 --- a/ghc/rts/Updates.hc +++ b/ghc/rts/Updates.hc @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Updates.hc,v 1.24 1999/12/01 14:34:39 simonmar Exp $ + * $Id: Updates.hc,v 1.25 2000/01/13 14:34:05 hwloidl Exp $ * * (c) The GHC Team, 1998-1999 * @@ -13,6 +13,9 @@ #include "HeapStackCheck.h" #include "Storage.h" #include "ProfRts.h" +#if defined(GRAN) || defined(PAR) +# include "FetchMe.h" +#endif /* The update frame return address must be *polymorphic*, that means @@ -245,11 +248,6 @@ EXTFUN(stg_update_PAP) */ Fun = R1.cl; -#if defined(GRAN_COUNT) -#error Fixme. - ++nPAPs; -#endif - /* Just copy the whole block of stack between the stack pointer * and the update frame pointer. */ diff --git a/ghc/rts/parallel/0Hash.c b/ghc/rts/parallel/0Hash.c new file mode 100644 index 0000000..56e6646 --- /dev/null +++ b/ghc/rts/parallel/0Hash.c @@ -0,0 +1,321 @@ +/*----------------------------------------------------------------------------- + * $Id: 0Hash.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + * + * (c) The AQUA Project, Glasgow University, 1995-1998 + * (c) The GHC Team, 1999 + * + * Dynamically expanding linear hash tables, as described in + * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988, + * pp. 446 -- 457. 
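+ *
+ * A small worked example of the scheme (with toy sizes; the real minimum
+ * table size is HSEGSIZE = 1024): suppose max = 4 buckets, so mask1 = 3
+ * and mask2 = 7.  A key k is first hashed to k & 3; if that bucket lies
+ * below the split pointer it has already been split, and the key is
+ * re-hashed to k & 7, landing either in the old bucket or in its image
+ * in the upper half of the table.  Each expansion step splits exactly
+ * one bucket and advances the split pointer; once split reaches max,
+ * the table size doubles, mask1 takes over the old mask2, and mask2
+ * gains another bit.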
+ * -------------------------------------------------------------------------- */ + +/* + Replaced with ghc/rts/Hash.c in the new RTS +*/ + +#if 0 + +#include "Rts.h" +#include "Hash.h" +#include "RtsUtils.h" + +#define HSEGSIZE 1024 /* Size of a single hash table segment */ + /* Also the minimum size of a hash table */ +#define HDIRSIZE 1024 /* Size of the segment directory */ + /* Maximum hash table size is HSEGSIZE * HDIRSIZE */ +#define HLOAD 5 /* Maximum average load of a single hash bucket */ + +#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList)) + /* Number of HashList cells to allocate in one go */ + + +/* Linked list of (key, data) pairs for separate chaining */ +struct hashlist { + StgWord key; + void *data; + struct hashlist *next; /* Next cell in bucket chain (same hash value) */ +}; + +typedef struct hashlist HashList; + +struct hashtable { + int split; /* Next bucket to split when expanding */ + int max; /* Max bucket of smaller table */ + int mask1; /* Mask for doing the mod of h_1 (smaller table) */ + int mask2; /* Mask for doing the mod of h_2 (larger table) */ + int kcount; /* Number of keys */ + int bcount; /* Number of buckets */ + HashList **dir[HDIRSIZE]; /* Directory of segments */ +}; + +/* ----------------------------------------------------------------------------- + * Hash first using the smaller table. If the bucket is less than the + * next bucket to be split, re-hash using the larger table. + * -------------------------------------------------------------------------- */ + +static int +hash(HashTable *table, W_ key) +{ + int bucket; + + /* Strip the boring zero bits */ + key /= sizeof(StgWord); + + /* Mod the size of the hash table (a power of 2) */ + bucket = key & table->mask1; + + if (bucket < table->split) { + /* Mod the size of the expanded hash table (also a power of 2) */ + bucket = key & table->mask2; + } + return bucket; +} + +/* ----------------------------------------------------------------------------- + * Allocate a new segment of the dynamically growing hash table. + * -------------------------------------------------------------------------- */ + +static void +allocSegment(HashTable *table, int segment) +{ + table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *), + "allocSegment"); +} + + +/* ----------------------------------------------------------------------------- + * Expand the larger hash table by one bucket, and split one bucket + * from the smaller table into two parts. Only the bucket referenced + * by @table->split@ is affected by the expansion. + * -------------------------------------------------------------------------- */ + +static void +expand(HashTable *table) +{ + int oldsegment; + int oldindex; + int newbucket; + int newsegment; + int newindex; + HashList *hl; + HashList *next; + HashList *old, *new; + + if (table->split + table->max >= HDIRSIZE * HSEGSIZE) + /* Wow! That's big. Too big, so don't expand. 
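+	 With HSEGSIZE = HDIRSIZE = 1024 this caps the table at 2^20
+	 buckets; beyond that point inserts still succeed, but the
+	 bucket chains simply grow past the HLOAD target of 5.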
*/ + return; + + /* Calculate indices of bucket to split */ + oldsegment = table->split / HSEGSIZE; + oldindex = table->split % HSEGSIZE; + + newbucket = table->max + table->split; + + /* And the indices of the new bucket */ + newsegment = newbucket / HSEGSIZE; + newindex = newbucket % HSEGSIZE; + + if (newindex == 0) + allocSegment(table, newsegment); + + if (++table->split == table->max) { + table->split = 0; + table->max *= 2; + table->mask1 = table->mask2; + table->mask2 = table->mask2 << 1 | 1; + } + table->bcount++; + + /* Split the bucket, paying no attention to the original order */ + + old = new = NULL; + for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) { + next = hl->next; + if (hash(table, hl->key) == newbucket) { + hl->next = new; + new = hl; + } else { + hl->next = old; + old = hl; + } + } + table->dir[oldsegment][oldindex] = old; + table->dir[newsegment][newindex] = new; + + return; +} + +void * +lookupHashTable(HashTable *table, StgWord key) +{ + int bucket; + int segment; + int index; + HashList *hl; + + bucket = hash(table, key); + segment = bucket / HSEGSIZE; + index = bucket % HSEGSIZE; + + for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) + if (hl->key == key) + return hl->data; + + /* It's not there */ + return NULL; +} + +/* ----------------------------------------------------------------------------- + * We allocate the hashlist cells in large chunks to cut down on malloc + * overhead. Although we keep a free list of hashlist cells, we make + * no effort to actually return the space to the malloc arena. + * -------------------------------------------------------------------------- */ + +static HashList *freeList = NULL; + +static HashList * +allocHashList(void) +{ + HashList *hl, *p; + + if ((hl = freeList) != NULL) { + freeList = hl->next; + } else { + hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList"); + + freeList = hl + 1; + for (p = freeList; p < hl + HCHUNK - 1; p++) + p->next = p + 1; + p->next = NULL; + } + return hl; +} + +static void +freeHashList(HashList *hl) +{ + hl->next = freeList; + freeList = hl; +} + +void +insertHashTable(HashTable *table, StgWord key, void *data) +{ + int bucket; + int segment; + int index; + HashList *hl; + + /* We want no duplicates */ + ASSERT(lookupHashTable(table, key) == NULL); + + /* When the average load gets too high, we expand the table */ + if (++table->kcount >= HLOAD * table->bcount) + expand(table); + + bucket = hash(table, key); + segment = bucket / HSEGSIZE; + index = bucket % HSEGSIZE; + + hl = allocHashList(); + + hl->key = key; + hl->data = data; + hl->next = table->dir[segment][index]; + table->dir[segment][index] = hl; + +} + +void * +removeHashTable(HashTable *table, StgWord key, void *data) +{ + int bucket; + int segment; + int index; + HashList *hl; + HashList *prev = NULL; + + bucket = hash(table, key); + segment = bucket / HSEGSIZE; + index = bucket % HSEGSIZE; + + for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { + if (hl->key == key && (data == NULL || hl->data == data)) { + if (prev == NULL) + table->dir[segment][index] = hl->next; + else + prev->next = hl->next; + table->kcount--; + return hl->data; + } + prev = hl; + } + + /* It's not there */ + ASSERT(data == NULL); + return NULL; +} + +/* ----------------------------------------------------------------------------- + * When we free a hash table, we are also good enough to free the + * data part of each (key, data) pair, as long as our caller can tell + * us how to do it. 
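+ *
+ * For example, a table whose data entries were obtained from malloc can
+ * be torn down with
+ *
+ *   freeHashTable(table, free);
+ *
+ * while passing NULL as freeDataFun frees the table structure only.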
+ * -------------------------------------------------------------------------- */ + +void +freeHashTable(HashTable *table, void (*freeDataFun)(void *) ) +{ + long segment; + long index; + HashList *hl; + HashList *next; + + /* The last bucket with something in it is table->max + table->split - 1 */ + segment = (table->max + table->split - 1) / HSEGSIZE; + index = (table->max + table->split - 1) % HSEGSIZE; + + while (segment >= 0) { + while (index >= 0) { + for (hl = table->dir[segment][index]; hl != NULL; hl = next) { + next = hl->next; + if (freeDataFun != NULL) + (*freeDataFun)(hl->data); + freeHashList(hl); + } + index--; + } + free(table->dir[segment]); + segment--; + index = HSEGSIZE - 1; + } + free(table); +} + +/* ----------------------------------------------------------------------------- + * When we initialize a hash table, we set up the first segment as well, + * initializing all of the first segment's hash buckets to NULL. + * -------------------------------------------------------------------------- */ + +HashTable * +allocHashTable(void) +{ + HashTable *table; + HashList **hb; + + table = stgMallocBytes(sizeof(HashTable),"allocHashTable"); + + allocSegment(table, 0); + + for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++) + *hb = NULL; + + table->split = 0; + table->max = HSEGSIZE; + table->mask1 = HSEGSIZE - 1; + table->mask2 = 2 * HSEGSIZE - 1; + table->kcount = 0; + table->bcount = HSEGSIZE; + + return table; +} +#endif diff --git a/ghc/rts/parallel/0Parallel.h b/ghc/rts/parallel/0Parallel.h new file mode 100644 index 0000000..d52bf00 --- /dev/null +++ b/ghc/rts/parallel/0Parallel.h @@ -0,0 +1,414 @@ +/* + Time-stamp: + + Definitions for parallel machines. + +This section contains definitions applicable only to programs compiled +to run on a parallel machine, i.e. on GUM. Some of these definitions +are also used when simulating parallel execution, i.e. on GranSim. + */ + +/* + ToDo: Check the PAR specfic part of this file + Move stuff into Closures.h and ClosureMacros.h + Clean-up GRAN specific code + -- HWL + */ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#if defined(PAR) || defined(GRAN) /* whole file */ + +#include "Rts.h" +#include "GranSim.h" +//#include "ClosureTypes.h" + +//@menu +//* Basic definitions:: +//* Externs and types:: +//* Dummy defs:: +//* Par specific fixed headers:: +//* Parallel only heap objects:: +//* Packing definitions:: +//* End of File:: +//@end menu +//*/ + +//@node Basic definitions, Externs and types +//@section Basic definitions + +/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */ + +/* Needed for dumping routines */ +#if defined(PAR) +# define TIME ullong +# define CURRENT_TIME msTime() +# define TIME_ON_PROC(p) msTime() +# define CURRENT_PROC thisPE +# define BINARY_STATS RtsFlags.ParFlags.granSimStats_Binary +#elif defined(GRAN) +# define TIME rtsTime +# define CURRENT_TIME CurrentTime[CurrentProc] +# define TIME_ON_PROC(p) CurrentTime[p] +# define CURRENT_PROC CurrentProc +# define BINARY_STATS RtsFlags.GranFlags.granSimStats_Binary +#endif + +#if defined(PAR) +# define MAX_PES 256 /* Maximum number of processors */ + /* MAX_PES is enforced by SysMan, which does not + allow more than this many "processors". + This is important because PackGA [GlobAddr.lc] + **assumes** that a PE# can fit in 8+ bits. 
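+			   (With MAX_PES = 256 a PE number ranges over
+			   0..255 and thus fits in exactly 8 bits.)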
+ */ +#endif + +//@node Externs and types, Dummy defs, Basic definitions +//@section Externs and types + +#if defined(PAR) +/* GUM: one spark queue on each PE, and each PE sees only its own spark queue */ +extern rtsSparkQ pending_sparks_hd; +extern rtsSparkQ pending_sparks_tl; +#elif defined(GRAN) +/* GranSim: a globally visible array of spark queues */ +extern rtsSparkQ pending_sparks_hds[]; +extern rtsSparkQ pending_sparks_tls[]; +#endif +extern unsigned int /* nat */ spark_queue_len(PEs proc); + +extern StgInt SparksAvail; /* How many sparks are available */ + +/* prototypes of spark routines */ +/* ToDo: check whether all have to be visible -- HWL */ +#if defined(GRAN) +rtsSpark *newSpark(StgClosure *node, StgInt name, StgInt gran_info, StgInt size_info, StgInt par_info, StgInt local); +void disposeSpark(rtsSpark *spark); +void disposeSparkQ(rtsSparkQ spark); +void add_to_spark_queue(rtsSpark *spark); +void delete_from_spark_queue (rtsSpark *spark); +#endif + +#define STATS_FILENAME_MAXLEN 128 + +/* Where to write the log file */ +//extern FILE *gr_file; +extern char gr_filename[STATS_FILENAME_MAXLEN]; + +#if defined(GRAN) +int init_gr_simulation(char *rts_argv[], int rts_argc, char *prog_argv[], int prog_argc); +void end_gr_simulation(void); +#endif + +#if defined(PAR) +extern I_ do_sp_profile; + +extern P_ PendingFetches; +extern GLOBAL_TASK_ID *PEs; + +extern rtsBool IAmMainThread, GlobalStopPending; +extern rtsBool fishing; +extern GLOBAL_TASK_ID SysManTask; +extern int seed; /*pseudo-random-number generator seed:*/ + /*Initialised in ParInit*/ +extern I_ threadId; /*Number of Threads that have existed on a PE*/ +extern GLOBAL_TASK_ID mytid; + +extern int nPEs; + +extern rtsBool InGlobalGC; /* Are we in the midst of performing global GC */ + +extern HashTable *pGAtoGALAtable; +extern HashTable *LAtoGALAtable; +extern GALA *freeIndirections; +extern GALA *liveIndirections; +extern GALA *freeGALAList; +extern GALA *liveRemoteGAs; +extern int thisPE; + +void RunParallelSystem (StgPtr program_closure); +void initParallelSystem(); +void SynchroniseSystem(); + +void registerTask (GLOBAL_TASK_ID gtid); +globalAddr *LAGAlookup (P_ addr); +P_ GALAlookup (globalAddr *ga); +globalAddr *MakeGlobal (P_ addr, rtsBool preferred); +globalAddr *setRemoteGA (P_ addr, globalAddr *ga, rtsBool preferred); +void splitWeight (globalAddr *to, globalAddr *from); +globalAddr *addWeight (globalAddr *ga); +void initGAtables(); +W_ taskIDtoPE (GLOBAL_TASK_ID gtid); +void RebuildLAGAtable(); + +void *lookupHashTable (HashTable *table, StgWord key); +void insertHashTable (HashTable *table, StgWord key, void *data); +void freeHashTable (HashTable *table, void (*freeDataFun) ((void *data))); +HashTable *allocHashTable(); +void *removeHashTable (HashTable *table, StgWord key, void *data); +#endif /* PAR */ + +/* Interface for dumping routines (i.e. writing to log file) */ +void DumpGranEvent(GranEventType name, StgTSO *tso); +void DumpRawGranEvent(PEs proc, PEs p, GranEventType name, + StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len); +//void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread); + +//@node Dummy defs, Par specific fixed headers, Externs and types +//@section Dummy defs + +/* +Get this out of the way. These are all null definitions. 
+*/ + + +//# define GA_HDR_SIZE 0 +//# define GA(closure) /*nothing */ + +//# define SET_GA(closure,ga) /* nothing */ +//# define SET_STATIC_GA(closure) /* nothing */ +//# define SET_GRAN_HDR(closure,pe) /* nothing */ +//# define SET_STATIC_PROCS(closure) /* nothing */ + +//# define SET_TASK_ACTIVITY(act) /* nothing */ + +#if defined(GRAN) + +# define GA_HDR_SIZE 1 + +# define PROCS_HDR_POSN PAR_HDR_POSN +# define PROCS_HDR_SIZE 1 + +/* Accessing components of the field */ +# define PROCS(closure) ((closure)->header.gran.procs) +/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */ +#endif + + +//@node Par specific fixed headers, Parallel only heap objects, Dummy defs +//@section Par specific fixed headers + +/* +Definitions relating to the entire parallel-only fixed-header field. + +On GUM, the global addresses for each local closure are stored in a separate +hash table, rather then with the closure in the heap. We call @getGA@ to +look up the global address associated with a local closure (0 is returned +for local closures that have no global address), and @setGA@ to store a new +global address for a local closure which did not previously have one. +*/ + +#if defined(PAR) + +# define GA_HDR_SIZE 0 + +# define GA(closure) getGA(closure) + +# define SET_GA(closure, ga) setGA(closure,ga) +# define SET_STATIC_GA(closure) +# define SET_GRAN_HDR(closure,pe) +# define SET_STATIC_PROCS(closure) + +# define MAX_GA_WEIGHT 0 /* Treat as 2^n */ + +W_ PackGA ((W_, int)); + /* There was a PACK_GA macro here; but we turned it into the PackGA + routine [GlobAddr.lc] (because it needs to do quite a bit of + paranoia checking. Phil & Will (95/08) + */ + +/* At the moment, there is no activity profiling for GUM. This may change. */ +# define SET_TASK_ACTIVITY(act) /* nothing */ +#endif + +//@node Parallel only heap objects, Packing definitions, Par specific fixed headers +//@section Parallel only heap objects + +// NB: The following definitons are BOTH for GUM and GrAnSim -- HWL + +/* All in Closures.h and CLosureMacros.h */ + +//@node Packing definitions, End of File, Parallel only heap objects +//@section Packing definitions + +//@menu +//* GUM:: +//* GranSim:: +//@end menu +//*/ + +//@node GUM, GranSim, Packing definitions, Packing definitions +//@subsection GUM + +#if defined(PAR) +/* +Symbolic constants for the packing code. + +This constant defines how many words of data we can pack into a single +packet in the parallel (GUM) system. 
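+
+(The constant in question is the run-time option
+RtsFlags.ParFlags.packBufferSize, used in the macros below; e.g. MAX_GAS
+bounds the number of global addresses per packet by
+RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE.)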
+*/ + +//@menu +//* Externs:: +//* Prototypes:: +//* Macros:: +//@end menu +//*/ + +//@node Externs, Prototypes, GUM, GUM +//@subsubsection Externs + +extern W_ *PackBuffer; /* size: can be set via option */ +extern long *buffer; /* HWL_ */ +extern W_ *freeBuffer; /* HWL_ */ +extern W_ *packBuffer; /* HWL_ */ + +extern void InitPackBuffer(STG_NO_ARGS); +extern void InitMoreBuffers(STG_NO_ARGS); +extern void InitPendingGABuffer(W_ size); +extern void AllocClosureQueue(W_ size); + +//@node Prototypes, Macros, Externs, GUM +//@subsubsection Prototypes + +void InitPackBuffer(); +P_ PackTSO (P_ tso, W_ *size); +P_ PackStkO (P_ stko, W_ *size); +P_ AllocateHeap (W_ size); /* Doesn't belong */ + +void InitClosureQueue (); +P_ DeQueueClosure(); +void QueueClosure (P_ closure); +rtsBool QueueEmpty(); +void PrintPacket (P_ buffer); + +P_ get_closure_info (P_ closure, W_ *size, W_ *ptrs, W_ *nonptrs, W_ *vhs, char *type); + +rtsBool isOffset (globalAddr *ga), + isFixed (globalAddr *ga); + +void doGlobalGC(); + +P_ PackNearbyGraph (P_ closure,W_ *size); +P_ UnpackGraph (W_ *buffer, globalAddr **gamap, W_ *nGAs); + + +//@node Macros, , Prototypes, GUM +//@subsubsection Macros + +# define PACK_HEAP_REQUIRED \ + ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (SPEC_HS + 2)) + +# define MAX_GAS (RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE) + + +# define PACK_GA_SIZE 3 /* Size of a packed GA in words */ + /* Size of a packed fetch-me in words */ +# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS) + +# define PACK_HDR_SIZE 1 /* Words of header in a packet */ + +# define PACK_PLC_SIZE 2 /* Size of a packed PLC in words */ + +#endif /* PAR */ + +//@node GranSim, , GUM, Packing definitions +//@subsection GranSim + +#if defined(GRAN) +/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */ + +//@menu +//* Types:: +//* Prototypes:: +//* Macros:: +//@end menu +//*/ + +//@node Types, Prototypes, GranSim, GranSim +//@subsubsection Types + +typedef struct rtsPackBuffer_ { + StgInt /* nat */ size; + StgInt /* nat */ unpacked_size; + StgTSO *tso; + StgClosure **buffer; +} rtsPackBuffer; + +//@node Prototypes, Macros, Types, GranSim +//@subsubsection Prototypes + + +/* main packing functions */ +/* +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize); +rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize); +void PrintPacket(rtsPackBuffer *buffer); +StgClosure *UnpackGraph(rtsPackBuffer* buffer); +*/ +/* important auxiliary functions */ + +//StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty); +int IS_BLACK_HOLE(StgClosure* node); +StgClosure *IS_INDIRECTION(StgClosure* node); +int IS_THUNK(StgClosure* closure); +char *display_info_type(StgClosure* closure, char *str); + +/* +OLD CODE -- HWL +void InitPackBuffer(void); +P_ AllocateHeap (W_ size); +P_ PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize); +P_ PackOneNode (P_ closure, P_ tso, W_ *packbuffersize); +P_ UnpackGraph (P_ buffer); + +void InitClosureQueue (void); +P_ DeQueueClosure(void); +void QueueClosure (P_ closure); +// rtsBool QueueEmpty(); +void PrintPacket (P_ buffer); +*/ + +// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty); +// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node) ; + +//@node Macros, , Prototypes, GranSim 
+//@subsubsection Macros + +/* These are needed in the packing code to get the size of the packet + right. The closures itself are never built in GrAnSim. */ +# define FETCHME_VHS IND_VHS +# define FETCHME_HS IND_HS + +# define FETCHME_GA_LOCN FETCHME_HS + +# define FETCHME_CLOSURE_SIZE(closure) IND_CLOSURE_SIZE(closure) +# define FETCHME_CLOSURE_NoPTRS(closure) 0L +# define FETCHME_CLOSURE_NoNONPTRS(closure) (IND_CLOSURE_SIZE(closure)-IND_VHS) + +# define MAX_GAS (RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE) +# define PACK_GA_SIZE 3 /* Size of a packed GA in words */ + /* Size of a packed fetch-me in words */ +# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS) +# define PACK_HDR_SIZE 4 /* Words of header in a packet */ + +# define PACK_HEAP_REQUIRED \ + (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \ + 2 * sizeofW(StgInt) + sizeofW(StgTSO*)) + +# define PACK_FLAG_LOCN 0 +# define PACK_TSO_LOCN 1 +# define PACK_UNPACKED_SIZE_LOCN 2 +# define PACK_SIZE_LOCN 3 +# define MAGIC_PACK_FLAG 0xfabc + +#endif /* GRAN */ + +//@node End of File, , Packing definitions +//@section End of File + +#endif /* defined(PAR) || defined(GRAN) whole file */ +#endif /* Parallel_H */ + + diff --git a/ghc/rts/parallel/0Unpack.c b/ghc/rts/parallel/0Unpack.c new file mode 100644 index 0000000..fc4a8e5 --- /dev/null +++ b/ghc/rts/parallel/0Unpack.c @@ -0,0 +1,440 @@ +/* + Time-stamp: + + Unpacking closures which have been exported to remote processors + + This module defines routines for unpacking closures in the parallel + runtime system (GUM). + + In the case of GrAnSim, this module defines routines for *simulating* the + unpacking of closures as it is done in the parallel runtime system. +*/ + +/* + Code in this file has been merged with Pack.c +*/ + +#if 0 + +//@node Unpacking closures, , , +//@section Unpacking closures + +//@menu +//* Includes:: +//* Prototypes:: +//* GUM code:: +//* GranSim Code:: +//* Index:: +//@end menu +//*/ + +//@node Includes, Prototypes, Unpacking closures, Unpacking closures +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +#include "ParallelDebug.h" +#include "FetchMe.h" +#include "Storage.h" + +//@node Prototypes, GUM code, Includes, Unpacking closures +//@subsection Prototypes + +void InitPacking(void); +# if defined(PAR) +void InitPackBuffer(void); +# endif +/* Interface for ADT of closure queues */ +void AllocClosureQueue(nat size); +void InitClosureQueue(void); +rtsBool QueueEmpty(void); +void QueueClosure(StgClosure *closure); +StgClosure *DeQueueClosure(void); + +StgPtr AllocateHeap(nat size); + +//@node GUM code, GranSim Code, Prototypes, Unpacking closures +//@subsection GUM code + +#if defined(PAR) + +//@node Local Definitions, , GUM code, GUM code +//@subsubsection Local Definitions + +//@cindex PendingGABuffer +static globalAddr *PendingGABuffer; +/* is initialised in main; */ + +//@cindex InitPendingGABuffer +void +InitPendingGABuffer(size) +nat size; +{ + PendingGABuffer = (globalAddr *) + stgMallocBytes((size-PACK_HDR_SIZE)*2*sizeof(globalAddr), + "InitPendingGABuffer"); +} + +/* + @CommonUp@ commons up two closures which we have discovered to be + variants of the same object. One is made an indirection to the other. 
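+
+  After CommonUp(src, dst), src has been overwritten with an indirection
+  to dst.  The update is done with updateWithIndirection rather than
+  UPD_IND so that the blocking queue saved in bqe is not awakened as a
+  side effect of the update; it is woken explicitly afterwards instead.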
*/ + +//@cindex CommonUp +void +CommonUp(StgClosure *src, StgClosure *dst) +{ + StgBlockingQueueElement *bqe; + + ASSERT(src != dst); + switch (get_itbl(src)->type) { + case BLACKHOLE_BQ: + bqe = ((StgBlockingQueue *)src)->blocking_queue; + break; + + case FETCH_ME_BQ: + bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue; + break; + + case RBH: + bqe = ((StgRBH *)src)->blocking_queue; + break; + + case BLACKHOLE: + case FETCH_ME: + bqe = END_BQ_QUEUE; + break; + + default: + /* Don't common up anything else */ + return; + } + /* We do not use UPD_IND because that would awaken the bq, too */ + // UPD_IND(src, dst); + updateWithIndirection(get_itbl(src), src, dst); + //ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst))); + if (bqe != END_BQ_QUEUE) + awaken_blocked_queue(bqe, src); +} + +/* + @UnpackGraph@ unpacks the graph contained in a message buffer. It + returns a pointer to the new graph. The @gamap@ parameter is set to + point to an array of (oldGA,newGA) pairs which were created as a result + of unpacking the buffer; @nGAs@ is set to the number of GA pairs which + were created. + + The format of graph in the pack buffer is as defined in @Pack.lc@. */ + +//@cindex UnpackGraph +StgClosure * +UnpackGraph(packBuffer, gamap, nGAs) +rtsPackBuffer *packBuffer; +globalAddr **gamap; +nat *nGAs; +{ + nat size, ptrs, nonptrs, vhs; + StgWord **buffer, **bufptr, **slotptr; + globalAddr ga, *gaga; + StgClosure *closure, *existing, + *graphroot, *graph, *parent; + StgInfoTable *ip, *oldip; + nat bufsize, i, + pptr = 0, pptrs = 0, pvhs; + char str[80]; + + InitPackBuffer(); /* in case it isn't already init'd */ + graphroot = (StgClosure *)NULL; + + gaga = PendingGABuffer; + + InitClosureQueue(); + + /* Unpack the header */ + bufsize = packBuffer->size; + buffer = packBuffer->buffer; + bufptr = buffer; + + /* allocate heap */ + if (bufsize > 0) { + graph = allocate(bufsize); + ASSERT(graph != NULL); + } + + parent = (StgClosure *)NULL; + + do { + /* This is where we will ultimately save the closure's address */ + slotptr = bufptr; + + /* First, unpack the next GA or PLC */ + ga.weight = (rtsWeight) *bufptr++; + + if (ga.weight > 0) { + ga.payload.gc.gtid = (GlobalTaskId) *bufptr++; + ga.payload.gc.slot = (int) *bufptr++; + } else + ga.payload.plc = (StgPtr) *bufptr++; + + /* Now unpack the closure body, if there is one */ + if (isFixed(&ga)) { + /* No more to unpack; just set closure to local address */ + IF_PAR_DEBUG(pack, + belch("Unpacked PLC at %x", ga.payload.plc)); + closure = ga.payload.plc; + } else if (isOffset(&ga)) { + /* No more to unpack; just set closure to cached address */ + ASSERT(parent != (StgClosure *)NULL); + closure = (StgClosure *) buffer[ga.payload.gc.slot]; + } else { + /* Now we have to build something. */ + + ASSERT(bufsize > 0); + + /* + * Close your eyes. You don't want to see where we're looking. You + * can't get closure info until you've unpacked the variable header, + * but you don't know how big it is until you've got closure info. + * So...we trust that the closure in the buffer is organized the + * same way as they will be in the heap...at least up through the + * end of the variable header. 
+ */ + ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str); + + /* + Remember, the generic closure layout is as follows: + +-------------------------------------------------+ + | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | + +-------------------------------------------------+ + */ + /* Fill in the fixed header */ + for (i = 0; i < FIXED_HS; i++) + ((StgPtr)graph)[i] = *bufptr++; + + if (ip->type == FETCH_ME) + size = ptrs = nonptrs = vhs = 0; + + /* Fill in the packed variable header */ + for (i = 0; i < vhs; i++) + ((StgPtr)graph)[FIXED_HS + i] = *bufptr++; + + /* Pointers will be filled in later */ + + /* Fill in the packed non-pointers */ + for (i = 0; i < nonptrs; i++) + ((StgPtr)graph)[FIXED_HS + i + vhs + ptrs] = *bufptr++; + + /* Indirections are never packed */ + // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE); + + /* Add to queue for processing */ + QueueClosure(graph); + + /* + * Common up the new closure with any existing closure having the same + * GA + */ + + if ((existing = GALAlookup(&ga)) == NULL) { + globalAddr *newGA; + /* Just keep the new object */ + IF_PAR_DEBUG(pack, + belch("Unpacking new (%x, %d, %x)\n", + ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight)); + + closure = graph; + newGA = setRemoteGA(graph, &ga, rtsTrue); + if (ip->type == FETCH_ME) + // FETCHME_GA(closure) = newGA; + ((StgFetchMe *)closure)->ga = newGA; + } else { + /* Two closures, one global name. Someone loses */ + oldip = get_itbl(existing); + + if ((oldip->type == FETCH_ME || IS_BLACK_HOLE(existing)) && + ip->type != FETCH_ME) { + + /* What we had wasn't worth keeping */ + closure = graph; + CommonUp(existing, graph); + } else { + + /* + * Either we already had something worthwhile by this name or + * the new thing is just another FetchMe. However, the thing we + * just unpacked has to be left as-is, or the child unpacking + * code will fail. Remember that the way pointer words are + * filled in depends on the info pointers of the parents being + * the same as when they were packed. + */ + IF_PAR_DEBUG(pack, + belch("Unpacking old (%x, %d, %x), keeping %#lx", + ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight, + existing)); + + closure = existing; + } + /* Pool the total weight in the stored ga */ + (void) addWeight(&ga); + } + + /* Sort out the global address mapping */ + if ((ip_THUNK(ip) && !ip_UNPOINTED(ip)) || + (ip_MUTABLE(ip) && ip->type != FETCH_ME)) { + /* Make up new GAs for single-copy closures */ + globalAddr *newGA = makeGlobal(closure, rtsTrue); + + ASSERT(closure == graph); + + /* Create an old GA to new GA mapping */ + *gaga++ = ga; + splitWeight(gaga, newGA); + ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1)); + gaga++; + } + graph += FIXED_HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size); + } + + /* + * Set parent pointer to point to chosen closure. If we're at the top of + * the graph (our parent is NULL), then we want to arrange to return the + * chosen closure to our caller (possibly in place of the allocated graph + * root.) 
+ */
+    if (parent == NULL)
+      graphroot = closure;
+    else
+      ((StgPtr)parent)[FIXED_HS + pvhs + pptr] = (StgWord) closure;
+
+    /* Save closure pointer for resolving offsets */
+    *slotptr = (StgWord) closure;
+
+    /* Locate next parent pointer */
+    pptr++;
+    while (pptr + 1 > pptrs) {
+      parent = DeQueueClosure();
+
+      if (parent == NULL)
+        break;
+      else {
+        (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+                                &pvhs, str);
+        pptr = 0;
+      }
+    }
+  } while (parent != NULL);
+
+  ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp);
+
+  *gamap = PendingGABuffer;
+  *nGAs = (gaga - PendingGABuffer) / 2;
+
+  /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+  ASSERT(graphroot!=NULL);
+  return (graphroot);
+}
+#endif /* PAR */
+
+//@node GranSim Code, Index, GUM code, Unpacking closures
+//@subsection GranSim Code
+
+/*
+  For GrAnSim: In general no actual unpacking should be necessary. We just
+  have to walk over the graph and set the bitmasks appropriately. -- HWL */
+
+//@node Unpacking, , GranSim Code, GranSim Code
+//@subsubsection Unpacking
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+  barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+/* This code fakes the unpacking of a somewhat virtual buffer */
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+  nat size, ptrs, nonptrs, vhs,
+      bufptr = 0;
+  StgClosure *closure, *graphroot, *graph;
+  StgInfoTable *ip;
+  StgWord bufsize, unpackedsize,
+          pptr = 0, pptrs = 0, pvhs;
+  StgTSO* tso;
+  char str[240], str1[80];
+  int i;
+
+  bufptr = 0;
+  graphroot = buffer->buffer[0];
+
+  tso = buffer->tso;
+
+  /* Unpack the header */
+  unpackedsize = buffer->unpacked_size;
+  bufsize = buffer->size;
+
+  IF_GRAN_DEBUG(pack,
+                belch("<<< Unpacking <<%d>> (buffer @ %p):\n    (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+                      buffer->id, buffer, graphroot, where_is(graphroot),
+                      bufsize, tso->id, tso,
+                      where_is((StgClosure *)tso)));
+
+  do {
+    closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+    /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+    ip = get_closure_info(closure,
+                          &size, &ptrs, &nonptrs, &vhs, str);
+
+    IF_GRAN_DEBUG(pack,
+                  sprintf(str, "**  (%p): Changing bitmask[%s]: 0x%x ",
+                          closure, (closure_HNF(closure) ? "NF" : "__"),
+                          PROCS(closure)));
+
+    if (ip->type == RBH) {
+      closure->header.gran.procs = PE_NUMBER(CurrentProc);    /* Move node */
+
+      IF_GRAN_DEBUG(pack,
+                    strcat(str, " (converting RBH) "));
+
+      convertFromRBH(closure);  /* In GUM that's done by convertToFetchMe */
+    } else if (IS_BLACK_HOLE(closure)) {
+      closure->header.gran.procs |= PE_NUMBER(CurrentProc);   /* Copy node */
+    } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+      if (closure_HNF(closure))
+        closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+      else
+        closure->header.gran.procs = PE_NUMBER(CurrentProc);  /* Move node */
+    }
+
+    IF_GRAN_DEBUG(pack,
+                  sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+    IF_GRAN_DEBUG(pack, belch(str));
+
+  } while (bufptr<bufsize) ;   /* (parent != NULL); */
+
+  /* In GrAnSim we allocate pack buffers dynamically!
-- HWL */ + free(buffer->buffer); + free(buffer); + + IF_GRAN_DEBUG(pack, + belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0)); + + return (graphroot); +} +#endif /* GRAN */ +#endif + +//@node Index, , GranSim Code, Unpacking closures +//@subsection Index + +//@index +//* CommonUp:: @cindex\s-+CommonUp +//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer +//* PendingGABuffer:: @cindex\s-+PendingGABuffer +//* UnpackGraph:: @cindex\s-+UnpackGraph +//@end index diff --git a/ghc/rts/parallel/FetchMe.h b/ghc/rts/parallel/FetchMe.h new file mode 100644 index 0000000..ebbb8dd --- /dev/null +++ b/ghc/rts/parallel/FetchMe.h @@ -0,0 +1,22 @@ +/* ----------------------------------------------------------------------------- + * $Id: FetchMe.h,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + * + * Closure types for the parallel system. + * + * ---------------------------------------------------------------------------*/ + +EI_(FETCH_ME_info); +EF_(FETCH_ME_entry); + +EI_(FETCH_ME_BQ_info); +EF_(FETCH_ME_BQ_entry); + +EI_(BLOCKED_FETCH_info); +EF_(BLOCKED_FETCH_entry); + +EI_(RBH_Save_0_info); +EF_(RBH_Save_0_entry); +EI_(RBH_Save_1_info); +EF_(RBH_Save_1_entry); +EI_(RBH_Save_2_info); +EF_(RBH_Save_2_entry); diff --git a/ghc/rts/parallel/FetchMe.hc b/ghc/rts/parallel/FetchMe.hc new file mode 100644 index 0000000..01f1f14 --- /dev/null +++ b/ghc/rts/parallel/FetchMe.hc @@ -0,0 +1,214 @@ +/* ---------------------------------------------------------------------------- + Time-stamp: + $Id: FetchMe.hc,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + + Entry code for a FETCH_ME closure + + This module defines routines for handling remote pointers (@FetchMe@s) + in GUM. It is threaded (@.hc@) because @FetchMe_entry@ will be + called during evaluation. + + * --------------------------------------------------------------------------*/ + +#ifdef PAR /* all of it */ + +//@menu +//* Includes:: +//* Info tables:: +//* Index:: +//@end menu + +//@node Includes, Info tables +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "Storage.h" +#include "GranSim.h" +#include "GranSimRts.h" +#include "Parallel.h" +#include "ParallelRts.h" +#include "FetchMe.h" +#include "HLC.h" +#include "StgRun.h" /* for StgReturn and register saving */ + +/* -------------------------------------------------------------------------- + FETCH_ME closures. + + A FETCH_ME closure represents data that currently resides on + another PE. We issue a fetch message, and wait for the data to be + retrieved. 
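+
+   Layout, as inferred from the INFO_TABLE and entry code below (0 pointer
+   fields, 2 non-pointer words, the first of which holds the globalAddr of
+   the remote copy):
+
+       [ fixed header | ga | ... ]      // read as ((StgFetchMe *)R1.p)->ga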
+
+   About the difference between std and PAR in returning to the RTS:
+   in PAR we call RTS functions from within the entry code (see also
+   BLACKHOLE_entry and friends in StgMiscClosures.hc); therefore, we
+   have to save the thread state before calling these functions ---
+   this is done via SAVE_THREAD_STATE; we then just load the return
+   code into R1 before jumping into the RTS --- this is done via
+   THREAD_RETURN; so, in short we have something like
+   SAVE_THREAD_STATE + THREAD_RETURN = BLOCK_NP
+
+   ------------------------------------------------------------------------ */
+
+//@node Info tables, Index, Includes
+//@subsection Info tables
+
+//@cindex FETCH_ME_info
+INFO_TABLE(FETCH_ME_info, FETCH_ME_entry, 0,2, FETCH_ME, const, EF_,0,0);
+//@cindex FETCH_ME_entry
+STGFUN(FETCH_ME_entry)
+{
+  extern globalAddr *rga_GLOBAL;
+  extern globalAddr *lga_GLOBAL;
+  extern globalAddr fmbqga_GLOBAL;
+  extern StgClosure *p_GLOBAL;
+  /*
+  globalAddr *rga;
+  globalAddr *lga;
+  globalAddr fmbqga;
+  StgClosure *p;
+  */
+
+  rga_GLOBAL = ((StgFetchMe *)R1.p)->ga;
+  ASSERT(rga_GLOBAL->payload.gc.gtid != mytid);
+
+  /* Turn the FETCH_ME into a FETCH_ME_BQ, and place the current thread
+   * on the blocking queue.
+   */
+  // R1.cl->header.info = FETCH_ME_BQ_info;
+  SET_INFO((StgClosure *)R1.cl, &FETCH_ME_BQ_info);
+
+  CurrentTSO->link = END_BQ_QUEUE;
+  ((StgFetchMeBlockingQueue *)R1.cl)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+
+  /* record on which closure the current thread is blocking */
+  CurrentTSO->block_info.closure = R1.cl;
+  //recordMutable((StgMutClosure *)R1.cl);
+  p_GLOBAL = R1.cl;
+
+  /* Save the Thread State here, before calling RTS routines below! */
+  //BLOCK_NP_NO_JUMP(1);
+  SAVE_THREAD_STATE(1);
+
+  /* unknown junk... needed? --SDM  yes, want to see what's happening -- HWL */
+  if (RtsFlags.ParFlags.ParStats.Full) {
+    /* Note that CURRENT_TIME may perform an unsafe call */
+    //rtsTime now = CURRENT_TIME; /* Now */
+    CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat;
+    CurrentTSO->par.fetchcount++;
+    /* TSO_QUEUE(CurrentTSO) = Q_FETCHING; */
+    CurrentTSO->par.blockedat = CURRENT_TIME;
+    /* we are about to send off a FETCH message, so dump a FETCH event */
+    DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga_GLOBAL->payload.gc.gtid),
+                     GR_FETCH, CurrentTSO, (StgClosure *)R1.p, 0);
+  }
+
+  /* Phil T. claims that this was a workaround for a hard-to-find
+   * bug, hence I'm leaving it out for now --SDM
+   */
+  /* Assign a brand-new global address to the newly created FMBQ */
+  lga_GLOBAL = makeGlobal(p_GLOBAL, rtsFalse);
+  splitWeight(&fmbqga_GLOBAL, lga_GLOBAL);
+  ASSERT(fmbqga_GLOBAL.weight == 1L << (BITS_IN(unsigned) - 1));
+
+  /* I *hope* it's ok to call this from STG land.
--SDM */ + STGCALL3(sendFetch, rga_GLOBAL, &fmbqga_GLOBAL, 0/*load*/); + + // sendFetch now called from processTheRealFetch, to make SDM happy + //theGlobalFromGA.payload.gc.gtid = rga->payload.gc.gtid; + //theGlobalFromGA.payload.gc.slot = rga->payload.gc.slot; + //theGlobalFromGA.weight = rga->weight; + //theGlobalToGA.payload.gc.gtid = fmbqga.payload.gc.gtid; + //theGlobalToGA.payload.gc.slot = fmbqga.payload.gc.slot; + //theGlobalToGA.weight = fmbqga.weight; + + // STGCALL6(fprintf,stderr,"%% Fetching %p from remote PE ((%x,%d,%x))\n",R1.p,rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight); + + THREAD_RETURN(1); /* back to the scheduler */ + // was: BLOCK_NP(1); + FE_ +} + +/* --------------------------------------------------------------------------- + FETCH_ME_BQ + + On the first entry of a FETCH_ME closure, we turn the closure into + a FETCH_ME_BQ, which behaves just like a BLACKHOLE_BQ. Any thread + entering the FETCH_ME_BQ will be placed in the blocking queue. + When the data arrives from the remote PE, all waiting threads are + woken up and the FETCH_ME_BQ is overwritten with the fetched data. + + FETCH_ME_BQ_entry is a copy of BLACKHOLE_BQ_entry -- HWL + ------------------------------------------------------------------------ */ + +INFO_TABLE(FETCH_ME_BQ_info, FETCH_ME_BQ_entry,0,2,FETCH_ME_BQ,const,EF_,0,0); +//@cindex FETCH_ME_BQ_info +STGFUN(FETCH_ME_BQ_entry) +{ + FB_ + TICK_ENT_BH(); + + /* Put ourselves on the blocking queue for this black hole */ + CurrentTSO->block_info.closure = R1.cl; + CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue; + ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; + +#if defined(PAR) + /* Save the Thread State here, before calling RTS routines below! */ + SAVE_THREAD_STATE(1); + + if (RtsFlags.ParFlags.ParStats.Full) { + /* Note that CURRENT_TIME may perform an unsafe call */ + //rtsTime now = CURRENT_TIME; /* Now */ + CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; + CurrentTSO->par.blockcount++; + CurrentTSO->par.blockedat = CURRENT_TIME; + DumpRawGranEvent(CURRENT_PROC, thisPE, + GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); + } + + THREAD_RETURN(1); /* back to the scheduler */ +#else + /* stg_gen_block is too heavyweight, use a specialised one */ + BLOCK_NP(1); +#endif + FE_ +} + +/* --------------------------------------------------------------------------- + BLOCKED_FETCH_BQ + + A BLOCKED_FETCH closure only ever exists in the blocking queue of a + globally visible closure i.e. one with a GA. A BLOCKED_FETCH closure + indicates that a TSO on another PE is waiting for the result of this + computation. Thus, when updating the closure, the result has to be sent + to that PE. The relevant routines handling that are awaken_blocked_queue + and blockFetch (for putting BLOCKED_FETCH closure into a BQ). 
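+
+   Illustration (a sketch, not from the original text): the blocking queue
+   of a globally visible closure can mix local TSOs and BLOCKED_FETCH
+   entries,
+
+       closure --> TSO 12 --> BLOCKED_FETCH (TSO on PE 3) --> TSO 7
+
+   so an update must both wake the local TSOs and ship the result to PE 3.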
+*/ + +//@cindex BLOCKED_FETCH_info +INFO_TABLE(BLOCKED_FETCH_info, BLOCKED_FETCH_entry,0,2,BLOCKED_FETCH,const,EF_,0,0); +//@cindex BLOCKED_FETCH_entry +STGFUN(BLOCKED_FETCH_entry) +{ + FB_ + /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */ + fprintf(stderr,"Qagh: BLOCKED_FETCH entered!\n"); + STGCALL1(raiseError, errorHandler); + stg_exit(EXIT_FAILURE); /* not executed */ + FE_ +} + +#endif /* PAR */ + +//@node Index, , Info tables +//@subsection Index + +//@index +//* BLOCKED_FETCH_entry:: @cindex\s-+BLOCKED_FETCH_entry +//* BLOCKED_FETCH_info:: @cindex\s-+BLOCKED_FETCH_info +//* FETCH_ME_BQ_info:: @cindex\s-+FETCH_ME_BQ_info +//* FETCH_ME_entry:: @cindex\s-+FETCH_ME_entry +//* FETCH_ME_info:: @cindex\s-+FETCH_ME_info +//@end index diff --git a/ghc/rts/parallel/Global.c b/ghc/rts/parallel/Global.c new file mode 100644 index 0000000..59eda0b --- /dev/null +++ b/ghc/rts/parallel/Global.c @@ -0,0 +1,828 @@ +/* --------------------------------------------------------------------------- + Time-stamp: + $Id: Global.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + + (c) The AQUA/Parade Projects, Glasgow University, 1995 + The GdH/APART 624 Projects, Heriot-Watt University, Edinburgh, 1999 + + Global Address Manipulation. + + The GALA and LAGA tables for mapping global addresses to local addresses + (i.e. heap pointers) are defined here. We use the generic hash tables + defined in Hash.c. + ------------------------------------------------------------------------- */ + +#ifdef PAR /* whole file */ + +//@menu +//* Includes:: +//* Global tables and lists:: +//* Fcts on GALA tables:: +//* Interface to taskId-PE table:: +//* Interface to LAGA table:: +//* Interface to GALA table:: +//* GC functions for GALA tables:: +//* Index:: +//@end menu + +//@node Includes, Global tables and lists, Global Address Manipulation, Global Address Manipulation +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "Storage.h" +#include "Hash.h" +#include "ParallelRts.h" + +/* + @globalAddr@ structures are allocated in chunks to reduce malloc overhead. 
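+
+  The scheme is the usual chunked free list (see allocGALA below): GCHUNK
+  entries are malloc'd in one go and threaded onto freeGALAList, so the
+  common case of an allocation is just a pointer pop,
+
+      gl = freeGALAList;
+      freeGALAList = gl->next;
+
+  (fast path only; the slow path carves out a fresh chunk).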
+*/ + +//@node Global tables and lists, Fcts on GALA tables, Includes, Global Address Manipulation +//@subsection Global tables and lists + +//@cindex thisPE +int thisPE; + +//@menu +//* Free lists:: +//* Hash tables:: +//@end menu + +//@node Free lists, Hash tables, Global tables and lists, Global tables and lists +//@subsubsection Free lists + +/* Free list of GALA entries */ +GALA *freeGALAList = NULL; + +/* Number of globalAddr cells to allocate in one go */ +#define GCHUNK (1024 * sizeof(StgWord) / sizeof(GALA)) + +/* Free list of indirections */ + +//@cindex nextIndirection +static StgInt nextIndirection = 0; +//@cindex freeIndirections +GALA *freeIndirections = NULL; + +/* The list of live indirections has to be marked for GC (see makeGlobal) */ +//@cindex liveIndirections +GALA *liveIndirections = NULL; + +/* The list of remote indirections has to be marked for GC (see setRemoteGA) */ +//@cindex liveRemoteGAs +GALA *liveRemoteGAs = NULL; + +//@node Hash tables, , Free lists, Global tables and lists +//@subsubsection Hash tables + +/* Mapping global task ids PEs */ +//@cindex taskIDtoPEtable +HashTable *taskIDtoPEtable = NULL; + +static int nextPE = 0; + +/* LAGA table: StgClosure* -> globalAddr* + (Remember: globalAddr = (GlobalTaskId, Slot, Weight)) + Mapping local to global addresses (see interface below) +*/ + +//@cindex LAtoGALAtable +HashTable *LAtoGALAtable = NULL; + +/* GALA table: globalAddr* -> StgClosure* + (Remember: globalAddr = (GlobalTaskId, Slot, Weight)) + Mapping global to local addresses (see interface below) +*/ + +//@cindex pGAtoGALAtable +HashTable *pGAtoGALAtable = NULL; + +//@node Fcts on GALA tables, Interface to taskId-PE table, Global tables and lists, Global Address Manipulation +//@subsection Fcts on GALA tables + +//@cindex allocGALA +static GALA * +allocGALA(void) +{ + GALA *gl, *p; + + if ((gl = freeGALAList) != NULL) { + freeGALAList = gl->next; + } else { + gl = (GALA *) stgMallocBytes(GCHUNK * sizeof(GALA), "allocGALA"); + + freeGALAList = gl + 1; + for (p = freeGALAList; p < gl + GCHUNK - 1; p++) + p->next = p + 1; + p->next = NULL; + } + return gl; +} + +//@node Interface to taskId-PE table, Interface to LAGA table, Fcts on GALA tables, Global Address Manipulation +//@subsection Interface to taskId-PE table + +/* + We don't really like GLOBAL_TASK_ID, so we keep a table of TASK_ID to + PE mappings. The idea is that a PE identifier will fit in 16 bits, whereas + a TASK_ID may not. +*/ + +//@cindex taskIDtoPE +PEs +taskIDtoPE(GlobalTaskId gtid) +{ + return (PEs) lookupHashTable(taskIDtoPEtable, gtid); +} + +//@cindex registerTask +void +registerTask(gtid) +GlobalTaskId gtid; +{ + if (gtid == mytid) + thisPE = nextPE; + + insertHashTable(taskIDtoPEtable, gtid, (void *) (StgWord) nextPE++); +} + +//@node Interface to LAGA table, Interface to GALA table, Interface to taskId-PE table, Global Address Manipulation +//@subsection Interface to LAGA table + +/* + The local address to global address mapping returns a globalAddr structure + (pe task id, slot, weight) for any closure in the local heap which has a + global identity. Such closures may be copies of normal form objects with + a remote `master' location, @FetchMe@ nodes referencing remote objects, or + globally visible objects in the local heap (for which we are the master). 
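+
+  Typical query (illustrative only), for a closure p in the local heap:
+
+      globalAddr *ga = LAGAlookup(p);   // NULL if p has no global identity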
+*/ + +//@cindex LAGAlookup +globalAddr * +LAGAlookup(addr) +StgClosure *addr; +{ + GALA *gala; + + /* We never look for GA's on indirections */ + ASSERT(IS_INDIRECTION(addr) == NULL); + if ((gala = lookupHashTable(LAtoGALAtable, (StgWord) addr)) == NULL) + return NULL; + else + return &(gala->ga); +} + +//@node Interface to GALA table, GC functions for GALA tables, Interface to LAGA table, Global Address Manipulation +//@subsection Interface to GALA table + +/* + We also manage a mapping of global addresses to local addresses, so that + we can ``common up'' multiple references to the same object as they arrive + in data packets from remote PEs. + + The global address to local address mapping is actually managed via a + ``packed global address'' to GALA hash table. The packed global + address takes the interesting part of the @globalAddr@ structure + (i.e. the pe and slot fields) and packs them into a single word + suitable for hashing. +*/ + +//@cindex GALAlookup +StgClosure * +GALAlookup(ga) +globalAddr *ga; +{ + StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot); + GALA *gala; + + if ((gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga)) == NULL) + return NULL; + else { + /* + * Bypass any indirections when returning a local closure to + * the caller. Note that we do not short-circuit the entry in + * the GALA tables right now, because we would have to do a + * hash table delete and insert in the LAtoGALAtable to keep + * that table up-to-date for preferred GALA pairs. That's + * probably a bit expensive. + */ + return UNWIND_IND((StgClosure *)(gala->la)); + } +} + +/* + External references to our globally-visible closures are managed through an + indirection table. The idea is that the closure may move about as the result + of local garbage collections, but its global identity is determined by its + slot in the indirection table, which never changes. + + The indirection table is maintained implicitly as part of the global + address to local address table. We need only keep track of the + highest numbered indirection index allocated so far, along with a free + list of lower numbered indices no longer in use. +*/ + +/* + Allocate an indirection slot for the closure currently at address @addr@. +*/ + +//@cindex allocIndirection +static GALA * +allocIndirection(StgPtr addr) +{ + GALA *gala; + + if ((gala = freeIndirections) != NULL) { + freeIndirections = gala->next; + } else { + gala = allocGALA(); + gala->ga.payload.gc.gtid = mytid; + gala->ga.payload.gc.slot = nextIndirection++; + } + gala->ga.weight = MAX_GA_WEIGHT; + gala->la = addr; + return gala; +} + +/* + Make a local closure at @addr@ globally visible. We have to allocate an + indirection slot for it, and update both the local address to global address + and global address to local address maps. 
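+
+  Illustrative use (hypothetical caller): to export a local closure p as
+  the preferred copy,
+
+      globalAddr *ga = makeGlobal(p, rtsTrue);
+      ASSERT(GALAlookup(ga) == p);   // the GALA table maps the new GA back to p
+
+  (assuming p is not itself an indirection, since GALAlookup unwinds
+  indirections before returning).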
+*/ + +//@cindex makeGlobal +globalAddr * +makeGlobal(addr, preferred) +StgClosure *addr; +rtsBool preferred; +{ + GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) addr); + GALA *newGALA = allocIndirection((StgPtr)addr); + StgWord pga = PackGA(thisPE, newGALA->ga.payload.gc.slot); + + ASSERT(HEAP_ALLOCED(addr)); // check that addr might point into the heap + ASSERT(GALAlookup(&(newGALA->ga)) == NULL); + + newGALA->la = addr; + newGALA->preferred = preferred; + + if (preferred) { + /* The new GA is now the preferred GA for the LA */ + if (oldGALA != NULL) { + oldGALA->preferred = rtsFalse; + (void) removeHashTable(LAtoGALAtable, (StgWord) addr, (void *) oldGALA); + } + insertHashTable(LAtoGALAtable, (StgWord) addr, (void *) newGALA); + } + + /* put the new GALA entry on the list of live indirections */ + newGALA->next = liveIndirections; + liveIndirections = newGALA; + + insertHashTable(pGAtoGALAtable, pga, (void *) newGALA); + + return &(newGALA->ga); +} + +/* + Assign an existing remote global address to an existing closure. + We do not retain the @globalAddr@ structure that's passed in as an argument, + so it can be a static in the calling routine. +*/ + +//@cindex setRemoteGA +globalAddr * +setRemoteGA(addr, ga, preferred) +StgClosure *addr; +globalAddr *ga; +rtsBool preferred; +{ + GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) addr); + GALA *newGALA = allocGALA(); + StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot); + + ASSERT(ga->payload.gc.gtid != mytid); + ASSERT(ga->weight > 0); + ASSERT(GALAlookup(ga) == NULL); + + newGALA->ga = *ga; + newGALA->la = addr; + newGALA->preferred = preferred; + + if (preferred) { + /* The new GA is now the preferred GA for the LA */ + if (oldGALA != NULL) { + oldGALA->preferred = rtsFalse; + (void) removeHashTable(LAtoGALAtable, (StgWord) addr, (void *) oldGALA); + } + insertHashTable(LAtoGALAtable, (StgWord) addr, (void *) newGALA); + } + newGALA->next = liveRemoteGAs; + liveRemoteGAs = newGALA; + + insertHashTable(pGAtoGALAtable, pga, (void *) newGALA); + + ga->weight = 0; + + return &(newGALA->ga); +} + +/* + Give me a bit of weight to give away on a new reference to a particular + global address. If we run down to nothing, we have to assign a new GA. +*/ + +//@cindex splitWeight +void +splitWeight(to, from) +globalAddr *to, *from; +{ + /* Make sure we have enough weight to split */ + if (from->weight == 1) + from = makeGlobal(GALAlookup(from), rtsTrue); + + to->payload = from->payload; + + if (from->weight == 0) + to->weight = 1L << (BITS_IN(unsigned) - 1); + else + to->weight = from->weight / 2; + + from->weight -= to->weight; +} + +/* + Here, I am returning a bit of weight that a remote PE no longer needs. +*/ + +//@cindex addWeight +globalAddr * +addWeight(ga) +globalAddr *ga; +{ + StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot); + GALA *gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga); + + IF_PAR_DEBUG(weight, + fprintf(stderr, "@* Adding weight %x to ", ga->weight); + printGA(&(gala->ga)); + fputc('\n', stderr)); + + gala->ga.weight += ga->weight; + ga->weight = 0; + + return &(gala->ga); +} + +/* + Initialize all of the global address structures: the task ID to PE id + map, the local address to global address map, the global address to + local address map, and the indirection table. 
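+
+  A plausible start-up sequence (illustrative, not from the original):
+
+      initGAtables();        // allocate the three hash tables
+      registerTask(mytid);   // enter our own task id; also sets thisPE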
+*/ + +//@cindex initGAtables +void +initGAtables(void) +{ + taskIDtoPEtable = allocHashTable(); + LAtoGALAtable = allocHashTable(); + pGAtoGALAtable = allocHashTable(); +} + +//@cindex PackGA +StgWord +PackGA (pe, slot) +StgWord pe; +int slot; +{ + int pe_shift = (BITS_IN(StgWord)*3)/4; + int pe_bits = BITS_IN(StgWord) - pe_shift; + + if ( pe_bits < 8 || slot >= (1L << pe_shift) ) { /* big trouble */ + fflush(stdout); + fprintf(stderr, "PackGA: slot# too big (%d) or not enough pe_bits (%d)\n", + slot,pe_bits); + stg_exit(EXIT_FAILURE); + } + + return((((StgWord)(pe)) << pe_shift) | ((StgWord)(slot))); + + /* the idea is to use 3/4 of the bits (e.g., 24) for indirection- + table "slot", and 1/4 for the pe# (e.g., 8). + + We check for too many bits in "slot", and double-check (at + compile-time?) that we have enough bits for "pe". We *don't* + check for too many bits in "pe", because SysMan enforces a + MAX_PEs limit at the very very beginning. + + Phil & Will 95/08 + */ +} + +//@node GC functions for GALA tables, Debugging routines, Interface to GALA table, Global Address Manipulation +//@subsection GC functions for GALA tables + +/* + When we do a copying collection, we want to evacuate all of the local + entries in the GALA table for which there are outstanding remote + pointers (i.e. for which the weight is not MAX_GA_WEIGHT.) +*/ +//@cindex markLocalGAs +void +markLocalGAs(rtsBool full) +{ + GALA *gala; + GALA *next; + GALA *prev = NULL; + StgPtr old_la, new_la; + nat n=0, m=0; // debugging only + + IF_DEBUG(gc, + belch("@@ markLocalGAs: Marking LIVE INDIRECTIONS in GALA table starting with GALA at %p\n", + liveIndirections); + printLAGAtable()); + + for (gala = liveIndirections, m=0; gala != NULL; gala = next, m++) { + IF_DEBUG(gc, + printGA(&(gala->ga)); + fprintf(stderr, ";@ %d: LA: %p (%s) ", + m, gala->la, info_type(gala->la))); + next = gala->next; + old_la = gala->la; + ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */ + if (get_itbl((StgClosure *)old_la)->type == EVACUATED) { + /* somebody else already evacuated this closure */ + new_la = ((StgEvacuated *)old_la)->evacuee; + IF_DEBUG(gc, + belch(" already evacuated to %p\n", new_la)); + } else { + StgClosure *foo ; // debugging only + n++; + IF_PAR_DEBUG(verbose, + if (IS_INDIRECTION((StgClosure *)old_la)) + belch("{markLocalGAs}Daq ghuH: trying to mark an indirection %p (%s) -> %p (%s); [closure=%p]", + old_la, info_type(old_la), + (foo = UNWIND_IND((StgClosure *)old_la)), info_type(foo), + old_la)); + new_la = MarkRoot(UNWIND_IND((StgClosure *)old_la)); // or just evacuate(old_ga) + IF_DEBUG(gc, + belch(" evacuated %p to %p\n", old_la, new_la)); + } + + gala->la = new_la; + /* remove old LA and replace with new LA */ + //(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); + //insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); + + gala->next = prev; + prev = gala; + } + liveIndirections = prev; /* list has been reversed during the marking */ + + IF_PAR_DEBUG(verbose, + belch("@@ markLocalGAs: %d of %d GALAs marked on PE %x", + n, m, mytid)); + + /* -------------------------------------------------------------------- */ + + n=0; m=0; // debugging only + + IF_DEBUG(gc, + belch("@@ markLocalGAs: Marking LIVE REMOTE GAs in GALA table starting with GALA at %p\n", + liveRemoteGAs)); + + for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) { + IF_DEBUG(gc, + printGA(&(gala->ga))); + next = gala->next; + old_la = gala->la; + ASSERT(gala->ga.payload.gc.gtid != 
mytid); /* it's supposed to be remote */ + if (get_itbl((StgClosure *)old_la)->type == EVACUATED) { + /* somebody else already evacuated this closure */ + new_la = ((StgEvacuated *)old_la)->evacuee; + } else { + n++; + new_la = MarkRoot((StgClosure *)old_la); // or just evacuate(old_ga) + } + + gala->la = new_la; + /* remove old LA and replace with new LA */ + //(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); + //insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); + + gala->next = prev; + prev = gala; + } + liveRemoteGAs = prev; /* list is reversed during marking */ + + /* If we have any remaining FREE messages to send off, do so now */ + // sendFreeMessages(); + + IF_DEBUG(gc, + belch("@@ markLocalGAs: GALA after marking"); + printLAGAtable(); + belch("--------------------------------------")); + +} + +void +OLDmarkLocalGAs(rtsBool full) +{ + extern StgClosure *MarkRootHWL(StgClosure *root); + + GALA *gala; + GALA *next; + GALA *prev = NULL; + StgPtr new_la; + nat n=0, m=0; // debugging only + + IF_DEBUG(gc, + belch("@@ markLocalGAs: Marking entries in GALA table starting with GALA at %p", + liveIndirections); + printLAGAtable()); + + for (gala = liveIndirections; gala != NULL; gala = next) { + IF_DEBUG(gc, + printGA(&(gala->ga)); + fprintf(stderr, " LA: %p (%s) ", + gala->la, info_type(gala->la))); + next = gala->next; + ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */ + if (gala->ga.weight != MAX_GA_WEIGHT) { + /* Remote references exist, so we must evacuate the local closure */ + StgPtr old_la = gala->la; + + if (get_itbl((StgClosure *)old_la)->type != EVACUATED) { // track evacuee!?? + n++; + IF_DEBUG(gc, + fprintf(stderr, " marking as root\n")); + new_la = MarkRoot((StgClosure *)old_la); // or just evacuate(old_ga) + //IF_DEBUG(gc, + // fprintf(stderr, " new LA is %p ", new_la)); + if (!full && gala->preferred && new_la != old_la) { + IF_DEBUG(gc, + fprintf(stderr, " replacing %p with %p in LAGA table\n", + old_la, new_la)); + (void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); + insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); + } + } else { + IF_DEBUG(gc, + fprintf(stderr, " EVAC ")); + new_la = ((StgEvacuated *)old_la)->evacuee; + IF_DEBUG(gc, + fprintf(stderr, " replacing %p with %p in LAGA table\n", + old_la, new_la)); + (void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); + insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); + } + gala->next = prev; + prev = gala; + } else { + /* Since we have all of the weight, this GA is no longer needed */ + StgWord pga = PackGA(thisPE, gala->ga.payload.gc.slot); + + m++; + IF_DEBUG(gc, + fprintf(stderr, " freeing slot %d", + gala->ga.payload.gc.slot)); + + /* put the now redundant GALA onto the free list */ + gala->next = freeIndirections; + freeIndirections = gala; + /* remove the GALA from the GALA table; now it's just local */ + (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala); + if (!full && gala->preferred) + (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + +#ifdef DEBUG + gala->ga.weight = 0x0d0d0d0d; + gala->la = (StgWord) 0x0bad0bad; +#endif + } + } + liveIndirections = prev; /* list has been reversed during the marking */ + + IF_PAR_DEBUG(verbose, + belch("@@ markLocalGAs: %d GALAs marked, %d GALAs nuked on PE %x", + n, m, mytid)); + +} + +//@cindex RebuildGAtables +void +RebuildGAtables(rtsBool full) +{ + GALA *gala; + GALA *next; + GALA *prev; + StgClosure 
*closure, *last, *new_closure; + + //prepareFreeMsgBuffers(); + + if (full) + RebuildLAGAtable(); + + IF_DEBUG(gc, + belch("@@ RebuildGAtables: After ReBuilding GALA table starting with GALA at %p", + liveRemoteGAs); + printLAGAtable()); +} + +void +OLDRebuildGAtables(rtsBool full) +{ + GALA *gala; + GALA *next; + GALA *prev; + StgClosure *closure, *last, *new_closure; + + prepareFreeMsgBuffers(); + + for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) { + IF_DEBUG(gc, + printGA(&(gala->ga))); + next = gala->next; + ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */ + + closure = (StgClosure *) (gala->la); + + /* + * If the old closure has not been forwarded, we let go. Note that this + * approach also drops global aliases for PLCs. + */ + + if (!full && gala->preferred) + (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + + /* Follow indirection chains to the end, just in case */ + closure = UNWIND_IND(closure); + + /* + if (get_itbl(closure)->type != EVACUATED) { // (new_closure = isAlive(closure)) == NULL) { // (W_) Forward_Ref_info) + // closure is not alive any more, thus remove GA + int pe = taskIDtoPE(gala->ga.payload.gc.gtid); + StgWord pga = PackGA(pe, gala->ga.payload.gc.slot); + + IF_DEBUG(gc, + fprintf(stderr, " (LA: %p (%s)) is unused on this PE -> sending free\n", + closure, info_type(closure))); + + (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala); + freeRemoteGA(pe, &(gala->ga)); + gala->next = freeGALAList; + freeGALAList = gala; + } else { + */ + if (get_itbl(closure)->type == EVACUATED) { + IF_DEBUG(gc, + fprintf(stderr, " EVAC %p (%s)\n", + closure, info_type(closure))); + closure = ((StgEvacuated *)closure)->evacuee; + } else { + IF_DEBUG(gc, + fprintf(stderr, " !EVAC %p (%s)\n", + closure, info_type(closure))); + } + gala->la = closure; + if (!full && gala->preferred) + insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + gala->next = prev; + prev = gala; + } + //} + liveRemoteGAs = prev; /* list is reversed during marking */ + + /* If we have any remaining FREE messages to send off, do so now */ + sendFreeMessages(); + + if (full) + RebuildLAGAtable(); + + IF_DEBUG(gc, + belch("@@ RebuildGAtables: After ReBuilding GALA table starting with GALA at %p", + liveRemoteGAs); + printLAGAtable()); +} + +/* + Rebuild the LA->GA table, assuming that the addresses in the GALAs are + correct. 
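+
+  (As RebuildGAtables above shows, this is only done on a full collection,
+  when any closure may have moved:
+
+      if (full)
+        RebuildLAGAtable();
+  )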
+*/ + +//@cindex RebuildLAGAtable +void +RebuildLAGAtable(void) +{ + GALA *gala; + nat n=0, m=0; // debugging + + /* The old LA->GA table is worthless */ + freeHashTable(LAtoGALAtable, NULL); + LAtoGALAtable = allocHashTable(); + + IF_DEBUG(gc, + belch("@@ RebuildLAGAtable: new LAGA table at %p", + LAtoGALAtable)); + + for (gala = liveIndirections; gala != NULL; gala = gala->next) { + n++; + if (gala->preferred) + insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + } + + for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) { + m++; + if (gala->preferred) + insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + } + + IF_DEBUG(gc, + belch("@@ RebuildLAGAtable: inserted %d entries from liveIndirections and %d entries from liveRemoteGAs", + n,m)); + +} + +//@node Debugging routines, Index, GC functions for GALA tables, Global Address Manipulation +//@subsection Debugging routines + +//@cindex printGA +void +printGA (globalAddr *ga) +{ + fprintf(stderr, "((%x, %d, %x))", + ga->payload.gc.gtid, + ga->payload.gc.slot, + ga->weight); +} + +//@cindex printGALA +void +printGALA (GALA *gala) +{ + printGA(&(gala->ga)); + fprintf(stderr, " -> %p (%s)", (StgPtr)gala->la, info_type(gala->la)); + fprintf(stderr, " %s", (gala->preferred) ? "PREF" : "____"); +} + +/* + Printing the LA->GA table. +*/ + +//@cindex DebugPrintLAGAtable +void +printLAGAtable(void) +{ + GALA *gala; + nat n=0, m=0; // debugging + + belch("@@ LAGAtable (%p) with liveIndirections=%p, liveRemoteGAs=%p:", + LAtoGALAtable, liveIndirections, liveRemoteGAs); + + for (gala = liveIndirections; gala != NULL; gala = gala->next) { + n++; + printGALA(gala); + fputc('\n', stderr); + } + + for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) { + m++; + printGALA(gala); + fputc('\n', stderr); + } + belch("@@ LAGAtable has %d liveIndirections entries and %d liveRemoteGAs entries", + n, m); +} + +#endif /* PAR -- whole file */ + +//@node Index, , Debugging routines, Global Address Manipulation +//@subsection Index + +//@index +//* GALAlookup:: @cindex\s-+GALAlookup +//* LAGAlookup:: @cindex\s-+LAGAlookup +//* LAtoGALAtable:: @cindex\s-+LAtoGALAtable +//* PackGA:: @cindex\s-+PackGA +//* RebuildGAtables:: @cindex\s-+RebuildGAtables +//* RebuildLAGAtable:: @cindex\s-+RebuildLAGAtable +//* addWeight:: @cindex\s-+addWeight +//* allocGALA:: @cindex\s-+allocGALA +//* allocIndirection:: @cindex\s-+allocIndirection +//* freeIndirections:: @cindex\s-+freeIndirections +//* initGAtables:: @cindex\s-+initGAtables +//* liveIndirections:: @cindex\s-+liveIndirections +//* liveRemoteGAs:: @cindex\s-+liveRemoteGAs +//* makeGlobal:: @cindex\s-+makeGlobal +//* markLocalGAs:: @cindex\s-+markLocalGAs +//* nextIndirection:: @cindex\s-+nextIndirection +//* pGAtoGALAtable:: @cindex\s-+pGAtoGALAtable +//* registerTask:: @cindex\s-+registerTask +//* setRemoteGA:: @cindex\s-+setRemoteGA +//* splitWeight:: @cindex\s-+splitWeight +//* taskIDtoPE:: @cindex\s-+taskIDtoPE +//* taskIDtoPEtable:: @cindex\s-+taskIDtoPEtable +//* thisPE:: @cindex\s-+thisPE +//@end index diff --git a/ghc/rts/parallel/GranSim.c b/ghc/rts/parallel/GranSim.c new file mode 100644 index 0000000..8d08fb6 --- /dev/null +++ b/ghc/rts/parallel/GranSim.c @@ -0,0 +1,3005 @@ +/* + Time-stamp: + $Id: GranSim.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + + Variables and functions specific to GranSim the parallelism simulator + for GPH. 
+*/ + +//@node GranSim specific code, , , +//@section GranSim specific code + +/* + Macros for dealing with the new and improved GA field for simulating + parallel execution. Based on @CONCURRENT@ package. The GA field now + contains a mask, where the n-th bit stands for the n-th processor, where + this data can be found. In case of multiple copies, several bits are + set. The total number of processors is bounded by @MAX_PROC@, which + should be <= the length of a word in bits. -- HWL +*/ + +//@menu +//* Includes:: +//* Prototypes and externs:: +//* Constants and Variables:: +//* Initialisation:: +//* Global Address Operations:: +//* Global Event Queue:: +//* Spark queue functions:: +//* Scheduling functions:: +//* Thread Queue routines:: +//* GranSim functions:: +//* GranSimLight routines:: +//* Code for Fetching Nodes:: +//* Idle PEs:: +//* Routines directly called from Haskell world:: +//* Emiting profiling info for GrAnSim:: +//* Dumping routines:: +//* Index:: +//@end menu + +//@node Includes, Prototypes and externs, GranSim specific code, GranSim specific code +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "StgMiscClosures.h" +#include "StgTypes.h" +#include "Schedule.h" +#include "SchedAPI.h" // for pushClosure +#include "GC.h" +#include "GranSimRts.h" +#include "GranSim.h" +#include "ParallelRts.h" +#include "ParallelDebug.h" +#include "Storage.h" // for recordMutable + + +//@node Prototypes and externs, Constants and Variables, Includes, GranSim specific code +//@subsection Prototypes and externs + +#if defined(GRAN) + +/* Prototypes */ +static inline PEs ga_to_proc(StgWord); +static inline rtsBool any_idle(void); +static inline nat idlers(void); + PEs where_is(StgClosure *node); + +static rtsBool stealSomething(PEs proc, rtsBool steal_spark, rtsBool steal_thread); +static inline rtsBool stealSpark(PEs proc); +static inline rtsBool stealThread(PEs proc); +static rtsBool stealSparkMagic(PEs proc); +static rtsBool stealThreadMagic(PEs proc); +/* subsumed by stealSomething +static void stealThread(PEs proc); +static void stealSpark(PEs proc); +*/ +static rtsTime sparkStealTime(void); +static nat natRandom(nat from, nat to); +static PEs findRandomPE(PEs proc); +static void sortPEsByTime (PEs proc, PEs *pes_by_time, + nat *firstp, nat *np); + +void GetRoots(void); + +#endif /* GRAN */ + +//@node Constants and Variables, Initialisation, Prototypes and externs, GranSim specific code +//@subsection Constants and Variables + +#if defined(GRAN) || defined(PAR) +/* See GranSim.h for the definition of the enum gran_event_types */ +char *gran_event_names[] = { + "START", "START(Q)", + "STEALING", "STOLEN", "STOLEN(Q)", + "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)", + "SCHEDULE", "DESCHEDULE", + "END", + "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED", + "ALLOC", + "TERMINATE", + "SYSTEM_START", "SYSTEM_END", /* only for debugging */ + "??" +}; +#endif + +#if defined(GRAN) /* whole file */ +char *proc_status_names[] = { + "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy", + "UnknownProcStatus" +}; + +/* For internal use (event statistics) only */ +char *event_names[] = + { "ContinueThread", "StartThread", "ResumeThread", + "MoveSpark", "MoveThread", "FindWork", + "FetchNode", "FetchReply", + "GlobalBlock", "UnblockThread" + }; + +//@cindex CurrentProc +PEs CurrentProc = 0; + +/* + ToDo: Create a structure for the processor status and put all the + arrays below into it. 
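+
+   One possible shape for such a structure (purely illustrative; the
+   per-PE arrays below are the actual representation):
+
+       typedef struct PEState_ {
+         rtsTime       clock;                 // cf CurrentTime[proc]
+         nat           outstandingFetches;    // cf OutstandingFetches[proc]
+         nat           outstandingFishes;     // cf OutstandingFishes[proc]
+         rtsProcStatus status;                // cf procStatus[proc]
+         rtsSparkQ     sparks_hd, sparks_tl;  // cf pending_sparks_hds/_tls
+       } PEState;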
+   -- HWL */
+
+//@cindex CurrentTime
+/* One clock for each PE */
+rtsTime CurrentTime[MAX_PROC];
+
+/* Useful to restrict communication; cf fishing model in GUM */
+nat OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
+
+/* Status of each PE (new since but independent of GranSim Light) */
+rtsProcStatus procStatus[MAX_PROC];
+
+# if defined(GRAN) && defined(GRAN_CHECK)
+/* To check if the RTS ever tries to run a thread that should be blocked
+   because of fetching remote data */
+StgTSO *BlockedOnFetch[MAX_PROC];
+# define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
+# endif
+
+nat SparksAvail = 0;     /* How many sparks are available */
+nat SurplusThreads = 0;  /* How many excess threads are there */
+
+/* Do we need to reschedule following a fetch? */
+rtsBool NeedToReSchedule = rtsFalse, IgnoreEvents = rtsFalse, IgnoreYields = rtsFalse;
+rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; /* checked from the threaded world! */
+
+//@cindex spark queue
+/* GranSim: a globally visible array of spark queues */
+rtsSparkQ pending_sparks_hds[MAX_PROC];
+rtsSparkQ pending_sparks_tls[MAX_PROC];
+
+nat sparksIgnored = 0, sparksCreated = 0;
+
+GlobalGranStats globalGranStats;
+
+nat gran_arith_cost, gran_branch_cost, gran_load_cost,
+    gran_store_cost, gran_float_cost;
+
+/*
+Old comment from 0.29. ToDo: Check and update -- HWL
+
+The following variables control the behaviour of GrAnSim. In general, there
+is one RTS option for enabling each of these features. In getting the
+desired setup of GranSim the following questions have to be answered:
+\begin{itemize}
+\item {\em Which scheduling algorithm} to use (@RtsFlags.GranFlags.DoFairSchedule@)?
+      Currently only unfair scheduling is supported.
+\item What to do when remote data is fetched (@RtsFlags.GranFlags.DoAsyncFetch@)?
+      Either block and wait for the data or reschedule and do some other work.
+      Thus, if this variable is true, asynchronous communication is
+      modelled. Block on fetch mainly makes sense for incremental fetching.
+
+      There is also a simplified fetch variant available
+      (@RtsFlags.GranFlags.SimplifiedFetch@). This variant does not use events to model
+      communication. It is faster but the results will be less accurate.
+\item How aggressive to be in getting work after a reschedule on fetch
+      (@RtsFlags.GranFlags.FetchStrategy@)?
+      This is determined by the so-called {\em fetching strategy\/}.
+      Currently, there are four possibilities:
+      \begin{enumerate}
+      \item Only run a runnable thread.
+      \item Turn a spark into a thread, if necessary.
+      \item Steal a remote spark, if necessary.
+      \item Steal a runnable thread from another processor, if necessary.
+      \end{enumerate}
+      The variable @RtsFlags.GranFlags.FetchStrategy@ determines how far to go in this list
+      when rescheduling on a fetch.
+\item Should sparks or threads be stolen first when looking for work
+      (@RtsFlags.GranFlags.DoStealThreadsFirst@)?
+      The default is to steal sparks first (much cheaper).
+\item Should the RTS use a lazy thread creation scheme
+      (@RtsFlags.GranFlags.DoAlwaysCreateThreads@)? By default yes, i.e.\ sparks are only
+      turned into threads when work is needed. Also note that sparks
+      can be discarded by the RTS (this is done in the case of an overflow
+      of the spark pool). Setting @RtsFlags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
+      the creation of threads at the next possibility (i.e.\ when new work
+      is demanded the next time).
+\item Should data be fetched closure-by-closure or in packets
+      (@RtsFlags.GranFlags.DoBulkFetching@)? The default strategy is a GRIP-like incremental
+      (i.e.\ closure-by-closure) strategy. This makes sense in a
+      low-latency setting but is bad in a high-latency system. Setting
+      @RtsFlags.GranFlags.DoBulkFetching@ to @True@ enables bulk (packet) fetching. Other
+      parameters determine the size of the packets (@pack_buffer_size@) and the number of
+      thunks that should be put into one packet (@RtsFlags.GranFlags.ThunksToPack@).
+\item If there is no other possibility to find work, should runnable threads
+      be moved to an idle processor (@RtsFlags.GranFlags.DoThreadMigration@)? In any case, the
+      RTS tries to get sparks (either local or remote ones) first. Thread
+      migration is very expensive, since a whole TSO has to be transferred
+      and probably data locality becomes worse in the process. Note that
+      the closure which will be evaluated next by that TSO is not
+      transferred together with the TSO (that might block another thread).
+\item Should the RTS distinguish between sparks created by local nodes and
+      stolen sparks (@RtsFlags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
+      data locality by preferring sparks of local nodes (it is more likely
+      that the data for those sparks is already on the local processor).
+      However, such a distinction also imposes an overhead on the spark
+      queue management, and typically a large number of sparks are
+      generated during execution. By default this variable is set to @False@.
+\item Should the RTS use granularity control mechanisms? The idea of a
+      granularity control mechanism is to make use of granularity
+      information provided via annotation of the @par@ construct in order
+      to prefer bigger threads when either turning a spark into a thread or
+      when choosing the next thread to schedule. Currently, three such
+      mechanisms are implemented:
+      \begin{itemize}
+      \item Cut-off: The granularity information is interpreted as a
+            priority. If a threshold priority is given to the RTS, then
+            only those sparks with a higher priority than the threshold
+            are actually created. Other sparks are immediately discarded.
+            This is similar to a usual cut-off mechanism often used in
+            parallel programs, where parallelism is only created if the
+            input data is large enough. With this option, the choice is
+            hidden in the RTS and only the threshold value has to be
+            provided as a parameter to the runtime system.
+      \item Priority Sparking: This mechanism keeps priorities for sparks
+            and chooses the spark with the highest priority when turning
+            a spark into a thread. After that the priority information is
+            discarded. The overhead of this mechanism comes from
+            maintaining a sorted spark queue.
+      \item Priority Scheduling: This mechanism keeps the granularity
+            information for threads, too. Thus, on each reschedule the
+            largest thread is chosen. This mechanism has a higher
+            overhead, as the thread queue is sorted, too.
+ \end{itemize} +\end{itemize} +*/ + +//@node Initialisation, Global Address Operations, Constants and Variables, GranSim specific code +//@subsection Initialisation + +void +init_gr_stats (void) { + memset(&globalGranStats, '\0', sizeof(GlobalGranStats)); +#if 0 + /* event stats */ + globalGranStats.noOfEvents = 0; + for (i=0; i not unique + else + unique = rtsTrue; // found 1st instance + ASSERT(unique); // otherwise returned from within loop + return (unique); +} + +//@cindex any_idle +static inline rtsBool +any_idle(void) { /* any (map (\ i -> procStatus[i] == Idle)) [0,..,MAX_PROC] */ + PEs i; + rtsBool any_idle; + for(i=0, any_idle=rtsFalse; + !any_idle && ievttype]++; + } + + entry = EventHd; + + IF_GRAN_DEBUG(event_trace, + print_event(entry)); + + EventHd = EventHd->next; + return(entry); +} + +/* When getting the time of the next event we ignore CONTINUETHREAD events: + we don't want to be interrupted before the end of the current time slice + unless there is something important to handle. +*/ +//@cindex get_time_of_next_event +rtsTime +get_time_of_next_event(void) +{ + rtsEventQ event = EventHd; + + while (event != NULL && event->evttype==ContinueThread) { + event = event->next; + } + if(event == NULL) + return ((rtsTime) 0); + else + return (event->time); +} + +/* ToDo: replace malloc/free with a free list */ +//@cindex insert_event +void +insert_event(newentry) +rtsEvent *newentry; +{ + rtsEventType evttype = newentry->evttype; + rtsEvent *event, **prev; + + /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */ + + /* Search the queue and insert at the right point: + FINDWORK before everything, CONTINUETHREAD after everything. + + This ensures that we find any available work after all threads have + executed the current cycle. This level of detail would normally be + irrelevant, but matters for ridiculously low latencies... + */ + + /* Changed the ordering: Now FINDWORK comes after everything but + CONTINUETHREAD. This makes sure that a MOVESPARK comes before a + FINDWORK. This is important when a GranSimSparkAt happens and + DoAlwaysCreateThreads is turned on. Also important if a GC occurs + when trying to build a new thread (see much_spark) -- HWL 02/96 */ + + if(EventHd == NULL) + EventHd = newentry; + else { + for (event = EventHd, prev=(rtsEvent**)&EventHd; + event != NULL; + prev = (rtsEvent**)&(event->next), event = event->next) { + switch (evttype) { + case FindWork: if ( event->time < newentry->time || + ( (event->time == newentry->time) && + (event->evttype != ContinueThread) ) ) + continue; + else + break; + case ContinueThread: if ( event->time <= newentry->time ) + continue; + else + break; + default: if ( event->time < newentry->time || + ((event->time == newentry->time) && + (event->evttype == newentry->evttype)) ) + continue; + else + break; + } + /* Insert newentry here (i.e. 
before event) */ + *prev = newentry; + newentry->next = event; + break; + } + if (event == NULL) + *prev = newentry; + } +} + +//@cindex new_event +void +new_event(proc,creator,time,evttype,tso,node,spark) +PEs proc, creator; +rtsTime time; +rtsEventType evttype; +StgTSO *tso; +StgClosure *node; +rtsSpark *spark; +{ + rtsEvent *newentry = (rtsEvent *) stgMallocBytes(sizeof(rtsEvent), "new_event"); + + newentry->proc = proc; + newentry->creator = creator; + newentry->time = time; + newentry->evttype = evttype; + newentry->tso = tso; + newentry->node = node; + newentry->spark = spark; + newentry->gc_info = 0; + newentry->next = NULL; + + insert_event(newentry); + + IF_DEBUG(gran, + fprintf(stderr, "GRAN: new_event: \n"); + print_event(newentry)) +} + +//@cindex prepend_event +void +prepend_event(event) /* put event at beginning of EventQueue */ +rtsEvent *event; +{ /* only used for GC! */ + event->next = EventHd; + EventHd = event; +} + +//@cindex grab_event +rtsEventQ +grab_event(void) /* undo prepend_event i.e. get the event */ +{ /* at the head of EventQ but don't free anything */ + rtsEventQ event = EventHd; + + if (EventHd == NULL) { + barf("No next event (in grab_event). This may be caused by a circular data dependency in the program."); + } + + EventHd = EventHd->next; + return (event); +} + +//@cindex traverse_eventq_for_gc +void +traverse_eventq_for_gc(void) +{ + rtsEventQ event = EventHd; + StgWord bufsize; + StgClosure *closurep; + StgTSO *tsop; + StgPtr buffer, bufptr; + PEs proc, creator; + + /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the + orig closure (root of packed graph). This means that a graph, which is + between processors at the time of GC is fetched again at the time when + it would have arrived, had there been no GC. Slightly inaccurate but + safe for GC. + This is only needed for GUM style fetchng. 
-- HWL */
+  if (!RtsFlags.GranFlags.DoBulkFetching)
+    return;
+
+  for(event = EventHd; event!=NULL; event=event->next) {
+    if (event->evttype==FetchReply) {
+      buffer = stgCast(StgPtr,event->node);
+      ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG);  /* It's a pack buffer */
+      bufsize = buffer[PACK_SIZE_LOCN];
+      closurep = stgCast(StgClosure*,buffer[PACK_HDR_SIZE]);
+      tsop = stgCast(StgTSO*,buffer[PACK_TSO_LOCN]);
+      proc = event->proc;
+      creator = event->creator;               /* similar to unpacking */
+      for (bufptr=buffer+PACK_HDR_SIZE;
+           bufptr<(buffer+bufsize);
+           bufptr++) {
+        // if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
+        //      (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
+        if ( GET_INFO(stgCast(StgClosure*,bufptr)) ) {
+          convertFromRBH(stgCast(StgClosure *,bufptr));
+        }
+      }
+      free(buffer);
+      event->evttype = FetchNode;
+      event->proc = creator;
+      event->creator = proc;
+      event->node = closurep;
+      event->tso = tsop;
+      event->gc_info = 0;
+    }
+  }
+}
+
+void
+markEventQueue(void)
+{
+  StgClosure *MarkRoot(StgClosure *root); // prototype
+
+  rtsEventQ event = EventHd;
+  nat len;
+
+  /* iterate over eventq and register relevant fields in event as roots */
+  for(event = EventHd, len = 0; event!=NULL; event=event->next, len++) {
+    switch (event->evttype) {
+      case ContinueThread:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        break;
+      case StartThread:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+        break;
+      case ResumeThread:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+        break;
+      case MoveSpark:
+        event->spark->node = (StgClosure *)MarkRoot((StgClosure *)event->spark->node);
+        break;
+      case MoveThread:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        break;
+      case FindWork:
+        break;
+      case FetchNode:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+        break;
+      case FetchReply:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        if (RtsFlags.GranFlags.DoBulkFetching)
+          // ToDo: traverse_eventq_for_gc if GUM-Fetching!!! HWL
+          belch("ghuH: packets in BulkFetching not marked as roots; may be fatal");
+        else
+          event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+        break;
+      case GlobalBlock:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+        break;
+      case UnblockThread:
+        event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+        event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+        break;
+      default:
+        barf("markEventQueue: trying to mark unknown event @ %p", event);
+    }}
+  IF_DEBUG(gc,
+           belch("GC: markEventQueue: %d events in queue", len));
+}
+
+/*
+   Prune all ContinueThread events related to tso or node in the eventq.
+   Currently used if a thread leaves STG land with ThreadBlocked status,
+   i.e. it blocked on a closure and has been put on its blocking queue.  It
+   will be reawakened via a call to awaken_blocked_queue. Until then no
+   event affecting this tso should appear in the eventq.  A bit of a hack,
+   because ideally we shouldn't generate such spurious ContinueThread events
+   in the first place.
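+
+   Given the assertions in the body below, a typical (hypothetical) call
+   site passes a valid TSO and no node:
+
+     prune_eventq(tso, (StgClosure*)NULL);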
+*/ +//@cindex prune_eventq +void +prune_eventq(tso, node) +StgTSO *tso; +StgClosure *node; +{ rtsEventQ prev = (rtsEventQ)NULL, event = EventHd; + + /* node unused for now */ + ASSERT(node==NULL); + /* tso must be valid, then */ + ASSERT(tso!=END_TSO_QUEUE); + while (event != NULL) { + if (event->evttype==ContinueThread && + (event->tso==tso)) { + IF_GRAN_DEBUG(event_trace, // ToDo: use another debug flag + belch("prune_eventq: pruning ContinueThread event for TSO %d (%p) on PE %d @ %lx (%p)", + event->tso->id, event->tso, event->proc, event->time, event)); + if (prev==(rtsEventQ)NULL) { // beginning of eventq + EventHd = event->next; + free(event); + event = EventHd; + } else { + prev->next = event->next; + free(event); + event = prev->next; + } + } else { // no pruning necessary; go to next event + prev = event; + event = event->next; + } + } +} + +//@cindex print_event +void +print_event(event) +rtsEvent *event; +{ + char str_tso[16], str_node[16]; + StgThreadID tso_id; + + if (event->tso==END_TSO_QUEUE) { + strcpy(str_tso, "______"); + tso_id = 0; + } else { + sprintf(str_tso, "%p", event->tso); + tso_id = (event->tso==NULL) ? 0 : event->tso->id; + } + if (event->node==(StgClosure*)NULL) { + strcpy(str_node, "______"); + } else { + sprintf(str_node, "%p", event->node); + } + // HWL: shouldn't be necessary; ToDo: nuke + //str_tso[6]='\0'; + //str_node[6]='\0'; + + if (event==NULL) + fprintf(stderr,"Evt: NIL\n"); + else + fprintf(stderr, "Evt: %s (%u), PE %u [%u], Time %lu, TSO %d (%s), Node %s\n", //"Evt: %s (%u), PE %u [%u], Time %u, TSO %s (%#l), Node %s\n", + event_names[event->evttype], event->evttype, + event->proc, event->creator, event->time, + tso_id, str_tso, str_node + /*, event->spark, event->next */ ); + +} + +//@cindex print_eventq +void +print_eventq(hd) +rtsEvent *hd; +{ + rtsEvent *x; + + fprintf(stderr,"Event Queue with root at %p:\n", hd); + for (x=hd; x!=NULL; x=x->next) { + print_event(x); + } +} + +/* + Spark queue functions are now all in Sparks.c!! +*/ +//@node Scheduling functions, Thread Queue routines, Spark queue functions, GranSim specific code +//@subsection Scheduling functions + +/* + These functions are variants of thread initialisation and therefore + related to initThread and friends in Schedule.c. However, they are + specific to a GranSim setup in storing more info in the TSO's statistics + buffer and sorting the thread queues etc. +*/ + +/* + A large portion of startThread deals with maintaining a sorted thread + queue, which is needed for the Priority Sparking option. Without that + complication the code boils down to FIFO handling. +*/ +//@cindex insertThread +void +insertThread(tso, proc) +StgTSO* tso; +PEs proc; +{ + StgTSO *prev = NULL, *next = NULL; + nat count = 0; + rtsBool found = rtsFalse; + + ASSERT(CurrentProc==proc); + ASSERT(!is_on_queue(tso,proc)); + /* Idle proc: put the thread on the run queue + same for pri spark and basic version */ + if (run_queue_hds[proc] == END_TSO_QUEUE) + { + /* too strong! + ASSERT((CurrentProc==MainProc && + CurrentTime[MainProc]==0 && + procStatus[MainProc]==Idle) || + procStatus[proc]==Starting); + */ + run_queue_hds[proc] = run_queue_tls[proc] = tso; + + CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime; + + /* new_event of ContinueThread has been moved to do_the_startthread */ + + /* too strong! 
+ ASSERT(procStatus[proc]==Idle || + procStatus[proc]==Fishing || + procStatus[proc]==Starting); + procStatus[proc] = Busy; + */ + return; + } + + if (RtsFlags.GranFlags.Light) + GranSimLight_insertThread(tso, proc); + + /* Only for Pri Scheduling: find place where to insert tso into queue */ + if (RtsFlags.GranFlags.DoPriorityScheduling && tso->gran.pri!=0) + /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo' */ + for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count=0; + (next != END_TSO_QUEUE) && + !(found = tso->gran.pri >= next->gran.pri); + prev = next, next = next->link, count++) + { + ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) && + (prev==(StgTSO*)NULL || prev->link==next)); + } + + ASSERT(!found || next != END_TSO_QUEUE); + ASSERT(procStatus[proc]!=Idle); + + if (found) { + /* found can only be rtsTrue if pri scheduling enabled */ + ASSERT(RtsFlags.GranFlags.DoPriorityScheduling); + if (RtsFlags.GranFlags.GranSimStats.Global) + globalGranStats.non_end_add_threads++; + /* Add tso to ThreadQueue between prev and next */ + tso->link = next; + if ( next == (StgTSO*)END_TSO_QUEUE ) { + run_queue_tl = tso; + } else { + /* no back link for TSO chain */ + } + + if ( prev == (StgTSO*)END_TSO_QUEUE ) { + /* Never add TSO as first elem of thread queue; the first */ + /* element should be the one that is currently running -- HWL */ + IF_DEBUG(gran, + belch("GRAN: Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %p (PRI=%d) as first elem of threadQ (%p) on proc %u (@ %u)\n", + tso, tso->gran.pri, run_queue_hd, proc, + CurrentTime[proc])); + } else { + prev->link = tso; + } + } else { /* !found */ /* or not pri sparking! */ + /* Add TSO to the end of the thread queue on that processor */ + run_queue_tls[proc]->link = tso; + run_queue_tls[proc] = tso; + } + ASSERT(RtsFlags.GranFlags.DoPriorityScheduling || count==0); + CurrentTime[proc] += count * RtsFlags.GranFlags.Costs.pri_sched_overhead + + RtsFlags.GranFlags.Costs.threadqueuetime; + + /* ToDo: check if this is still needed -- HWL + if (RtsFlags.GranFlags.DoThreadMigration) + ++SurplusThreads; + + if (RtsFlags.GranFlags.GranSimStats.Full && + !(( event_type == GR_START || event_type == GR_STARTQ) && + RtsFlags.GranFlags.labelling) ) + DumpRawGranEvent(proc, creator, event_type+1, tso, node, + tso->gran.sparkname, spark_queue_len(proc)); + */ + +# if defined(GRAN_CHECK) + /* Check if thread queue is sorted. Only for testing, really! HWL */ + if ( RtsFlags.GranFlags.DoPriorityScheduling && + (RtsFlags.GranFlags.Debug.sortedQ) ) { + rtsBool sorted = rtsTrue; + StgTSO *prev, *next; + + if (run_queue_hds[proc]==END_TSO_QUEUE || + run_queue_hds[proc]->link==END_TSO_QUEUE) { + /* just 1 elem => ok */ + } else { + /* Qu' wa'DIch yIleghQo' (ignore first elem)! */ + for (prev = run_queue_hds[proc]->link, next = prev->link; + (next != END_TSO_QUEUE) ; + prev = next, next = prev->link) { + ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) && + (prev==(StgTSO*)NULL || prev->link==next)); + sorted = sorted && + (prev->gran.pri >= next->gran.pri); + } + } + if (!sorted) { + fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n", + CurrentProc); + G_THREADQ(run_queue_hd,0x1); + } + } +# endif +} + +/* + insertThread, which is only used for GranSim Light, is similar to + startThread in that it adds a TSO to a thread queue. However, it assumes + that the thread queue is sorted by local clocks and it inserts the TSO at + the right place in the queue. Don't create any event, just insert. 
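+
+  Condensed sketch of the insertion logic used below (illustrative only):
+
+    // walk the queue up to the first TSO with a later local clock
+    for (prev = NULL, next = run_queue_hds[proc];
+         next != END_TSO_QUEUE && tso->gran.clock >= next->gran.clock;
+         prev = next, next = next->link) ;
+    // then link tso in between prev and next (plain append if none is later)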
+*/ +//@cindex GranSimLight_insertThread +rtsBool +GranSimLight_insertThread(tso, proc) +StgTSO* tso; +PEs proc; +{ + StgTSO *prev, *next; + nat count = 0; + rtsBool found = rtsFalse; + + ASSERT(RtsFlags.GranFlags.Light); + + /* In GrAnSim-Light we always have an idle `virtual' proc. + The semantics of the one-and-only thread queue is different here: + all threads in the queue are running (each on its own virtual processor); + the queue is only needed internally in the simulator to interleave the + reductions of the different processors. + The one-and-only thread queue is sorted by the local clocks of the TSOs. + */ + ASSERT(run_queue_hds[proc] != END_TSO_QUEUE); + ASSERT(tso->link == END_TSO_QUEUE); + + /* If only one thread in queue so far we emit DESCHEDULE in debug mode */ + if (RtsFlags.GranFlags.GranSimStats.Full && + (RtsFlags.GranFlags.Debug.checkLight) && + (run_queue_hd->link == END_TSO_QUEUE)) { + DumpRawGranEvent(proc, proc, GR_DESCHEDULE, + run_queue_hds[proc], (StgClosure*)NULL, + tso->gran.sparkname, spark_queue_len(proc)); // ToDo: check spar_queue_len + // resched = rtsTrue; + } + + /* this routine should only be used in a GrAnSim Light setup */ + /* && CurrentProc must be 0 in GrAnSim Light setup */ + ASSERT(RtsFlags.GranFlags.Light && CurrentProc==0); + + /* Idle proc; same for pri spark and basic version */ + if (run_queue_hd==END_TSO_QUEUE) + { + run_queue_hd = run_queue_tl = tso; + /* MAKE_BUSY(CurrentProc); */ + return rtsTrue; + } + + for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count = 0; + (next != END_TSO_QUEUE) && + !(found = (tso->gran.clock < next->gran.clock)); + prev = next, next = next->link, count++) + { + ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) && + (prev==(StgTSO*)NULL || prev->link==next)); + } + + /* found can only be rtsTrue if pri sparking enabled */ + if (found) { + /* Add tso to ThreadQueue between prev and next */ + tso->link = next; + if ( next == END_TSO_QUEUE ) { + run_queue_tls[proc] = tso; + } else { + /* no back link for TSO chain */ + } + + if ( prev == END_TSO_QUEUE ) { + run_queue_hds[proc] = tso; + } else { + prev->link = tso; + } + } else { /* !found */ /* or not pri sparking! */ + /* Add TSO to the end of the thread queue on that processor */ + run_queue_tls[proc]->link = tso; + run_queue_tls[proc] = tso; + } + + if ( prev == END_TSO_QUEUE ) { /* new head of queue */ + new_event(proc, proc, CurrentTime[proc], + ContinueThread, + tso, (StgClosure*)NULL, (rtsSpark*)NULL); + } + /* + if (RtsFlags.GranFlags.GranSimStats.Full && + !(( event_type == GR_START || event_type == GR_STARTQ) && + RtsFlags.GranFlags.labelling) ) + DumpRawGranEvent(proc, creator, gr_evttype, tso, node, + tso->gran.sparkname, spark_queue_len(proc)); + */ + return rtsTrue; +} + +/* + endThread is responsible for general clean-up after the thread tso has + finished. This includes emitting statistics into the profile etc. 
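+
+  A sketch of the expected call site in the scheduler (assuming the
+  scheduler inspects whatNext after returning from STG land):
+
+    if (CurrentTSO->whatNext == ThreadComplete)
+      endThread(CurrentTSO, CurrentProc);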
+*/ +void +endThread(StgTSO *tso, PEs proc) +{ + ASSERT(procStatus[proc]==Busy); // coming straight out of STG land + ASSERT(tso->whatNext==ThreadComplete); + // ToDo: prune ContinueThreads for this TSO from event queue + DumpEndEvent(proc, tso, rtsFalse /* not mandatory */); + + /* if this was the last thread on this PE then make it Idle */ + if (run_queue_hds[proc]==END_TSO_QUEUE) { + procStatus[CurrentProc] = Idle; + } +} + +//@node Thread Queue routines, GranSim functions, Scheduling functions, GranSim specific code +//@subsection Thread Queue routines + +/* + Check whether given tso resides on the run queue of the current processor. + Only used for debugging. +*/ + +//@cindex is_on_queue +rtsBool +is_on_queue (StgTSO *tso, PEs proc) +{ + StgTSO *t; + rtsBool found; + + for (t=run_queue_hds[proc], found=rtsFalse; + t!=END_TSO_QUEUE && !(found = t==tso); + t=t->link) + /* nothing */ ; + + return found; +} + +/* This routine is only used for keeping a statistics of thread queue + lengths to evaluate the impact of priority scheduling. -- HWL + {spark_queue_len}vo' jInIHta' +*/ +//@cindex thread_queue_len +nat +thread_queue_len(PEs proc) +{ + StgTSO *prev, *next; + nat len; + + for (len = 0, prev = END_TSO_QUEUE, next = run_queue_hds[proc]; + next != END_TSO_QUEUE; + len++, prev = next, next = prev->link) + {} + + return (len); +} + +//@node GranSim functions, GranSimLight routines, Thread Queue routines, GranSim specific code +//@subsection GranSim functions + +/* ----------------------------------------------------------------- */ +/* The main event handling functions; called from Schedule.c (schedule) */ +/* ----------------------------------------------------------------- */ + +//@cindex do_the_globalblock + +void +do_the_globalblock(rtsEvent* event) +{ + PEs proc = event->proc; /* proc that requested node */ + StgTSO *tso = event->tso; /* tso that requested node */ + StgClosure *node = event->node; /* requested, remote node */ + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the GlobalBlock\n")); + /* There should be no GLOBALBLOCKs in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + /* GlobalBlock events only valid with GUM fetching */ + ASSERT(RtsFlags.GranFlags.DoBulkFetching); + + IF_GRAN_DEBUG(bq, // globalBlock, + if (IS_LOCAL_TO(PROCS(node),proc)) { + belch("## Qagh: GlobalBlock: Blocking TSO %d (%p) on LOCAL node %p (PE %d).\n", + tso->id, tso, node, proc); + }); + + /* CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.munpacktime; */ + if ( blockFetch(tso,proc,node) != 0 ) + return; /* node has become local by now */ + +#if 0 + ToDo: check whether anything has to be done at all after blockFetch -- HWL + + if (!RtsFlags.GranFlags.DoAsyncFetch) { /* head of queue is next thread */ + StgTSO* tso = run_queue_hds[proc]; /* awaken next thread */ + if (tso != (StgTSO*)NULL) { + new_event(proc, proc, CurrentTime[proc], + ContinueThread, + tso, (StgClosure*)NULL, (rtsSpark*)NULL); + CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime; + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc, CurrentProc, GR_SCHEDULE, tso, + (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc)); // ToDo: check sparkname and spar_queue_len + procStatus[proc] = Busy; /* might have been fetching */ + } else { + procStatus[proc] = Idle; /* no work on proc now */ + } + } else { /* RtsFlags.GranFlags.DoAsyncFetch i.e. 
block-on-fetch */ + /* other thread is already running */ + /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL + new_event(proc,proc,CurrentTime[proc], + CONTINUETHREAD,EVENT_TSO(event), + (RtsFlags.GranFlags.DoBulkFetching ? closure : + EVENT_NODE(event)),NULL); + */ + } +#endif +} + +//@cindex do_the_unblock + +void +do_the_unblock(rtsEvent* event) +{ + PEs proc = event->proc, /* proc that requested node */ + creator = event->creator; /* proc that requested node */ + StgTSO* tso = event->tso; /* tso that requested node */ + StgClosure* node = event->node; /* requested, remote node */ + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the UnBlock\n")) + /* There should be no UNBLOCKs in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + /* UnblockThread means either FetchReply has arrived or + a blocking queue has been awakened; + ToDo: check with assertions + ASSERT(procStatus[proc]==Fetching || IS_BLACK_HOLE(event->node)); + */ + if (!RtsFlags.GranFlags.DoAsyncFetch) { /* block-on-fetch */ + /* We count block-on-fetch as normal block time */ + tso->gran.blocktime += CurrentTime[proc] - tso->gran.blockedat; + /* Dumping now done when processing the event + No costs for contextswitch or thread queueing in this case + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc, CurrentProc, GR_RESUME, tso, + (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc)); + */ + /* Maybe do this in FetchReply already + if (procStatus[proc]==Fetching) + procStatus[proc] = Busy; + */ + /* + new_event(proc, proc, CurrentTime[proc], + ContinueThread, + tso, node, (rtsSpark*)NULL); + */ + } else { + /* Asynchr comm causes additional costs here: */ + /* Bring the TSO from the blocked queue into the threadq */ + } + /* In all cases, the UnblockThread causes a ResumeThread to be scheduled */ + new_event(proc, proc, + CurrentTime[proc]+RtsFlags.GranFlags.Costs.threadqueuetime, + ResumeThread, + tso, node, (rtsSpark*)NULL); +} + +//@cindex do_the_fetchnode + +void +do_the_fetchnode(rtsEvent* event) +{ + PEs proc = event->proc, /* proc that holds the requested node */ + creator = event->creator; /* proc that requested node */ + StgTSO* tso = event->tso; + StgClosure* node = event->node; /* requested, remote node */ + rtsFetchReturnCode rc; + + ASSERT(CurrentProc==proc); + /* There should be no FETCHNODEs in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchNode\n")); + + CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime; + + /* ToDo: check whether this is the right place for dumping the event */ + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(creator, proc, GR_FETCH, tso, node, (StgInt)0, 0); + + do { + rc = handleFetchRequest(node, proc, creator, tso); + if (rc == OutOfHeap) { /* trigger GC */ +# if defined(GRAN_CHECK) && defined(GRAN) + if (RtsFlags.GcFlags.giveStats) + fprintf(RtsFlags.GcFlags.statsFile,"***** veQ boSwI' PackNearbyGraph(node %p, tso %p (%d))\n", + node, tso, tso->id); +# endif + prepend_event(event); + GarbageCollect(GetRoots); + // HWL: ToDo: check whether a ContinueThread has to be issued + // HWL old: ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse); +# if defined(GRAN_CHECK) && defined(GRAN) + if (RtsFlags.GcFlags.giveStats) { + fprintf(RtsFlags.GcFlags.statsFile,"***** SAVE_Hp=%p, SAVE_HpLim=%p, PACK_HEAP_REQUIRED=%d\n", + Hp, HpLim, 0) ; // PACK_HEAP_REQUIRED); ??? + fprintf(stderr,"***** No. 
of packets so far: %d (total size: %d)\n", + globalGranStats.tot_packets, globalGranStats.tot_packet_size); + } +# endif + event = grab_event(); + // Hp -= PACK_HEAP_REQUIRED; // ??? + + /* GC knows that events are special and follows the pointer i.e. */ + /* events are valid even if they moved. An EXIT is triggered */ + /* if there is not enough heap after GC. */ + } + } while (rc == OutOfHeap); +} + +//@cindex do_the_fetchreply +void +do_the_fetchreply(rtsEvent* event) +{ + PEs proc = event->proc, /* proc that requested node */ + creator = event->creator; /* proc that holds the requested node */ + StgTSO* tso = event->tso; + StgClosure* node = event->node; /* requested, remote node */ + StgClosure* closure=(StgClosure*)NULL; + + ASSERT(CurrentProc==proc); + ASSERT(RtsFlags.GranFlags.DoAsyncFetch || procStatus[proc]==Fetching); + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchReply\n")); + /* There should be no FETCHREPLYs in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + + /* assign message unpack costs *before* dumping the event */ + CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime; + + /* ToDo: check whether this is the right place for dumping the event */ + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc, creator, GR_REPLY, tso, node, + tso->gran.sparkname, spark_queue_len(proc)); + + /* THIS SHOULD NEVER HAPPEN + If tso is in the BQ of node this means that it actually entered the + remote closure, due to a missing GranSimFetch at the beginning of the + entry code; therefore, this is actually a faked fetch, triggered from + within GranSimBlock; + since tso is both in the EVQ and the BQ for node, we have to take it out + of the BQ first before we can handle the FetchReply; + ToDo: special cases in awaken_blocked_queue, since the BQ magically moved. + */ + if (tso->blocked_on!=(StgClosure*)NULL) { + IF_GRAN_DEBUG(bq, + belch("## ghuH: TSO %d (%p) in FetchReply is blocked on node %p (shouldn't happen AFAIK)", + tso->id, tso, node)); + // unlink_from_bq(tso, node); + } + + if (RtsFlags.GranFlags.DoBulkFetching) { /* bulk (packet) fetching */ + rtsPackBuffer *buffer = (rtsPackBuffer*)node; + nat size = buffer->size; + + /* NB: Fetch misses can't occur with GUM fetching, as */ + /* updatable closure are turned into RBHs and therefore locked */ + /* for other processors that try to grab them. */ + + closure = UnpackGraph(buffer); + CurrentTime[proc] += size * RtsFlags.GranFlags.Costs.munpacktime; + } else // incremental fetching + /* Copy or move node to CurrentProc */ + if (fetchNode(node, creator, proc)) { + /* Fetch has failed i.e. node has been grabbed by another PE */ + PEs p = where_is(node); + rtsTime fetchtime; + + if (RtsFlags.GranFlags.GranSimStats.Global) + globalGranStats.fetch_misses++; + + IF_GRAN_DEBUG(thunkStealing, + belch("== Qu'vatlh! fetch miss @ %u: node %p is at proc %u (rather than proc %u)\n", + CurrentTime[proc],node,p,creator)); + + CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime; + + /* Count fetch again !? 
*/ + ++(tso->gran.fetchcount); + tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime; + + fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) + + RtsFlags.GranFlags.Costs.latency; + + /* Chase the grabbed node */ + new_event(p, proc, fetchtime, + FetchNode, + tso, node, (rtsSpark*)NULL); + +# if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ + IF_GRAN_DEBUG(blockOnFetch, + BlockedOnFetch[CurrentProc] = tso;) /*-rtsTrue;-*/ + + IF_GRAN_DEBUG(blockOnFetch_sanity, + tso->type |= FETCH_MASK_TSO;) +# endif + + CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime; + + return; /* NB: no REPLy has been processed; tso still sleeping */ + } + + /* -- Qapla'! Fetch has been successful; node is here, now */ + ++(event->tso->gran.fetchcount); + event->tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime; + + /* this is now done at the beginning of this routine + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc,event->creator, GR_REPLY, event->tso, + (RtsFlags.GranFlags.DoBulkFetching ? + closure : + event->node), + tso->gran.sparkname, spark_queue_len(proc)); + */ + + --OutstandingFetches[proc]; + ASSERT(OutstandingFetches[proc] >= 0); + new_event(proc, proc, CurrentTime[proc], + ResumeThread, + event->tso, (RtsFlags.GranFlags.DoBulkFetching ? + closure : + event->node), + (rtsSpark*)NULL); +} + +//@cindex do_the_movethread + +void +do_the_movethread(rtsEvent* event) { + PEs proc = event->proc, /* proc that requested node */ + creator = event->creator; /* proc that holds the requested node */ + StgTSO* tso = event->tso; + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveThread\n")); + + ASSERT(CurrentProc==proc); + /* There should be no MOVETHREADs in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + /* MOVETHREAD events should never occur without -bM */ + ASSERT(RtsFlags.GranFlags.DoThreadMigration); + /* Bitmask of moved thread should be 0 */ + ASSERT(PROCS(tso)==0); + ASSERT(procStatus[proc] == Fishing || + RtsFlags.GranFlags.DoAsyncFetch); + ASSERT(OutstandingFishes[proc]>0); + + /* ToDo: exact costs for unpacking the whole TSO */ + CurrentTime[proc] += 5l * RtsFlags.GranFlags.Costs.munpacktime; + + /* ToDo: check whether this is the right place for dumping the event */ + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc, creator, + GR_STOLEN, tso, (StgClosure*)NULL, (StgInt)0, 0); + + // ToDo: check cost functions + --OutstandingFishes[proc]; + SET_GRAN_HDR(tso, ThisPE); // adjust the bitmask for the TSO + insertThread(tso, proc); + + if (procStatus[proc]==Fishing) + procStatus[proc] = Idle; + + if (RtsFlags.GranFlags.GranSimStats.Global) + globalGranStats.tot_TSOs_migrated++; +} + +//@cindex do_the_movespark + +void +do_the_movespark(rtsEvent* event) { + PEs proc = event->proc, /* proc that requested spark */ + creator = event->creator; /* proc that holds the requested spark */ + StgTSO* tso = event->tso; + rtsSparkQ spark = event->spark; + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveSpark\n")) + + ASSERT(CurrentProc==proc); + ASSERT(spark!=NULL); + ASSERT(procStatus[proc] == Fishing || + RtsFlags.GranFlags.DoAsyncFetch); + ASSERT(OutstandingFishes[proc]>0); + + CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime; + + /* record movement of spark only if spark profiling is turned on */ + if (RtsFlags.GranFlags.GranSimStats.Sparks) + DumpRawGranEvent(proc, creator, + SP_ACQUIRED, + tso, spark->node, spark->name, spark_queue_len(proc)); + + /* global statistics */ + if ( 
RtsFlags.GranFlags.GranSimStats.Global &&
+      !closure_SHOULD_SPARK(spark->node))
+    globalGranStats.withered_sparks++;
+    /* Not adding the spark to the spark queue would be the right */
+    /* thing here, but it also would be cheating, as this info can't be */
+    /* available in a real system. -- HWL */
+
+  --OutstandingFishes[proc];
+
+  add_to_spark_queue(spark);
+
+  IF_GRAN_DEBUG(randomSteal, // ToDo: spark-distribution flag
+                print_sparkq_stats());
+
+  /* Should we treat stolen sparks specially? Currently, we don't. */
+
+  if (procStatus[proc]==Fishing)
+    procStatus[proc] = Idle;
+
+  /* add_to_spark_queue will increase the time of the current proc. */
+  /*
+    If proc was fishing, it is Idle now with the new spark in its spark
+    pool. This means that the next time handleIdlePEs is called, a local
+    FindWork will be created on this PE to turn the spark into a thread. Of
+    course another PE might steal the spark in the meantime (that's why we
+    are using events rather than inlining all the operations in the first
+    place). */
+}
+
+/*
+  In the Constellation class version of GranSim the semantics of StartThread
+  events have changed. Now, StartThread has to perform 3 basic operations:
+   - create a new thread (previously this was done in ActivateSpark);
+   - insert the thread into the run queue of the current processor;
+   - generate a new event for actually running the new thread.
+  Note that insertThread is called via createThread.
+*/
+
+//@cindex do_the_startthread
+
+void
+do_the_startthread(rtsEvent *event)
+{
+  PEs proc          = event->proc;        /* proc that requested node */
+  StgTSO *tso       = event->tso;         /* tso that requested node */
+  StgClosure  *node = event->node;        /* requested, remote node */
+  rtsSpark *spark   = event->spark;
+  GranEventType gr_evttype;
+
+  ASSERT(CurrentProc==proc);
+  ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
+  ASSERT(event->evttype == ResumeThread || event->evttype == StartThread);
+  /* if this was called via StartThread: */
+  ASSERT(event->evttype!=StartThread || tso == END_TSO_QUEUE); // not yet created
+  // ToDo: check: ASSERT(event->evttype!=StartThread || procStatus[proc]==Starting);
+  /* if this was called via ResumeThread: */
+  ASSERT(event->evttype!=ResumeThread ||
+         RtsFlags.GranFlags.DoAsyncFetch ||!is_on_queue(tso,proc));
+
+  /* startThread may have been called from the main event handler upon
+     finding either a ResumeThread or a StartThread event; set the
+     gr_evttype (needed for writing to .gr file) accordingly */
+  // gr_evttype = (event->evttype == ResumeThread) ? GR_RESUME : GR_START;
+
+  if ( event->evttype == StartThread ) {
+    GranEventType gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+                                 GR_START : GR_STARTQ;
+
+    tso = createThread(BLOCK_SIZE_W, spark->gran_info);// implicit insertThread!
+    pushClosure(tso, node);
+
+    // ToDo: fwd info on local/global spark to thread -- HWL
+    // tso->gran.exported =  spark->exported;
+    // tso->gran.locked   = !spark->global;
+    tso->gran.sparkname = spark->name;
+
+    ASSERT(CurrentProc==proc);
+    if (RtsFlags.GranFlags.GranSimStats.Full)
+      DumpGranEvent(gr_evttype,tso);
+
+    CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcreatetime;
+  } else { // event->evttype == ResumeThread
+    GranEventType gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+ GR_RESUME : GR_RESUMEQ; + + insertThread(tso, proc); + + ASSERT(CurrentProc==proc); + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpGranEvent(gr_evttype,tso); + } + + ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); // non-empty run queue + procStatus[proc] = Busy; + /* make sure that this thread is actually run */ + new_event(proc, proc, + CurrentTime[proc], + ContinueThread, + tso, node, (rtsSpark*)NULL); + + /* A wee bit of statistics gathering */ + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.tot_add_threads++; + globalGranStats.tot_tq_len += thread_queue_len(CurrentProc); + } + +} + +//@cindex do_the_findwork +void +do_the_findwork(rtsEvent* event) +{ + PEs proc = event->proc, /* proc to search for work */ + creator = event->creator; /* proc that requested work */ + rtsSparkQ spark = event->spark; + /* ToDo: check that this size is safe -- HWL */ + nat req_heap = sizeofW(StgTSO) + MIN_STACK_WORDS; + // add this? -- HWL:RtsFlags.ConcFlags.stkChunkSize; + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the Findwork\n")); + + /* If GUM style fishing is enabled, the contents of the spark field says + what to steal (spark(1) or thread(2)); */ + ASSERT(!(RtsFlags.GranFlags.Fishing && event->spark==(rtsSpark*)0)); + + /* Make sure that we have enough heap for creating a new + thread. This is a conservative estimate of the required heap. + This eliminates special checks for GC around NewThread within + ActivateSpark. */ + + if (Hp + req_heap > HpLim ) { + IF_DEBUG(gc, + belch("GC: Doing GC from within Findwork handling (that's bloody dangerous if you ask me)");) + GarbageCollect(GetRoots); + // ReallyPerformThreadGC(req_heap, rtsFalse); old -- HWL + Hp -= req_heap; + if (procStatus[CurrentProc]==Sparking) + procStatus[CurrentProc]=Idle; + return; + } + + if ( RtsFlags.GranFlags.DoAlwaysCreateThreads || + RtsFlags.GranFlags.Fishing || + ((procStatus[proc]==Idle || procStatus[proc]==Sparking) && + (RtsFlags.GranFlags.FetchStrategy >= 2 || + OutstandingFetches[proc] == 0)) ) + { + rtsBool found; + rtsSparkQ prev, spark; + + /* ToDo: check */ + ASSERT(procStatus[proc]==Sparking || + RtsFlags.GranFlags.DoAlwaysCreateThreads || + RtsFlags.GranFlags.Fishing); + + /* SImmoHwI' yInej! Search spark queue! */ + /* gimme_spark (event, &found, &spark); */ + findLocalSpark(event, &found, &spark); + + if (!found) { /* pagh vumwI' */ + /* + If no spark has been found this can mean 2 things: + 1/ The FindWork was a fish (i.e. a message sent by another PE) and + the spark pool of the receiver is empty + --> the fish has to be forwarded to another PE + 2/ The FindWork was local to this PE (i.e. no communication; in this + case creator==proc) and the spark pool of the PE is not empty + contains only sparks of closures that should not be sparked + (note: if the spark pool were empty, handleIdlePEs wouldn't have + generated a FindWork in the first place) + --> the PE has to be made idle to trigger stealing sparks the next + time handleIdlePEs is performed + */ + + ASSERT(pending_sparks_hds[proc]==(rtsSpark*)NULL); + if (creator==proc) { + /* local FindWork */ + if (procStatus[proc]==Busy) { + belch("ghuH: PE %d in Busy state while processing local FindWork (spark pool is empty!) @ %lx", + proc, CurrentTime[proc]); + procStatus[proc] = Idle; + } + } else { + /* global FindWork i.e. 
a Fish */
+        ASSERT(RtsFlags.GranFlags.Fishing);
+        /* actually this generates another request from the originating PE */
+        ASSERT(OutstandingFishes[creator]>0);
+        OutstandingFishes[creator]--;
+        /* ToDo: assign costs for sending fish to proc not to creator */
+        stealSpark(creator);   /* might steal from same PE; ToDo: fix */
+        ASSERT(RtsFlags.GranFlags.maxFishes!=1 || procStatus[creator] == Fishing);
+        /* any assertions on state of proc possible here? */
+      }
+    } else {
+      /* DaH chu' Qu' yIchen! Now create new work! */
+      IF_GRAN_DEBUG(findWork,
+                    belch("+-  munching spark %p; creating thread for node %p",
+                          spark, spark->node));
+      activateSpark (event, spark);
+      ASSERT(spark != (rtsSpark*)NULL);
+      spark = delete_from_sparkq (spark, proc, rtsTrue);
+    }
+
+    IF_GRAN_DEBUG(findWork,
+                  belch("+- Contents of spark queues at the end of FindWork @ %lx",
+                        CurrentTime[proc]);
+                  print_sparkq_stats());
+
+    /* ToDo: check ; not valid if GC occurs in ActivateSpark */
+    ASSERT(!found ||
+            /* forward fish  or */
+            (proc!=creator ||
+            /* local spark  or */
+            (proc==creator && procStatus[proc]==Starting)) ||
+           //(!found && procStatus[proc]==Idle) ||
+           RtsFlags.GranFlags.DoAlwaysCreateThreads);
+  } else {
+    IF_GRAN_DEBUG(findWork,
+                  belch("+- RTS refuses to findWork on PE %d @ %lx",
+                        proc, CurrentTime[proc]);
+                  belch("  procStatus[%d]=%s, fetch strategy=%d, outstanding fetches[%d]=%d",
+                        proc, proc_status_names[procStatus[proc]],
+                        RtsFlags.GranFlags.FetchStrategy,
+                        proc, OutstandingFetches[proc]));
+  }
+}
+
+//@node GranSimLight routines, Code for Fetching Nodes, GranSim functions, GranSim specific code
+//@subsection GranSimLight routines
+
+/*
+   This code is called from the central scheduler after having grabbed a
+   new event and is only needed for GranSim-Light. It mainly adjusts the
+   ActiveTSO so that all costs that have to be assigned from within the
+   scheduler are assigned to the right TSO. The choice of ActiveTSO depends
+   on the type of event that has been found.
+*/
+
+void
+GranSimLight_enter_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+  StgTSO *ActiveTSO = *ActiveTSOp;
+
+  ASSERT (RtsFlags.GranFlags.Light);
+
+  /* Restore local clock of the virtual processor attached to CurrentTSO.
+     All costs will be associated to the `virt. proc' on which the tso
+     is living. */
+  if (ActiveTSO != NULL) {                     /* already in system area */
+    ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+    if (RtsFlags.GranFlags.DoFairSchedule)
+      {
+        if (RtsFlags.GranFlags.GranSimStats.Full &&
+            RtsFlags.GranFlags.Debug.checkLight)
+          DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+      }
+  }
+  switch (event->evttype)
+    {
+    case ContinueThread:
+    case FindWork:       /* inaccurate this way */
+      ActiveTSO = run_queue_hd;
+      break;
+    case ResumeThread:
+    case StartThread:
+    case MoveSpark:      /* has tso of virt proc in tso field of event */
+      ActiveTSO = event->tso;
+      break;
+    default: barf("Illegal event type %s (%d) in GrAnSim Light setup\n",
+                  event_names[event->evttype],event->evttype);
+    }
+  CurrentTime[CurrentProc] = ActiveTSO->gran.clock;
+  if (RtsFlags.GranFlags.DoFairSchedule) {
+    if (RtsFlags.GranFlags.GranSimStats.Full &&
+        RtsFlags.GranFlags.Debug.checkLight)
+      DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
+  }
+}
+
+void
+GranSimLight_leave_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+  StgTSO *ActiveTSO = *ActiveTSOp;
+
+  ASSERT(RtsFlags.GranFlags.Light);
+
+  /* Save time of `virt. proc' which was active since last getevent and
+     restore time of `virt. proc' on which CurrentTSO is living.
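+
+     The enter/leave pair is meant to bracket the scheduler's work on one
+     event, roughly like this (sketch; the surrounding loop and the
+     get_next_event accessor are assumptions, not code in this file):
+
+       event = get_next_event();
+       if (RtsFlags.GranFlags.Light)
+         GranSimLight_enter_system(event, &ActiveTSO);  // charge costs to the right TSO
+       ... dispatch on event->evttype ...
+       if (RtsFlags.GranFlags.Light)
+         GranSimLight_leave_system(event, &ActiveTSO);  // write virtual clock back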
+  */
+  if(RtsFlags.GranFlags.DoFairSchedule) {
+    if (RtsFlags.GranFlags.GranSimStats.Full &&
+        RtsFlags.GranFlags.Debug.checkLight)  // ToDo: clean up flags
+      DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+  }
+  ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+  ActiveTSO = (StgTSO*)NULL;
+  CurrentTime[CurrentProc] = CurrentTSO->gran.clock;
+  if (RtsFlags.GranFlags.DoFairSchedule /*  && resched  */ ) {
+      // resched = rtsFalse;
+      if (RtsFlags.GranFlags.GranSimStats.Full &&
+          RtsFlags.GranFlags.Debug.checkLight)
+        DumpGranEvent(GR_SCHEDULE,run_queue_hd);
+  }
+  /*
+     if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
+         (TimeOfNextEvent == 0 ||
+          TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
+       ...
+     }
+  */
+}
+
+//@node Code for Fetching Nodes, Idle PEs, GranSimLight routines, GranSim specific code
+//@subsection Code for Fetching Nodes
+
+/*
+   The following GrAnSim routines simulate the fetching of nodes from a
+   remote processor.  A one-word bitmask in the closure header records on
+   which processors a node is lying; moving or copying a node between
+   processors amounts to an appropriate change in this bitmask, plus an
+   update of the simulated clocks.
+*/
+
+//@cindex fetchNode
+
+rtsFetchReturnCode
+fetchNode(node,from,to)
+StgClosure* node;
+PEs from, to;
+{
+  ASSERT(to==CurrentProc);
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+  /* fetchNode should never be entered with bulk fetching enabled */
+  ASSERT(!RtsFlags.GranFlags.DoBulkFetching);
+
+  /* Now fetch the node */
+  if (!IS_LOCAL_TO(PROCS(node),from) &&
+      !IS_LOCAL_TO(PROCS(node),to) )
+    return NodeHasMoved;
+
+  if (closure_HNF(node))                /* node already in head normal form? */
+    node->header.gran.procs |= PE_NUMBER(to);           /* Copy node */
+  else
+    node->header.gran.procs  = PE_NUMBER(to);           /* Move node */
+
+  return Ok;
+}
+
+/*
+   Process a fetch request.
+
+   Cost of sending a packet of size n = C + P*n
+   where C = packet construction constant,
+         P = cost of packing one word into a packet
+   [Should also account for multiple packets].
+*/
+
+//@cindex handleFetchRequest
+
+rtsFetchReturnCode
+handleFetchRequest(node,to,from,tso)
+StgClosure* node;   // the node which is requested
+PEs to, from;       // fetch request: from -> to
+StgTSO* tso;        // the tso which needs the node
+{
+  ASSERT(!RtsFlags.GranFlags.Light);
+  /* ToDo: check assertion */
+  ASSERT(OutstandingFetches[from]>0);
+
+  /* probably wrong place; */
+  ASSERT(CurrentProc==to);
+
+  if (IS_LOCAL_TO(PROCS(node), from)) /* Somebody else moved node already => */
+    {                                 /* start tso */
+      IF_GRAN_DEBUG(thunkStealing,
+                    fprintf(stderr,"ghuH: handleFetchRequest entered with local node %p (%s) (PE %d)\n",
+                            node, info_type(node), from));
+
+      if (RtsFlags.GranFlags.DoBulkFetching) {
+        nat size;
+        rtsPackBuffer *graph;
+
+        /* Create a 1-node-buffer and schedule a FETCHREPLY now */
+        graph = PackOneNode(node, tso, &size);
+        new_event(from, to, CurrentTime[to],
+                  FetchReply,
+                  tso, graph, (rtsSpark*)NULL);
+      } else {
+        new_event(from, to, CurrentTime[to],
+                  FetchReply,
+                  tso, node, (rtsSpark*)NULL);
+      }
+      IF_GRAN_DEBUG(thunkStealing,
+                    belch("== majQa'! closure %p is local on PE %d already (this is a good thing)", node, from));
+      return (NodeIsLocal);
+    }
+  else if (IS_LOCAL_TO(PROCS(node), to) )   /* Is node still here? */
+    {
+      if (RtsFlags.GranFlags.DoBulkFetching) { /* {GUM}vo' ngoqvam vInIHta' */
+        nat size;                              /* (code from GUM) */
+        StgClosure* graph;
+
+        if (IS_BLACK_HOLE(node)) {   /* block on BH or RBH */
+          new_event(from, to, CurrentTime[to],
+                    GlobalBlock,
+                    tso, node, (rtsSpark*)NULL);
+          /* Note: blockFetch is done when handling GLOBALBLOCK event;
+                   make sure the TSO stays out of the run queue */
+          /* When this thread is reawoken it does the usual: it tries to
+             enter the updated node and issues a fetch if it's remote.
+             It has forgotten that it has sent a fetch already (i.e. a
+             FETCHNODE is swallowed by a BH, leaving the thread in a BQ) */
+          --OutstandingFetches[from];
+
+          IF_GRAN_DEBUG(thunkStealing,
+                        belch("== majQa'!
closure %p on PE %d is a BH (demander=PE %d); faking a FMBQ", + node, to, from)); + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.tot_FMBQs++; + } + return (NodeIsBH); + } + + /* The tso requesting the node is blocked and cannot be on a run queue */ + ASSERT(!is_on_queue(tso, from)); + + if ((graph = PackNearbyGraph(node, tso, &size)) == NULL) + return (OutOfHeap); /* out of heap */ + + /* Actual moving/copying of node is done on arrival; see FETCHREPLY */ + /* Send a reply to the originator */ + /* ToDo: Replace that by software costs for doing graph packing! */ + CurrentTime[to] += size * RtsFlags.GranFlags.Costs.mpacktime; + + new_event(from, to, + CurrentTime[to]+RtsFlags.GranFlags.Costs.latency, + FetchReply, + tso, (StgClosure *)graph, (rtsSpark*)NULL); + + CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime; + return (Ok); + } else { /* incremental (single closure) fetching */ + /* Actual moving/copying of node is done on arrival; see FETCHREPLY */ + /* Send a reply to the originator */ + CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime; + + new_event(from, to, + CurrentTime[to]+RtsFlags.GranFlags.Costs.latency, + FetchReply, + tso, node, (rtsSpark*)NULL); + + CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime; + return (Ok); + } + } + else /* Qu'vatlh! node has been grabbed by another proc => forward */ + { + PEs node_loc = where_is(node); + rtsTime fetchtime; + + IF_GRAN_DEBUG(thunkStealing, + belch("== Qu'vatlh! node %p has been grabbed by PE %d from PE %d (demander=%d) @ %d\n", + node,node_loc,to,from,CurrentTime[to])); + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.fetch_misses++; + } + + /* Prepare FORWARD message to proc p_new */ + CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime; + + fetchtime = stg_max(CurrentTime[to], CurrentTime[node_loc]) + + RtsFlags.GranFlags.Costs.latency; + + new_event(node_loc, from, fetchtime, + FetchNode, + tso, node, (rtsSpark*)NULL); + + CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime; + + return (NodeHasMoved); + } +} + +/* + blockFetch blocks a BlockedFetch node on some kind of black hole. + + Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL + + {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't + create @FMBQ@'s (FetchMe blocking queues) to cope with global + blocking. Instead, non-local TSO are put into the BQ in the same way as + local TSOs. However, we have to check if a TSO is local or global in + order to account for the latencies involved and for keeping track of the + number of fetches that are really going on. +*/ + +//@cindex blockFetch + +rtsFetchReturnCode +blockFetch(tso, proc, bh) +StgTSO* tso; /* TSO which gets blocked */ +PEs proc; /* PE where that tso was running */ +StgClosure* bh; /* closure to block on (BH, RBH, BQ) */ +{ + StgInfoTable *info; + + IF_GRAN_DEBUG(bq, + fprintf(stderr,"## blockFetch: blocking TSO %p (%d)[PE %d] on node %p (%s) [PE %d]. No graph is packed!\n", + tso, tso->id, proc, bh, info_type(bh), where_is(bh))); + + if (!IS_BLACK_HOLE(bh)) { /* catches BHs and RBHs */ + IF_GRAN_DEBUG(bq, + fprintf(stderr,"## blockFetch: node %p (%s) is not a BH => awakening TSO %p (%d) [PE %u]\n", + bh, info_type(bh), tso, tso->id, proc)); + + /* No BH anymore => immediately unblock tso */ + new_event(proc, proc, CurrentTime[proc], + UnblockThread, + tso, bh, (rtsSpark*)NULL); + + /* Is this always a REPLY to a FETCH in the profile ? 
*/ + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc, proc, GR_REPLY, tso, bh, (StgInt)0, 0); + return (NodeIsNoBH); + } + + /* DaH {BQ}Daq Qu' Suq 'e' wISov! + Now we know that we have to put the tso into the BQ. + 2 cases: If block-on-fetch, tso is at head of threadq => + => take it out of threadq and into BQ + If reschedule-on-fetch, tso is only pointed to be event + => just put it into BQ + + ngoq ngo'!! + if (!RtsFlags.GranFlags.DoAsyncFetch) { + GranSimBlock(tso, proc, bh); + } else { + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc, where_is(bh), GR_BLOCK, tso, bh, (StgInt)0, 0); + ++(tso->gran.blockcount); + tso->gran.blockedat = CurrentTime[proc]; + } + */ + + /* after scheduling the GlobalBlock event the TSO is not put into the + run queue again; it is only pointed to via the event we are + processing now; in GranSim 4.xx there is no difference between + synchr and asynchr comm here */ + ASSERT(!is_on_queue(tso, proc)); + ASSERT(tso->link == END_TSO_QUEUE); + + GranSimBlock(tso, proc, bh); /* GranSim statistics gathering */ + + /* Now, put tso into BQ (similar to blocking entry codes) */ + info = get_itbl(bh); + switch (info -> type) { + case RBH: + case BLACKHOLE: + case CAF_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here + case SE_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here + case SE_CAF_BLACKHOLE:// ToDo: check whether this is a possibly ITBL here + /* basically an inlined version of BLACKHOLE_entry -- HWL */ + /* Change the BLACKHOLE into a BLACKHOLE_BQ */ + ((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info; + /* Put ourselves on the blocking queue for this black hole */ + // tso->link=END_TSO_QUEUE; not necessary; see assertion above + ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)tso; + tso->blocked_on = bh; + recordMutable((StgMutClosure *)bh); + break; + + case BLACKHOLE_BQ: + /* basically an inlined version of BLACKHOLE_BQ_entry -- HWL */ + tso->link = (StgTSO *) (((StgBlockingQueue*)bh)->blocking_queue); + ((StgBlockingQueue*)bh)->blocking_queue = (StgBlockingQueueElement *)tso; + recordMutable((StgMutClosure *)bh); + +# if 0 && defined(GC_MUT_REQUIRED) + ToDo: check whether recordMutable is necessary -- HWL + /* + * If we modify a black hole in the old generation, we have to make + * sure it goes on the mutables list + */ + + if (bh <= StorageMgrInfo.OldLim) { + MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables; + StorageMgrInfo.OldMutables = bh; + } else + MUT_LINK(bh) = MUT_NOT_LINKED; +# endif + break; + + case FETCH_ME_BQ: + barf("Qagh: FMBQ closure (%p) found in GrAnSim (TSO=%p (%d))\n", + bh, tso, tso->id); + + default: + { + G_PRINT_NODE(bh); + barf("Qagh: thought %p was a black hole (IP %p (%s))", + bh, info, info_type(get_itbl(bh))); + } + } + return (Ok); +} + + +//@node Idle PEs, Routines directly called from Haskell world, Code for Fetching Nodes, GranSim specific code +//@subsection Idle PEs + +/* + Export work to idle PEs. This function is called from @ReSchedule@ + before dispatching on the current event. @HandleIdlePEs@ iterates over + all PEs, trying to get work for idle PEs. Note, that this is a + simplification compared to GUM's fishing model. We try to compensate for + that by making the cost for stealing work dependent on the number of + idle processors and thereby on the probability with which a randomly + sent fish would find work. 
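+
+   As a worked example of that scaling (using the formula in sparkStealTime
+   below): with proc = 4 PEs, fishdelay = 4/2 = 2; if 3 PEs are idle,
+   sparkdelay = 2 - ((2-1)/3)*3 = 1, so the modelled steal cost drops to a
+   single latency unit when almost everybody is idle.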
+*/
+
+//@cindex handleIdlePEs
+
+void
+handleIdlePEs(void)
+{
+  PEs p;
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: handling Idle PEs\n"))
+
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+
+  /* Could check whether there are idle PEs if it's a cheap check */
+  for (p = 0; p < RtsFlags.GranFlags.proc; p++)
+    if (procStatus[p]==Idle) /*  && IS_SPARKING(p) && IS_STARTING(p) */
+      /* First look for local work i.e. examine local spark pool! */
+      if (pending_sparks_hds[p]!=(rtsSpark *)NULL) {
+        new_event(p, p, CurrentTime[p],
+                  FindWork,
+                  (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+        procStatus[p] = Sparking;
+      } else if ((RtsFlags.GranFlags.maxFishes==0 ||
+                  OutstandingFishes[p]<RtsFlags.GranFlags.maxFishes)) {
+        /* If no local work then try to get remote work! */
+        if (RtsFlags.GranFlags.DoStealThreadsFirst &&
+            (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0))
+          {
+            if (SurplusThreads > 0l)                    /* Steal a thread */
+              stealThread(p);
+
+            if (procStatus[p]!=Idle)
+              break;
+          }
+
+        if (SparksAvail > 0 &&
+            (RtsFlags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[p] == 0)) /* Steal a spark */
+          stealSpark(p);
+
+        if (SurplusThreads > 0 &&
+            (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0)) /* Steal a thread */
+          stealThread(p);
+      }
+}
+
+/*
+  Steal a spark and schedule moving it to proc. We want to look at PEs in
+  clock order -- most retarded first. Currently sparks are only stolen from
+  the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, this
+  should be changed to first steal from the former then from the latter.
+
+  We model a sort of fishing mechanism by counting the number of sparks
+  and threads we are currently stealing. */
+
+/*
+  Return a random nat value in the interval [from, to)
+*/
+static nat
+natRandom(from, to)
+nat from, to;
+{
+  nat r, d;
+
+  ASSERT(from<=to);
+  d = to - from;
+  /* random returns a value in [0, RAND_MAX] */
+  r = (nat) ((float)from + ((float)random()*(float)d)/(float)RAND_MAX);
+  r = (r==to) ? from : r;
+  ASSERT(from<=r && (r<to || from==to));
+  return r;
+}
+
+/*
+  Find any PE other than proc. Used for GUM style fishing only.
+*/
+static PEs
+findRandomPE (proc)
+PEs proc;
+{
+  nat p;
+
+  ASSERT(RtsFlags.GranFlags.Fishing);
+  if (RtsFlags.GranFlags.RandomSteal) {
+    p = natRandom(0,RtsFlags.GranFlags.proc);  /* full range of PEs */
+  } else {
+    p = 0;
+  }
+  IF_GRAN_DEBUG(randomSteal,
+                belch("^^ RANDOM_STEAL (fishing): stealing from PE %d (current proc is %d)",
+                      p, proc));
+
+  return (PEs)p;
+}
+
+/*
+  Magic code for stealing sparks/threads makes use of global knowledge on
+  spark queues.
+*/
+static void
+sortPEsByTime (proc, pes_by_time, firstp, np)
+PEs proc;
+PEs *pes_by_time;
+nat *firstp, *np;
+{
+  PEs p, n, i, j;
+  nat first;
+
+  /* pes_by_time shall contain processors from which we may steal sparks */
+  for(n=0, p=0; p < RtsFlags.GranFlags.proc; ++p)
+    if ((p!=proc) &&                                  /* not the current proc */
+        (pending_sparks_hds[p]!=(rtsSpark *)NULL) &&  /* non-empty spark pool */
+        (CurrentTime[p] <= CurrentTime[CurrentProc]))
+      pes_by_time[n++] = p;
+
+  /* sort pes_by_time by local clock; a plain bubble sort, n is small */
+  for(i=0; i<n; ++i)
+    for(j=i+1; j<n; ++j)
+      if (CurrentTime[pes_by_time[i]] > CurrentTime[pes_by_time[j]]) {
+        rtsTime temp = pes_by_time[i];
+        pes_by_time[i] = pes_by_time[j];
+        pes_by_time[j] = temp;
+      }
+
+  /* Choose random processor to steal spark from; first look at processors */
+  /* that are earlier than the current one (i.e. proc) */
+  for(first=0;
+      (first < n) && (CurrentTime[pes_by_time[first]] <= CurrentTime[proc]);
+      ++first)
+    /* nothing */ ;
+
+  /* if the assertion below is true we can get rid of first */
+  /* ASSERT(first==n); */
+  /* ToDo: check if first is really needed; find cleaner solution */
+
+  *firstp = first;
+  *np = n;
+}
+
+/*
+  Steal a spark (piece of work) from any processor and bring it to proc.
+*/
+//@cindex stealSpark
+static inline rtsBool
+stealSpark(PEs proc) { return stealSomething(proc, rtsTrue, rtsFalse); }
+
+/*
+  Steal a thread from any processor and bring it to proc i.e. thread migration
+*/
+//@cindex stealThread
+static inline rtsBool
+stealThread(PEs proc) { return stealSomething(proc, rtsFalse, rtsTrue); }
+
+/*
+  Steal a spark or a thread and schedule moving it to proc.
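+
+  The request is encoded in the spark field of the FindWork event; as a
+  sketch (illustrative only, mirroring the new_event call in stealSomething
+  below):
+
+    new_event(p, proc, t, FindWork, NULL, NULL, (rtsSpark*)1);  // ask for a spark
+    new_event(p, proc, t, FindWork, NULL, NULL, (rtsSpark*)2);  // ask for a thread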
+*/
+//@cindex stealSomething
+static rtsBool
+stealSomething(proc, steal_spark, steal_thread)
+PEs proc;                           // PE that needs work (stealer)
+rtsBool steal_spark, steal_thread;  // should a spark and/or thread be stolen
+{
+  PEs p;
+  rtsTime fish_arrival_time;
+  rtsSpark *spark, *prev, *next;
+  rtsBool stolen = rtsFalse;
+
+  ASSERT(steal_spark || steal_thread);
+
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+  ASSERT(!steal_thread || RtsFlags.GranFlags.DoThreadMigration);
+
+  if (!RtsFlags.GranFlags.Fishing) {
+    // ToDo: check if stealing threads is preferred over stealing sparks
+    if (steal_spark) {
+      if (stealSparkMagic(proc))
+        return rtsTrue;
+      else             // no spark found
+        if (steal_thread)
+          return stealThreadMagic(proc);
+        else           // no thread found
+          return rtsFalse;
+    } else {           // ASSERT(steal_thread);
+      return stealThreadMagic(proc);
+    }
+    barf("stealSomething: never reached");
+  }
+
+  /* The rest of this function does GUM style fishing */
+
+  p = findRandomPE(proc); /* find a random PE other than proc */
+
+  /* Message packing costs for sending a Fish; qeq jabbI'ID */
+  CurrentTime[proc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+  /* use another GranEvent for requesting a thread? */
+  if (steal_spark && RtsFlags.GranFlags.GranSimStats.Sparks)
+    DumpRawGranEvent(p, proc, SP_REQUESTED,
+                     (StgTSO*)NULL, (StgClosure *)NULL, (StgInt)0, 0);
+
+  /* time of the fish arrival on the remote PE */
+  fish_arrival_time = CurrentTime[proc] + RtsFlags.GranFlags.Costs.latency;
+
+  /* Perhaps use a separate Fish event for that? */
+  /* The contents of the spark component is a HACK:
+      1 means give me a spark;
+      2 means give me a thread;
+      0 means give me nothing (this should never happen)
+  */
+  new_event(p, proc, fish_arrival_time,
+            FindWork,
+            (StgTSO*)NULL, (StgClosure*)NULL,
+            (steal_spark ? (rtsSpark*)1 : steal_thread ? (rtsSpark*)2 : (rtsSpark*)0));
+
+  ++OutstandingFishes[proc];
+  /* only with Async fetching? */
+  if (procStatus[proc]==Idle)
+    procStatus[proc]=Fishing;
+
+  /* time needed to clean up buffers etc after sending a message */
+  CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+  /* With GUM style fishing, stealing always succeeds because it only
+     consists of sending out a fish; of course, the fish may return
+     empty-handed! */
+  return rtsTrue;
+}
+
+/*
+   This version of stealing a spark makes use of the global info on all
+   spark pools etc which is not available in a real parallel system.
+   This could be extended to test e.g. the impact of perfect load information.
+*/
+//@cindex stealSparkMagic
+static rtsBool
+stealSparkMagic(proc)
+PEs proc;
+{
+  PEs p, i, j, n, first, upb;
+  rtsSpark *spark, *next;
+  PEs pes_by_time[MAX_PROC];
+  rtsBool stolen = rtsFalse;
+  rtsTime stealtime;
+
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+
+  sortPEsByTime(proc, pes_by_time, &first, &n);
+
+  while (!stolen && n>0) {
+    upb = (first==0) ?
n : first;
+    i = natRandom(0,upb);                /* choose a random eligible PE */
+    p = pes_by_time[i];
+
+    IF_GRAN_DEBUG(randomSteal,
+                  belch("^^ stealSparkMagic (random_steal, not fishing): stealing spark from PE %d (current proc is %d)",
+                        p, proc));
+
+    ASSERT(pending_sparks_hds[p]!=(rtsSpark *)NULL); /* non-empty spark pool */
+
+    /* Now go through rtsSparkQ and steal the first eligible spark */
+
+    spark = pending_sparks_hds[p];
+    while (!stolen && spark != (rtsSpark*)NULL)
+      {
+        /* NB: no prev pointer is needed here because all sparks that are not
+           chosen are pruned
+        */
+        if ((procStatus[p]==Idle || procStatus[p]==Sparking || procStatus[p] == Fishing) &&
+            spark->next==(rtsSpark*)NULL)
+          {
+            /* Be social! Don't steal the only spark of an idle processor
+               not {spark} neH yInIH !! */
+            break; /* next PE */
+          }
+        else if (closure_SHOULD_SPARK(spark->node))
+          {
+            /* Don't Steal local sparks;
+               ToDo: optionally prefer local over global sparks
+            if (!spark->global) {
+              prev=spark;
+              continue;             next spark
+            }
+            */
+            /* found a spark! */
+
+            /* Prepare message for sending spark */
+            CurrentTime[p] += RtsFlags.GranFlags.Costs.mpacktime;
+
+            if (RtsFlags.GranFlags.GranSimStats.Sparks)
+              DumpRawGranEvent(p, (PEs)0, SP_EXPORTED,
+                               (StgTSO*)NULL, spark->node,
+                               spark->name, spark_queue_len(p));
+
+            stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+                           CurrentTime[p] :
+                           CurrentTime[proc])
+                        + sparkStealTime();
+
+            new_event(proc, p /* CurrentProc */, stealtime,
+                      MoveSpark,
+                      (StgTSO*)NULL, spark->node, spark);
+
+            stolen = rtsTrue;
+            ++OutstandingFishes[proc]; /* no. of sparks currently on the fly */
+            if (procStatus[proc]==Idle)
+              procStatus[proc] = Fishing;
+            ++(spark->global);   /* record that this is a global spark */
+            ASSERT(SparksAvail>0);
+            --SparksAvail;       /* on-the-fly sparks are not available */
+            next = delete_from_sparkq(spark, p, rtsFalse); // don't dispose!
+            CurrentTime[p] += RtsFlags.GranFlags.Costs.mtidytime;
+          }
+        else /* !(closure_SHOULD_SPARK(SPARK_NODE(spark))) */
+          {
+            IF_GRAN_DEBUG(checkSparkQ,
+                          belch("^^ pruning spark %p (node %p) in stealSparkMagic",
+                                spark, spark->node));
+
+            /* if the spark points to a node that should not be sparked,
+               prune the spark queue at this point */
+            if (RtsFlags.GranFlags.GranSimStats.Sparks)
+              DumpRawGranEvent(p, (PEs)0, SP_PRUNED,
+                               (StgTSO*)NULL, spark->node,
+                               spark->name, spark_queue_len(p));
+            if (RtsFlags.GranFlags.GranSimStats.Global)
+              globalGranStats.pruned_sparks++;
+
+            ASSERT(SparksAvail>0);
+            --SparksAvail;
+            spark = delete_from_sparkq(spark, p, rtsTrue);
+          }
+        /* unlink spark (may have been freed!) from sparkq;
+           if (prev == NULL) // spark was head of spark queue
+             pending_sparks_hds[p] = spark->next;
+           else
+             prev->next = spark->next;
+           if (spark->next == NULL)
+             pending_sparks_tls[p] = prev;
+           else
+             next->prev = prev;
+        */
+      } /* while ... iterating over sparkq */
+
+    /* ToDo: assert that PE p still has work left after stealing the spark */
+
+    if (!stolen && (n>0)) {  /* nothing stealable from proc p :( */
+      ASSERT(pes_by_time[i]==p);
+
+      /* remove p from the list (at pos i) */
+      for (j=i; j+1<n; j++)
+        pes_by_time[j] = pes_by_time[j+1];
+      n--;
+
+      /* update index to first proc which is later (or equal) than proc */
+      for ( ;
+            (first>0) &&
+              (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+            first--)
+        /* nothing */ ;
+    }
+  } /* while ...
iterating over PEs in pes_by_time */ + + IF_GRAN_DEBUG(randomSteal, + if (stolen) + belch("^^ stealSparkMagic: spark %p (node=%p) stolen by PE %d from PE %d (SparksAvail=%d; idlers=%d)", + spark, spark->node, proc, p, + SparksAvail, idlers()); + else + belch("^^ stealSparkMagic: nothing stolen by PE %d (sparkq len after pruning=%d)(SparksAvail=%d; idlers=%d)", + proc, SparksAvail, idlers())); + + if (RtsFlags.GranFlags.GranSimStats.Global && + stolen && (i!=0)) { /* only for statistics */ + globalGranStats.rs_sp_count++; + globalGranStats.ntimes_total += n; + globalGranStats.fl_total += first; + globalGranStats.no_of_steals++; + } + + return stolen; +} + +/* + The old stealThread code, which makes use of global info and does not + send out fishes. + NB: most of this is the same as in stealSparkMagic; + only the pieces specific to processing thread queues are different; + long live polymorphism! +*/ + +//@cindex stealThreadMagic +static rtsBool +stealThreadMagic(proc) +PEs proc; +{ + PEs p, i, j, n, first, upb; + StgTSO *tso; + PEs pes_by_time[MAX_PROC]; + rtsBool stolen = rtsFalse; + rtsTime stealtime; + + /* Should never be entered in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + + sortPEsByTime(proc, pes_by_time, &first, &n); + + while (!stolen && n>0) { + upb = (first==0) ? n : first; + i = natRandom(0,upb); /* choose a random eligible PE */ + p = pes_by_time[i]; + + IF_GRAN_DEBUG(randomSteal, + belch("^^ stealThreadMagic (random_steal, not fishing): stealing thread from PE %d (current proc is %d)", + p, proc)); + + /* Steal the first exportable thread in the runnable queue but + never steal the first in the queue for social reasons; + not Qu' wa'DIch yInIH !! + */ + /* Would be better to search through queue and have options which of + the threads to pick when stealing */ + if (run_queue_hds[p] == END_TSO_QUEUE) { + IF_GRAN_DEBUG(randomSteal, + belch("^^ stealThreadMagic: No thread to steal from PE %d (stealer=PE %d)", + p, proc)); + } else { + tso = run_queue_hds[p]->link; /* tso is *2nd* thread in thread queue */ + /* Found one */ + stolen = rtsTrue; + + /* update links in queue */ + run_queue_hds[p]->link = tso->link; + if (run_queue_tls[p] == tso) + run_queue_tls[p] = run_queue_hds[p]; + + /* ToDo: Turn magic constants into params */ + + CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mpacktime; + + stealtime = (CurrentTime[p] > CurrentTime[proc] ? 
+                     CurrentTime[p] :
+                     CurrentTime[proc])
+                  + sparkStealTime()
+                  + 4l * RtsFlags.GranFlags.Costs.additional_latency
+                  + 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+      /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
+      SET_GRAN_HDR(tso,Nowhere /* PE_NUMBER(proc) */);
+
+      /* Move from one queue to another */
+      new_event(proc, p, stealtime,
+                MoveThread,
+                tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+
+      /* MAKE_BUSY(proc);  not yet; only when thread is in threadq */
+      ++OutstandingFishes[proc];
+      if (procStatus[proc])
+        procStatus[proc] = Fishing;
+      --SurplusThreads;
+
+      if(RtsFlags.GranFlags.GranSimStats.Full)
+        DumpRawGranEvent(p, proc,
+                         GR_STEALING,
+                         tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+      /* costs for tidying up buffer after having sent it */
+      CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mtidytime;
+    }
+
+    /* ToDo: assert that PE p still has work left after stealing the spark */
+
+    if (!stolen && (n>0)) {  /* nothing stealable from proc p :( */
+      ASSERT(pes_by_time[i]==p);
+
+      /* remove p from the list (at pos i) */
+      for (j=i; j+1<n; j++)
+        pes_by_time[j] = pes_by_time[j+1];
+      n--;
+
+      /* update index to first proc which is later (or equal) than proc */
+      for ( ;
+            (first>0) &&
+              (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+            first--)
+        /* nothing */ ;
+    }
+  } /* while ... iterating over PEs in pes_by_time */
+
+  IF_GRAN_DEBUG(randomSteal,
+                if (stolen)
+                  belch("^^ stealThreadMagic: stolen TSO %d (%p) by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+                        tso->id, tso, proc, p,
+                        SparksAvail, idlers());
+                else
+                  belch("stealThreadMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)",
+                        proc, SparksAvail, idlers()));
+
+  if (RtsFlags.GranFlags.GranSimStats.Global &&
+      stolen && (i!=0)) { /* only for statistics */
+    /* ToDo: more statistics on avg thread queue length etc */
+    globalGranStats.rs_t_count++;
+    globalGranStats.no_of_migrates++;
+  }
+
+  return stolen;
+}
+
+//@cindex sparkStealTime
+static rtsTime
+sparkStealTime(void)
+{
+  double fishdelay, sparkdelay, latencydelay;
+  fishdelay =  (double)RtsFlags.GranFlags.proc/2;
+  sparkdelay = fishdelay -
+          ((fishdelay-1)/(double)(RtsFlags.GranFlags.proc-1))*(double)idlers();
+  latencydelay = sparkdelay*((double)RtsFlags.GranFlags.Costs.latency);
+
+  return((rtsTime)latencydelay);
+}
+
+//@node Routines directly called from Haskell world, Emiting profiling info for GrAnSim, Idle PEs, GranSim specific code
+//@subsection Routines directly called from Haskell world
+/*
+The @GranSim...@ routines in here are directly called via macros from the
+threaded world.
+
+First some auxiliary routines.
+*/
+
+/* Take the current thread off the thread queue and thereby activate the
+   next thread. It's assumed that the next ReSchedule after this uses
+   NEW_THREAD as param.
+   This fct is called from GranSimBlock and GranSimFetch
+*/
+
+//@cindex ActivateNextThread
+
+void
+ActivateNextThread (proc)
+PEs proc;
+{
+  StgTSO *t;
+  /*
+    This routine is entered either via GranSimFetch or via GranSimBlock.
+    It has to prepare the CurrentTSO for being blocked and update the
+    run queue and other statistics on PE proc. The actual enqueuing to the
+    blocking queue (if coming from GranSimBlock) is done in the entry code
+    of the BLACKHOLE and BLACKHOLE_BQ closures (see StgMiscClosures.hc).
+  */
+  /* ToDo: add assertions here!!
*/ + //ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); + + // Only necessary if the running thread is at front of the queue + // run_queue_hds[proc] = run_queue_hds[proc]->link; + ASSERT(CurrentProc==proc); + ASSERT(!is_on_queue(CurrentTSO,proc)); + if (run_queue_hds[proc]==END_TSO_QUEUE) { + /* NB: this routine is only entered with asynchr comm (see assertion) */ + procStatus[proc] = Idle; + } else { + /* ToDo: check cost assignment */ + CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime; + if (RtsFlags.GranFlags.GranSimStats.Full && + (!RtsFlags.GranFlags.Light || RtsFlags.GranFlags.Debug.checkLight)) + /* right flag !?? ^^^ */ + DumpRawGranEvent(proc, 0, GR_SCHEDULE, run_queue_hds[proc], + (StgClosure*)NULL, (StgInt)0, 0); + } +} + +/* + The following GranSim fcts are stg-called from the threaded world. +*/ + +/* Called from HP_CHK and friends (see StgMacros.h) */ +//@cindex GranSimAllocate +void +GranSimAllocate(n) +StgInt n; +{ + CurrentTSO->gran.allocs += n; + ++(CurrentTSO->gran.basicblocks); + + if (RtsFlags.GranFlags.GranSimStats.Heap) { + DumpRawGranEvent(CurrentProc, 0, GR_ALLOC, CurrentTSO, + (StgClosure*)NULL, (StgInt)0, n); + } + + CurrentTSO->gran.exectime += RtsFlags.GranFlags.Costs.heapalloc_cost; + CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.heapalloc_cost; +} + +/* + Subtract the values added above, if a heap check fails and + so has to be redone. +*/ +//@cindex GranSimUnallocate +void +GranSimUnallocate(n) +StgInt n; +{ + CurrentTSO->gran.allocs -= n; + --(CurrentTSO->gran.basicblocks); + + CurrentTSO->gran.exectime -= RtsFlags.GranFlags.Costs.heapalloc_cost; + CurrentTime[CurrentProc] -= RtsFlags.GranFlags.Costs.heapalloc_cost; +} + +/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */ +//@cindex GranSimExec +void +GranSimExec(ariths,branches,loads,stores,floats) +StgWord ariths,branches,loads,stores,floats; +{ + StgWord cost = RtsFlags.GranFlags.Costs.arith_cost*ariths + + RtsFlags.GranFlags.Costs.branch_cost*branches + + RtsFlags.GranFlags.Costs.load_cost * loads + + RtsFlags.GranFlags.Costs.store_cost*stores + + RtsFlags.GranFlags.Costs.float_cost*floats; + + CurrentTSO->gran.exectime += cost; + CurrentTime[CurrentProc] += cost; +} + +/* + Fetch the node if it isn't local + -- result indicates whether fetch has been done. + + This is GRIP-style single item fetching. 
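+
+   Timing, as implemented in the code below: packing costs (mpacktime)
+   are charged to the fetching PE up front, the FetchNode event is then
+   scheduled on the owning PE p at
+
+     fetchtime = stg_max(CurrentTime[CurrentProc], CurrentTime[p])
+                 + RtsFlags.GranFlags.Costs.latency;
+
+   and the fetch itself is only counted on arrival of the FetchReply.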
+*/
+
+//@cindex GranSimFetch
+StgInt
+GranSimFetch(node /* , liveness_mask */ )
+StgClosure *node;
+/* StgInt liveness_mask; */
+{
+  /* reset the return value (to be checked within STG land) */
+  NeedToReSchedule = rtsFalse;
+
+  if (RtsFlags.GranFlags.Light) {
+     /* Always reschedule in GrAnSim-Light to prevent one TSO from
+	running off too far
+     new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+	      ContinueThread,CurrentTSO,node,NULL);
+     */
+     return(0);
+  }
+
+  /* Faking an RBH closure:
+     If the bitmask of the closure is 0 then this node is a fake RBH;
+  */
+  if (node->header.gran.procs == Nowhere) {
+    IF_GRAN_DEBUG(bq,
+		  belch("## Found fake RBH (node %p); delaying TSO %d (%p)",
+			node, CurrentTSO->id, CurrentTSO));
+
+    new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc]+10000,
+	      ContinueThread, CurrentTSO, node, (rtsSpark*)NULL);
+
+    /* Rescheduling (GranSim internal) is necessary */
+    NeedToReSchedule = rtsTrue;
+
+    return(1);
+  }
+
+  /* Note: once a node has been fetched, this test will be passed */
+  if (!IS_LOCAL_TO(PROCS(node),CurrentProc))
+    {
+      PEs p = where_is(node);
+      rtsTime fetchtime;
+
+      IF_GRAN_DEBUG(thunkStealing,
+		    if (p==CurrentProc)
+		      belch("GranSimFetch: Trying to fetch from own processor %u\n", p););
+
+      CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+      /* NB: Fetch is counted on arrival (FetchReply) */
+
+      fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+		  RtsFlags.GranFlags.Costs.latency;
+
+      new_event(p, CurrentProc, fetchtime,
+		FetchNode, CurrentTSO, node, (rtsSpark*)NULL);
+
+      if (fetchtime<TimeOfNextEvent)
+	TimeOfNextEvent = fetchtime;
+
+      CurrentTSO->gran.blockedat = CurrentTime[CurrentProc];
+
+      ++OutstandingFetches[CurrentProc];
+
+      if (RtsFlags.GranFlags.DoAsyncFetch)
+	/* if asynchr comm is turned on, activate the next thread in the q */
+	ActivateNextThread(CurrentProc);
+      else
+	procStatus[CurrentProc] = Fetching;
+
+#if 0
+      /* ToDo: nuke the entire if (anything special for fair schedule?) 
*/
+      if (RtsFlags.GranFlags.DoAsyncFetch)
+	{
+	  /* Remove CurrentTSO from the queue -- assumes head of queue == CurrentTSO */
+	  if(!RtsFlags.GranFlags.DoFairSchedule)
+	    {
+	      /* now done in do_the_fetchnode
+	      if (RtsFlags.GranFlags.GranSimStats.Full)
+		DumpRawGranEvent(CurrentProc, p, GR_FETCH, CurrentTSO,
+				 node, (StgInt)0, 0);
+	      */
+	      ActivateNextThread(CurrentProc);
+
+# if 0 && defined(GRAN_CHECK)
+	      if (RtsFlags.GranFlags.Debug.blockOnFetch_sanity) {
+		if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+		  fprintf(stderr,"FetchNode: TSO 0x%x has fetch-mask set @ %d\n",
+			  CurrentTSO,CurrentTime[CurrentProc]);
+		  stg_exit(EXIT_FAILURE);
+		} else {
+		  TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+		}
+	      }
+# endif
+	      CurrentTSO->link = END_TSO_QUEUE;
+	      /* CurrentTSO = END_TSO_QUEUE; */
+
+	      /* CurrentTSO is pointed to by the FetchNode event; it is
+		 on no run queue any more */
+	  } else {  /* fair scheduling currently not supported -- HWL */
+	    barf("Asynchr communication is not yet compatible with fair scheduling\n");
+	  }
+	} else {                /* !RtsFlags.GranFlags.DoAsyncFetch */
+	  procStatus[CurrentProc] = Fetching; // ToDo: BlockedOnFetch;
+	  /* now done in do_the_fetchnode
+	  if (RtsFlags.GranFlags.GranSimStats.Full)
+	    DumpRawGranEvent(CurrentProc, p,
+			     GR_FETCH, CurrentTSO, node, (StgInt)0, 0);
+	  */
+	  IF_GRAN_DEBUG(blockOnFetch,
+			BlockedOnFetch[CurrentProc] = CurrentTSO;); /*- rtsTrue; -*/
+	}
+#endif /* 0 */
+
+      CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+      /* Rescheduling (GranSim internal) is necessary */
+      NeedToReSchedule = rtsTrue;
+
+      return(1);
+    }
+  return(0);
+}
+
+//@cindex GranSimSpark
+void
+GranSimSpark(local,node)
+StgInt local;
+StgClosure *node;
+{
+  /* ++SparksAvail;  Nope; do that in add_to_spark_queue */
+  if (RtsFlags.GranFlags.GranSimStats.Sparks)
+    DumpRawGranEvent(CurrentProc, (PEs)0, SP_SPARK,
+		     END_TSO_QUEUE, node, (StgInt)0, spark_queue_len(CurrentProc)-1);
+
+  /* Force the PE to take notice of the spark */
+  if(RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+    new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+	      FindWork,
+	      END_TSO_QUEUE, (StgClosure*)NULL, (rtsSpark*)NULL);
+    if (CurrentTime[CurrentProc]<TimeOfNextEvent)
+      TimeOfNextEvent = CurrentTime[CurrentProc];
+  }
+
+  if (local)
+    ++CurrentTSO->gran.localsparks;
+  else
+    ++CurrentTSO->gran.globalsparks;
+}
+
+//@cindex GranSimSparkAt
+void
+GranSimSparkAt(spark,where,identifier)
+rtsSpark *spark;
+StgClosure *where;    /* This should be a node; alternatively could be a GA */
+StgInt identifier;
+{
+  PEs p = where_is(where);
+  GranSimSparkAtAbs(spark,p,identifier);
+}
+
+//@cindex GranSimSparkAtAbs
+void
+GranSimSparkAtAbs(spark,proc,identifier)
+rtsSpark *spark;
+PEs proc;
+StgInt identifier;
+{
+  rtsTime exporttime;
+
+  if (spark == (rtsSpark *)NULL) /* Note: Granularity control might have */
+    return;          /* turned a spark into a NULL. */
+
+  /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+  if(RtsFlags.GranFlags.GranSimStats.Sparks)
+    DumpRawGranEvent(proc,0,SP_SPARKAT,
+		     END_TSO_QUEUE, spark->node, (StgInt)0, spark_queue_len(proc));
+
+  if (proc!=CurrentProc) {
+    CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+    exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]? 
+		  CurrentTime[proc]: CurrentTime[CurrentProc])
+		 + RtsFlags.GranFlags.Costs.latency;
+  } else {
+    exporttime = CurrentTime[CurrentProc];
+  }
+
+  if ( RtsFlags.GranFlags.Light )
+    /* Need CurrentTSO in event field to associate costs with creating
+       spark even in a GrAnSim Light setup */
+    new_event(proc, CurrentProc, exporttime,
+	      MoveSpark,
+	      CurrentTSO, spark->node, spark);
+  else
+    new_event(proc, CurrentProc, exporttime,
+	      MoveSpark, (StgTSO*)NULL, spark->node, spark);
+  /* Bit of a hack to treat placed sparks the same as stolen sparks */
+  ++OutstandingFishes[proc];
+
+  /* Force the PE to take notice of the spark (FINDWORK is put after a
+     MoveSpark into the sparkq!) */
+  if (RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+    new_event(CurrentProc,CurrentProc,exporttime+1,
+	      FindWork,
+	      (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+  }
+
+  if (exporttime<TimeOfNextEvent)
+    TimeOfNextEvent = exporttime;
+
+  if (proc!=CurrentProc) {
+    CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+    ++CurrentTSO->gran.globalsparks;
+  } else {
+    ++CurrentTSO->gran.localsparks;
+  }
+}
+
+/*
+   This function handles local and global blocking.  It's called either
+   from threaded code (RBH_entry, BH_entry etc) or from blockFetch when
+   trying to fetch a BH or RBH
+*/
+
+//@cindex GranSimBlock
+void
+GranSimBlock(tso, proc, node)
+StgTSO *tso;
+PEs proc;
+StgClosure *node;
+{
+  PEs node_proc = where_is(node), tso_proc = where_is(tso);
+
+  ASSERT(tso_proc==CurrentProc);
+  // ASSERT(node_proc==CurrentProc);
+  IF_GRAN_DEBUG(bq,
+		if (node_proc!=CurrentProc)
+		  belch("## ghuH: TSO %d (%lx) [PE %d] blocks on non-local node %p [PE %d] (no simulation of FETCHMEs)",
+			tso->id, tso, tso_proc, node, node_proc));
+  ASSERT(tso->link==END_TSO_QUEUE);
+  ASSERT(!is_on_queue(tso,proc)); // tso must not be on run queue already!
+  //ASSERT(tso==run_queue_hds[proc]);
+
+  IF_DEBUG(gran,
+	   belch("GRAN: TSO %d (%p) [PE %d] blocks on closure %p @ %lx",
+		 tso->id, tso, proc, node, CurrentTime[proc]);)
+
+
+  /* THIS SHOULD NEVER HAPPEN!
+     If tso tries to block on a remote node (i.e. node_proc!=CurrentProc)
+     we have missed a GranSimFetch before entering this closure;
+     we hack around it for now, faking a FetchNode;
+     because GranSimBlock is entered via a BLACKHOLE(_BQ) closure,
+     tso will be blocked on this closure until the FetchReply occurs.
+
+     ngoq Dogh!
+
+  if (node_proc!=CurrentProc) {
+    StgInt ret;
+    ret = GranSimFetch(node);
+    IF_GRAN_DEBUG(bq,
+		  if (ret)
+		    belch(".. 
GranSimBlock: faking a FetchNode of node %p from %d to %d", + node, node_proc, CurrentProc);); + return; + } + */ + + if (RtsFlags.GranFlags.GranSimStats.Full) + DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,(StgInt)0,0); + + ++(tso->gran.blockcount); + /* Distinction between local and global block is made in blockFetch */ + tso->gran.blockedat = CurrentTime[proc]; + + CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime; + ActivateNextThread(proc); + /* tso->link = END_TSO_QUEUE; not really necessary; only for testing */ +} + +#endif /* GRAN */ + +//@node Index, , Dumping routines, GranSim specific code +//@subsection Index + +//@index +//* ActivateNextThread:: @cindex\s-+ActivateNextThread +//* CurrentProc:: @cindex\s-+CurrentProc +//* CurrentTime:: @cindex\s-+CurrentTime +//* GranSimAllocate:: @cindex\s-+GranSimAllocate +//* GranSimBlock:: @cindex\s-+GranSimBlock +//* GranSimExec:: @cindex\s-+GranSimExec +//* GranSimFetch:: @cindex\s-+GranSimFetch +//* GranSimLight_insertThread:: @cindex\s-+GranSimLight_insertThread +//* GranSimSpark:: @cindex\s-+GranSimSpark +//* GranSimSparkAt:: @cindex\s-+GranSimSparkAt +//* GranSimSparkAtAbs:: @cindex\s-+GranSimSparkAtAbs +//* GranSimUnallocate:: @cindex\s-+GranSimUnallocate +//* any_idle:: @cindex\s-+any_idle +//* blockFetch:: @cindex\s-+blockFetch +//* do_the_fetchnode:: @cindex\s-+do_the_fetchnode +//* do_the_fetchreply:: @cindex\s-+do_the_fetchreply +//* do_the_findwork:: @cindex\s-+do_the_findwork +//* do_the_globalblock:: @cindex\s-+do_the_globalblock +//* do_the_movespark:: @cindex\s-+do_the_movespark +//* do_the_movethread:: @cindex\s-+do_the_movethread +//* do_the_startthread:: @cindex\s-+do_the_startthread +//* do_the_unblock:: @cindex\s-+do_the_unblock +//* fetchNode:: @cindex\s-+fetchNode +//* ga_to_proc:: @cindex\s-+ga_to_proc +//* get_next_event:: @cindex\s-+get_next_event +//* get_time_of_next_event:: @cindex\s-+get_time_of_next_event +//* grab_event:: @cindex\s-+grab_event +//* handleFetchRequest:: @cindex\s-+handleFetchRequest +//* handleIdlePEs:: @cindex\s-+handleIdlePEs +//* idlers:: @cindex\s-+idlers +//* insertThread:: @cindex\s-+insertThread +//* insert_event:: @cindex\s-+insert_event +//* is_on_queue:: @cindex\s-+is_on_queue +//* is_unique:: @cindex\s-+is_unique +//* new_event:: @cindex\s-+new_event +//* prepend_event:: @cindex\s-+prepend_event +//* print_event:: @cindex\s-+print_event +//* print_eventq:: @cindex\s-+print_eventq +//* prune_eventq :: @cindex\s-+prune_eventq +//* spark queue:: @cindex\s-+spark queue +//* sparkStealTime:: @cindex\s-+sparkStealTime +//* stealSomething:: @cindex\s-+stealSomething +//* stealSpark:: @cindex\s-+stealSpark +//* stealSparkMagic:: @cindex\s-+stealSparkMagic +//* stealThread:: @cindex\s-+stealThread +//* stealThreadMagic:: @cindex\s-+stealThreadMagic +//* thread_queue_len:: @cindex\s-+thread_queue_len +//* traverse_eventq_for_gc:: @cindex\s-+traverse_eventq_for_gc +//* where_is:: @cindex\s-+where_is +//@end index diff --git a/ghc/rts/parallel/GranSimRts.h b/ghc/rts/parallel/GranSimRts.h new file mode 100644 index 0000000..585291a --- /dev/null +++ b/ghc/rts/parallel/GranSimRts.h @@ -0,0 +1,261 @@ +/* -------------------------------------------------------------------------- + Time-stamp: + $Id: GranSimRts.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + + Variables and functions specific to GranSim. 
+ ----------------------------------------------------------------------- */ + +#ifndef GRANSIM_RTS_H +#define GRANSIM_RTS_H + +//@node Headers for GranSim objs used only in the RTS internally, , , +//@section Headers for GranSim objs used only in the RTS internally + +//@menu +//* Event queue:: +//* Spark handling routines:: +//* Processor related stuff:: +//* Local types:: +//* Statistics gathering:: +//* Prototypes:: +//@end menu + +//@node Event queue, Spark handling routines, Headers for GranSim objs used only in the RTS internally, Headers for GranSim objs used only in the RTS internally +//@subsection Event queue + +#if defined(GRAN) || defined(PAR) +/* Granularity event types for output (see DumpGranEvent) */ +typedef enum GranEventType_ { + GR_START = 0, GR_STARTQ, + GR_STEALING, GR_STOLEN, GR_STOLENQ, + GR_FETCH, GR_REPLY, GR_BLOCK, GR_RESUME, GR_RESUMEQ, + GR_SCHEDULE, GR_DESCHEDULE, + GR_END, + SP_SPARK, SP_SPARKAT, SP_USED, SP_PRUNED, SP_EXPORTED, SP_ACQUIRED, SP_REQUESTED, + GR_ALLOC, + GR_TERMINATE, + GR_SYSTEM_START, GR_SYSTEM_END, /* only for debugging */ + GR_EVENT_MAX +} GranEventType; + +extern char *gran_event_names[]; +#endif + +#if defined(GRAN) /* whole file */ + +/* Event Types (internal use only) */ +typedef enum rtsEventType_ { + ContinueThread = 0, /* Continue running the first thread in the queue */ + StartThread, /* Start a newly created thread */ + ResumeThread, /* Resume a previously running thread */ + MoveSpark, /* Move a spark from one PE to another */ + MoveThread, /* Move a thread from one PE to another */ + FindWork, /* Search for work */ + FetchNode, /* Fetch a node */ + FetchReply, /* Receive a node */ + GlobalBlock, /* Block a TSO on a remote node */ + UnblockThread /* Make a TSO runnable */ +} rtsEventType; + +/* Number of last event type */ +#define MAX_EVENT 9 + +typedef struct rtsEvent_ { + PEs proc; /* Processor id */ + PEs creator; /* Processor id of PE that created the event */ + rtsEventType evttype; /* rtsEvent type */ + rtsTime time; /* Time at which event happened */ + StgTSO *tso; /* Associated TSO, if relevant */ + StgClosure *node; /* Associated node, if relevant */ + rtsSpark *spark; /* Associated SPARK, if relevant */ + StgInt gc_info; /* Counter of heap objects to mark (used in GC only)*/ + struct rtsEvent_ *next; + } rtsEvent; + +typedef rtsEvent *rtsEventQ; + +extern rtsEventQ EventHd; + +/* Interface for ADT of Event Queue */ +rtsEvent *get_next_event(void); +rtsTime get_time_of_next_event(void); +void insert_event(rtsEvent *newentry); +void new_event(PEs proc, PEs creator, rtsTime time, + rtsEventType evttype, StgTSO *tso, + StgClosure *node, rtsSpark *spark); +void print_event(rtsEvent *event); +void print_eventq(rtsEvent *hd); +void prepend_event(rtsEvent *event); +rtsEventQ grab_event(void); +void prune_eventq(StgTSO *tso, StgClosure *node); + +void traverse_eventq_for_gc(void); +void markEventQueue(void); + +//@node Spark handling routines, Processor related stuff, Event queue, Headers for GranSim objs used only in the RTS internally +//@subsection Spark handling routines + +/* These functions are only used in the RTS internally; see GranSim.h for rest */ +void disposeSpark(rtsSpark *spark); +void disposeSparkQ(rtsSparkQ spark); +void print_spark(rtsSpark *spark); +void print_sparkq(PEs proc); +void print_sparkq_stats(void); +nat spark_queue_len(PEs proc); +rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too); +void markSparkQueue(void); + +//@node Processor related stuff, Local types, Spark handling 
routines, Headers for GranSim objs used only in the RTS internally
+//@subsection Processor related stuff
+
+typedef enum rtsProcStatus_ {
+  Idle = 0,     /* empty threadq */
+  Sparking,     /* non-empty sparkq; FINDWORK has been issued */
+  Starting,     /* STARTTHREAD has been issued */
+  Fetching,     /* waiting for remote data (only if block-on-fetch) */
+  Fishing,      /* waiting for remote spark/thread */
+  Busy          /* non-empty threadq, with head of queue active */
+} rtsProcStatus;
+
+/*
+#define IS_IDLE(proc)        (procStatus[proc] == Idle)
+#define IS_SPARKING(proc)    (procStatus[proc] == Sparking)
+#define IS_STARTING(proc)    (procStatus[proc] == Starting)
+#define IS_FETCHING(proc)    (procStatus[proc] == Fetching)
+#define IS_FISHING(proc)     (procStatus[proc] == Fishing)
+#define IS_BUSY(proc)        (procStatus[proc] == Busy)
+#define ANY_IDLE             (any_idle())
+#define MAKE_IDLE(proc)      procStatus[proc] = Idle
+#define MAKE_SPARKING(proc)  procStatus[proc] = Sparking
+#define MAKE_STARTING(proc)  procStatus[proc] = Starting
+#define MAKE_FETCHING(proc)  procStatus[proc] = Fetching
+#define MAKE_FISHING(proc)   procStatus[proc] = Fishing
+#define MAKE_BUSY(proc)      procStatus[proc] = Busy
+*/
+
+//@node Local types, Statistics gathering, Processor related stuff, Headers for GranSim objs used only in the RTS internally
+//@subsection Local types
+
+/* Return codes of HandleFetchRequest:
+    0 ... ok (FETCHREPLY event with a buffer containing addresses of the
+          nearby graph has been scheduled)
+    1 ... node is already local (fetched by somebody else; no event is
+          scheduled in here)
+    2 ... fetch request has been forwarded to the PE that now contains the
+          node
+    3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
+          the current TSO is put into the blocking queue of that node
+    4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
+          function to guarantee that the tso and node inputs are valid
+          (they may be moved during GC).
+  Return codes of blockFetch:
+    0 ... ok; tso is now at beginning of BQ attached to the bh closure
+    1 ... the bh closure is no BH any more; tso is immediately unblocked
+*/
+
+typedef enum rtsFetchReturnCode_ {
+  Ok = 0,
+  NodeIsLocal,
+  NodeHasMoved,
+  NodeIsBH,
+  NodeIsNoBH,
+  OutOfHeap
+} rtsFetchReturnCode;
+
+//@node Statistics gathering, Prototypes, Local types, Headers for GranSim objs used only in the RTS internally
+//@subsection Statistics gathering
+
+extern unsigned int /* nat */ OutstandingFetches[], OutstandingFishes[];
+extern rtsProcStatus procStatus[];
+extern StgTSO *BlockedOnFetch[];
+
+/* global structure for collecting statistics */
+typedef struct GlobalGranStats_ {
+  /* event stats */
+  nat noOfEvents;
+  nat event_counts[MAX_EVENT];
+
+  /* communication stats */
+  nat fetch_misses;
+  nat tot_fake_fetches;     // GranSim internal; faked Fetches are a kludge!! 
+ nat tot_low_pri_sparks; + + /* load distribution statistics */ + nat rs_sp_count, rs_t_count, ntimes_total, fl_total, + no_of_steals, no_of_migrates; + + /* spark queue stats */ + nat tot_sq_len, tot_sq_probes, tot_sparks; + nat tot_add_threads, tot_tq_len, non_end_add_threads; + + /* packet statistics */ + nat tot_packets, tot_packet_size, tot_cuts, tot_thunks; + + /* thread stats */ + nat tot_threads_created, threads_created_on_PE[MAX_PROC], + tot_TSOs_migrated; + + /* spark stats */ + nat pruned_sparks, withered_sparks; + nat tot_sparks_created, sparks_created_on_PE[MAX_PROC]; + + /* scheduling stats */ + nat tot_yields; + + /* blocking queue statistics */ + rtsTime tot_bq_processing_time; + nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs; +} GlobalGranStats; + +extern GlobalGranStats globalGranStats; + +//@node Prototypes, , Statistics gathering, Headers for GranSim objs used only in the RTS internally +//@subsection Prototypes + +/* Generally useful fcts */ +PEs where_is(StgClosure *node); +rtsBool is_unique(StgClosure *node); + +/* Prototypes of event handling functions; needed in Schedule.c:ReSchedule() */ +void do_the_globalblock (rtsEvent* event); +void do_the_unblock (rtsEvent* event); +void do_the_fetchnode (rtsEvent* event); +void do_the_fetchreply (rtsEvent* event); +void do_the_movethread (rtsEvent* event); +void do_the_movespark (rtsEvent* event); +void do_the_startthread(rtsEvent *event); +void do_the_findwork(rtsEvent* event); +void gimme_spark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res); +rtsBool munch_spark (rtsEvent *event, rtsSparkQ spark); + +/* GranSimLight routines */ +void GranSimLight_enter_system(rtsEvent *event, StgTSO **ActiveTSOp); +void GranSimLight_leave_system(rtsEvent *event, StgTSO **ActiveTSOp); + +/* Communication related routines */ +rtsFetchReturnCode fetchNode(StgClosure* node, PEs from, PEs to); +rtsFetchReturnCode handleFetchRequest(StgClosure* node, PEs curr_proc, PEs p, StgTSO* tso); +rtsFetchReturnCode blockFetch(StgTSO* tso, PEs proc, StgClosure* bh); +void handleIdlePEs(void); + +long int random(void); /* used in stealSpark() and stealThread() in GranSim.c */ + +/* Scheduling fcts defined in GranSim.c */ +void insertThread(StgTSO *tso, PEs proc); +void endThread(StgTSO *tso, PEs proc); +rtsBool GranSimLight_insertThread(StgTSO *tso, PEs proc); +nat thread_queue_len(PEs proc); + +/* For debugging */ +rtsBool is_on_queue (StgTSO *tso, PEs proc); + +/* Interface for dumping routines (i.e. writing to log file) */ +void DumpGranEvent(GranEventType name, StgTSO *tso); +void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread); +void DumpTSO(StgTSO *tso); +void DumpRawGranEvent(PEs proc, PEs p, GranEventType name, + StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len); + +#endif + +#endif /* GRANSIM_RTS_H */ diff --git a/ghc/rts/parallel/HLC.h b/ghc/rts/parallel/HLC.h new file mode 100644 index 0000000..f2d98d4 --- /dev/null +++ b/ghc/rts/parallel/HLC.h @@ -0,0 +1,59 @@ +/* -------------------------------------------------------------------------- + Time-stamp: + $Id: HLC.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + + High Level Communications Header (HLC.h) + + Contains the high-level definitions (i.e. communication + subsystem independent) used by GUM + Phil Trinder, Glasgow University, 12 December 1994 + H-W. 
Loidl, Heriot-Watt, November 1999 + ----------------------------------------------------------------------- */ + +#ifndef __HLC_H +#define __HLC_H + +#ifdef PAR + +#include "LLC.h" + +#define NEW_FISH_AGE 0 +#define NEW_FISH_HISTORY 0 +#define NEW_FISH_HUNGER 0 +#define FISH_LIFE_EXPECTANCY 10 + + +//@node GUM Message Sending and Unpacking Functions +//@subsection GUM Message Sending and Unpacking Functions + +rtsBool initMoreBuffers(void); + +void sendFetch (globalAddr *ga, globalAddr *bqga, int load); +void sendResume(globalAddr *rga, int nelem, rtsPackBuffer *data); +void sendAck (GlobalTaskId task, int ngas, globalAddr *gagamap); +void sendFish (GlobalTaskId destPE, GlobalTaskId origPE, int age, int history, int hunger); +void sendFree (GlobalTaskId destPE, int nelem, P_ data); +void sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *data); + +//@node Message-Processing Functions +//@subsection Message-Processing Functions + +void processMessages(void); +void processFetches(void); +void processTheRealFetches(void); + +//@node Miscellaneous Functions +//@subsection Miscellaneous Functions + +void prepareFreeMsgBuffers(void); +void freeRemoteGA (int pe, globalAddr *ga); +void sendFreeMessages(void); + +GlobalTaskId choosePE(void); +StgClosure *createBlockedFetch (globalAddr ga, globalAddr rga); +void waitForTermination(void); + +void DebugPrintGAGAMap (globalAddr *gagamap, int nGAs); + +#endif /* PAR */ +#endif /* __HLC_H */ diff --git a/ghc/rts/parallel/HLComms.c b/ghc/rts/parallel/HLComms.c new file mode 100644 index 0000000..bce0de7 --- /dev/null +++ b/ghc/rts/parallel/HLComms.c @@ -0,0 +1,1305 @@ +/* ---------------------------------------------------------------------------- + * Time-stamp: + * $Id: HLComms.c,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + * + * High Level Communications Routines (HLComms.lc) + * + * Contains the high-level routines (i.e. communication + * subsystem independent) used by GUM + * + * Phil Trinder, Glasgow University, 12 December 1994 + * Adapted for new RTS + * Phil Trinder, Simon Marlow July 1998 + * H-W. Loidl, Heriot-Watt University, November 1999 + * + * ------------------------------------------------------------------------- */ + +#ifdef PAR /* whole file */ + +//@node High Level Communications Routines, , , +//@section High Level Communications Routines + +//@menu +//* Macros etc:: +//* Includes:: +//* GUM Message Sending and Unpacking Functions:: +//* Message-Processing Functions:: +//* GUM Message Processor:: +//* Miscellaneous Functions:: +//* Index:: +//@end menu + +//@node Macros etc, Includes, High Level Communications Routines, High Level Communications Routines +//@subsection Macros etc + +# ifndef _AIX +# define NON_POSIX_SOURCE /* so says Solaris */ +# endif + +//@node Includes, GUM Message Sending and Unpacking Functions, Macros etc, High Level Communications Routines +//@subsection Includes + +#include "Rts.h" +#include "RtsUtils.h" +#include "RtsFlags.h" +#include "Storage.h" // for recordMutable +#include "HLC.h" +#include "Parallel.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +#include "FetchMe.h" // for BLOCKED_FETCH_info etc +#if defined(DEBUG) +# include "ParallelDebug.h" +#endif +#include "StgMacros.h" // inlined IS_... 
fcts + +//@node GUM Message Sending and Unpacking Functions, Message-Processing Functions, Includes, High Level Communications Routines +//@subsection GUM Message Sending and Unpacking Functions + +/* + * GUM Message Sending and Unpacking Functions + */ + +/* + * Allocate space for message processing + */ + +//@cindex gumPackBuffer +static rtsPackBuffer *gumPackBuffer; + +//@cindex initMoreBuffers +rtsBool +initMoreBuffers(void) +{ + if ((gumPackBuffer = (rtsPackBuffer *)stgMallocWords(RtsFlags.ParFlags.packBufferSize, + "initMoreBuffers")) == NULL) + return rtsFalse; + return rtsTrue; +} + +/* + * SendFetch packs the two global addresses and a load into a message + + * sends it. + +//@cindex FETCH + + Structure of a FETCH message: + + | GA 1 | GA 2 | + +------------------------------------+------+ + | gtid | slot | weight | gtid | slot | load | + +------------------------------------+------+ + */ + +//@cindex sendFetch +void +sendFetch(globalAddr *rga, globalAddr *lga, int load) +{ + ASSERT(rga->weight > 0 && lga->weight > 0); + IF_PAR_DEBUG(fetch, + belch("** [%x] Sending Fetch for ((%x, %d, 0)); locally ((%x, %d, %x)), load = %d", + mytid, + rga->payload.gc.gtid, rga->payload.gc.slot, + lga->payload.gc.gtid, lga->payload.gc.slot, lga->weight, + load)); + + + /* ToDo: Dump event + DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga->payload.gc.gtid), + GR_FETCH, CurrentTSO, (StgClosure *)(lga->payload.gc.slot), + 0, spark_queue_len(ADVISORY_POOL)); + */ + + sendOpV(PP_FETCH, rga->payload.gc.gtid, 6, + (StgWord) rga->payload.gc.gtid, (StgWord) rga->payload.gc.slot, + (StgWord) lga->weight, (StgWord) lga->payload.gc.gtid, + (StgWord) lga->payload.gc.slot, (StgWord) load); +} + +/* + * unpackFetch unpacks a FETCH message into two Global addresses and a load + * figure. +*/ + +//@cindex unpackFetch +static void +unpackFetch(globalAddr *lga, globalAddr *rga, int *load) +{ + long buf[6]; + + GetArgs(buf, 6); + + IF_PAR_DEBUG(fetch, + belch("** [%x] Unpacking Fetch for ((%x, %d, 0)) to ((%x, %d, %x)), load = %d", + mytid, + (GlobalTaskId) buf[0], (int) buf[1], + (GlobalTaskId) buf[3], (int) buf[4], buf[2], buf[5])); + + lga->weight = 1; + lga->payload.gc.gtid = (GlobalTaskId) buf[0]; + lga->payload.gc.slot = (int) buf[1]; + + rga->weight = (unsigned) buf[2]; + rga->payload.gc.gtid = (GlobalTaskId) buf[3]; + rga->payload.gc.slot = (int) buf[4]; + + *load = (int) buf[5]; + + ASSERT(rga->weight > 0); +} + +/* + * SendResume packs the remote blocking queue's GA and data into a message + * and sends it. + +//@cindex RESUME + + Structure of a RESUME message: + + ------------------------------- + | weight | slot | n | data ... + ------------------------------- + + data is a packed graph represented as an rtsPackBuffer + n is the size of the graph (as returned by PackNearbyGraph) + packet hdr size + */ + +//@cindex sendResume +void +sendResume(globalAddr *rga, int nelem, rtsPackBuffer *data) // StgPtr data) +{ + IF_PAR_DEBUG(resume, + PrintPacket(data); + belch("[] [%x] Sending Resume for ((%x, %d, %x))", + mytid, + rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight)); + + sendOpNV(PP_RESUME, rga->payload.gc.gtid, + nelem + PACK_BUFFER_HDR_SIZE, (StgPtr)data, + 2, (rtsWeight) rga->weight, (StgWord) rga->payload.gc.slot); +} + +/* + * unpackResume unpacks a Resume message into two Global addresses and + * a data array. 
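+ *
+ * Note (from the code below): the element count shipped in the message
+ * includes PACK_BUFFER_HDR_SIZE; unpackResume reads the whole buffer and
+ * only then subtracts the header size, so on return *nelem counts the
+ * packed graph alone.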
+ */
+
+//@cindex unpackResume
+static void
+unpackResume(globalAddr *lga, int *nelem, rtsPackBuffer *data)
+{
+  long buf[3];
+
+  GetArgs(buf, 3);
+
+  IF_PAR_DEBUG(resume,
+	       belch("[] [%x] Unpacking Resume for ((%x, %d, %x))",
+		     mytid, mytid,
+		     (int) buf[1], (unsigned) buf[0]));
+
+  /*
+    RESUME event is written in awaken_blocked_queue
+  DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(lga->payload.gc.gtid),
+		   GR_RESUME, END_TSO_QUEUE, (StgClosure *)NULL, 0, 0);
+  */
+
+  lga->weight = (unsigned) buf[0];
+  lga->payload.gc.gtid = mytid;
+  lga->payload.gc.slot = (int) buf[1];
+
+  *nelem = (int) buf[2]; // includes PACK_BUFFER_HDR_SIZE;
+  GetArgs(data, *nelem);
+  *nelem -= PACK_BUFFER_HDR_SIZE;
+}
+
+/*
+ * SendAck packs the global address being acknowledged, together with
+ * an array of global addresses for any closures shipped and sends them.
+
+//@cindex ACK
+
+  Structure of an ACK message:
+
+             |        GA 1          |        GA 2          |
+             +---------------------------------------------+-------
+             | weight | gtid | slot | weight | gtid | slot |  .....  ngas times
+             +---------------------------------------------+-------
+
+ */
+
+//@cindex sendAck
+void
+sendAck(GlobalTaskId task, int ngas, globalAddr *gagamap)
+{
+  static long *buffer;
+  long *p;
+  int i;
+
+  buffer = (long *) gumPackBuffer;
+
+  for(i = 0, p = buffer; i < ngas; i++, p += 6) {
+    ASSERT(gagamap[1].weight > 0);
+    p[0] = (long) gagamap->weight;
+    p[1] = (long) gagamap->payload.gc.gtid;
+    p[2] = (long) gagamap->payload.gc.slot;
+    gagamap++;
+    p[3] = (long) gagamap->weight;
+    p[4] = (long) gagamap->payload.gc.gtid;
+    p[5] = (long) gagamap->payload.gc.slot;
+    gagamap++;
+  }
+  IF_PAR_DEBUG(ack,
+	       belch(",, [%x] Sending Ack (%d pairs) to PE %x\n",
+		     mytid, ngas, task));
+
+  sendOpN(PP_ACK, task, p - buffer, buffer);
+}
+
+/*
+ * unpackAck unpacks an Acknowledgement message into a Global address,
+ * a count of the number of global addresses following and a map of
+ * Global addresses
+ */
+
+//@cindex unpackAck
+static void
+unpackAck(int *ngas, globalAddr *gagamap)
+{
+  long GAarraysize;
+  long buf[6];
+
+  GetArgs(&GAarraysize, 1);
+
+  *ngas = GAarraysize / 6;
+
+  IF_PAR_DEBUG(ack,
+	       belch(",, [%x] Unpacking Ack (%d pairs) on %x\n",
+		     mytid, *ngas, mytid));
+
+  while (GAarraysize > 0) {
+    GetArgs(buf, 6);
+    gagamap->weight = (rtsWeight) buf[0];
+    gagamap->payload.gc.gtid = (GlobalTaskId) buf[1];
+    gagamap->payload.gc.slot = (int) buf[2];
+    gagamap++;
+    gagamap->weight = (rtsWeight) buf[3];
+    gagamap->payload.gc.gtid = (GlobalTaskId) buf[4];
+    gagamap->payload.gc.slot = (int) buf[5];
+    ASSERT(gagamap->weight > 0);
+    gagamap++;
+    GAarraysize -= 6;
+  }
+}
+
+/*
+ * SendFish sends a FISH message hunting for spare work to the PE destPE;
+ * the message records the originating PE together with the age, history
+ * and hunger of the fish.
+
+//@cindex FISH
+
+  Structure of a FISH message:
+
+       +----------------------------------+
+       | orig PE | age | history | hunger |
+       +----------------------------------+
+ */
+
+//@cindex sendFish
+void
+sendFish(GlobalTaskId destPE, GlobalTaskId origPE,
+	 int age, int history, int hunger)
+{
+  IF_PAR_DEBUG(fish,
+	       belch("$$ [%x] Sending Fish to %x (%d outstanding fishes)",
+		     mytid, destPE, outstandingFishes));
+
+  sendOpV(PP_FISH, destPE, 4,
+	  (StgWord) origPE, (StgWord) age, (StgWord) history, (StgWord) hunger);
+
+  if (origPE == mytid) {
+    //fishing = rtsTrue;
+    outstandingFishes++;
+  }
+}
+
+/*
+ * unpackFish unpacks a FISH message into the global task id of the
+ * originating PE and 3 data fields: the age, history and hunger of the
+ * fish.
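+ * (Life-cycle, as implemented in processFish below: a fish arriving back
+ * at its origin, origPE == mytid, merely decrements outstandingFishes;
+ * otherwise the receiving PE replies with a SCHEDULE if it can pack a
+ * spark, forwards the fish with age+1 while age < FISH_LIFE_EXPECTANCY,
+ * and sends it home to die after that.)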
+ * The history + hunger are not currently used.
+
+ */
+
+//@cindex unpackFish
+static void
+unpackFish(GlobalTaskId *origPE, int *age, int *history, int *hunger)
+{
+  long buf[4];
+
+  GetArgs(buf, 4);
+
+  IF_PAR_DEBUG(fish,
+	       belch("$$ [%x] Unpacking Fish from PE %x (age=%d)",
+		     mytid, (GlobalTaskId) buf[0], (int) buf[1]));
+
+  *origPE = (GlobalTaskId) buf[0];
+  *age = (int) buf[1];
+  *history = (int) buf[2];
+  *hunger = (int) buf[3];
+}
+
+/*
+ * SendFree sends (weight, slot) pairs for GAs that we no longer need
+ * references to.
+
+//@cindex FREE
+
+  Structure of a FREE message:
+
+      +-----------------------------
+      | n | weight_1 | slot_1 | ...
+      +-----------------------------
+ */
+//@cindex sendFree
+void
+sendFree(GlobalTaskId pe, int nelem, StgPtr data)
+{
+  IF_PAR_DEBUG(free,
+	       belch("!! [%x] Sending Free (%d GAs) to %x",
+		     mytid, nelem/2, pe));
+
+  sendOpN(PP_FREE, pe, nelem, data);
+}
+
+/*
+ * unpackFree unpacks a FREE message into the amount of data shipped and
+ * a data block.
+ */
+//@cindex unpackFree
+static void
+unpackFree(int *nelem, rtsPackBuffer *data)
+{
+  long buf[1];
+
+  GetArgs(buf, 1);
+  *nelem = (int) buf[0];
+
+  IF_PAR_DEBUG(free,
+	       belch("!! [%x] Unpacking Free (%d GAs)",
+		     mytid, *nelem/2));
+
+  GetArgs(data, *nelem);
+}
+
+/*
+ * SendSchedule sends a closure to be evaluated in response to a Fish
+ * message. The message is directed to the PE that originated the Fish
+ * (origPE), and includes the packed closure (data) along with its size
+ * (nelem).
+
+//@cindex SCHEDULE
+
+  Structure of a SCHEDULE message:
+
+      +------------------------------------
+      | PE | n | pack buffer of a graph ...
+      +------------------------------------
+ */
+//@cindex sendSchedule
+void
+sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *data) // StgPtr data)
+{
+  IF_PAR_DEBUG(schedule,
+	       PrintPacket(data);
+	       belch("-- [%x] Sending Schedule (%d elems) to %x\n",
+		     mytid, nelem, origPE));
+
+  sendOpN(PP_SCHEDULE, origPE, nelem + PACK_BUFFER_HDR_SIZE, (StgPtr)data);
+}
+
+/*
+ * unpackSchedule unpacks a SCHEDULE message into the Global address of
+ * the closure shipped, the amount of data shipped (nelem) and the data
+ * block (data).
+ */
+
+//@cindex unpackSchedule
+static void
+unpackSchedule(int *nelem, rtsPackBuffer *data)
+{
+  long buf[1];
+
+  GetArgs(buf, 1);
+  /* no. of elems, not counting the header of the pack buffer */
+  *nelem = (int) buf[0] - PACK_BUFFER_HDR_SIZE;
+
+  IF_PAR_DEBUG(schedule,
+	       belch("-- [%x] Unpacking Schedule (%d elems) on %x\n",
+		     mytid, *nelem, mytid));
+
+  /* automatic cast of flat pvm-data to rtsPackBuffer */
+  GetArgs(data, *nelem + PACK_BUFFER_HDR_SIZE);
+}
+
+//@node Message-Processing Functions, GUM Message Processor, GUM Message Sending and Unpacking Functions, High Level Communications Routines
+//@subsection Message-Processing Functions
+
+/*
+ * Message-Processing Functions
+ *
+ * The following routines process incoming GUM messages, often reissuing
+ * messages in response.
+ *
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+
+/*
+ * blockFetch blocks a BlockedFetch node on some kind of black hole.
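+ * The black hole may be a plain BLACKHOLE, a BLACKHOLE_BQ, a FETCH_ME_BQ
+ * or an RBH (see the switch below); a plain BLACKHOLE is first turned
+ * into a BLACKHOLE_BQ so that it has a blocking queue to hang bf on, and
+ * in each case the modified closure is recorded as mutable afterwards.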
+ */ +//@cindex blockFetch +static void +blockFetch(StgBlockedFetch *bf, StgClosure *bh) { + bf->node = bh; + switch (get_itbl(bh)->type) { + case BLACKHOLE: + bf->link = END_BQ_QUEUE; + //((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info; + SET_INFO(bh, &BLACKHOLE_BQ_info); // turn closure into a blocking queue + ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + + // put bh on the mutables list + recordMutable((StgMutClosure *)bh); + +# if 0 + /* + * If we modify a black hole in the old generation, we have to + * make sure it goes on the mutables list + */ + + if (bh <= StorageMgrInfo.OldLim) { + MUT_LINK(bh) = (StgWord) StorageMgrInfo.OldMutables; + StorageMgrInfo.OldMutables = bh; + } else + MUT_LINK(bh) = MUT_NOT_LINKED; +# endif + break; + + case BLACKHOLE_BQ: + /* enqueue bf on blocking queue of closure bh */ + bf->link = ((StgBlockingQueue *)bh)->blocking_queue; + ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + + // put bh on the mutables list; ToDo: check + recordMutable((StgMutClosure *)bh); + break; + + case FETCH_ME_BQ: + /* enqueue bf on blocking queue of closure bh */ + bf->link = ((StgFetchMeBlockingQueue *)bh)->blocking_queue; + ((StgFetchMeBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + + // put bh on the mutables list; ToDo: check + recordMutable((StgMutClosure *)bh); + break; + + case RBH: + /* enqueue bf on blocking queue of closure bh */ + bf->link = ((StgRBH *)bh)->blocking_queue; + ((StgRBH *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + + // put bh on the mutables list; ToDo: check + recordMutable((StgMutClosure *)bh); + break; + + default: + barf("Panic (blockFetch): thought %p was a black hole (IP %#lx, %s)", + (StgClosure *)bh, get_itbl((StgClosure *)bh), + info_type((StgClosure *)bh)); + } + IF_PAR_DEBUG(verbose, + belch("## blockFetch: after block the BQ of %p (%s) is:", + bh, info_type(bh)); + print_bq(bh)); +} + + +/* + * processFetches constructs and sends resume messages for every + * BlockedFetch which is ready to be awakened. + * awaken_blocked_queue (in Schedule.c) is responsible for moving + * BlockedFetches from a blocking queue to the PendingFetches queue. + */ +void GetRoots(void); +extern StgBlockedFetch *PendingFetches; + +nat +pending_fetches_len(void) +{ + StgBlockedFetch *bf; + nat n; + + for (n=0, bf=PendingFetches; bf != END_BF_QUEUE; n++, bf = (StgBlockedFetch *)(bf->link)) { + ASSERT(get_itbl(bf)->type==BLOCKED_FETCH); + } + return n; +} + +//@cindex processFetches +void +processFetches(void) { + StgBlockedFetch *bf; + StgClosure *closure, *next; + StgInfoTable *ip; + globalAddr rga; + static rtsPackBuffer *packBuffer; + + IF_PAR_DEBUG(verbose, + belch("__ processFetches: %d pending fetches", + pending_fetches_len())); + + for (bf = PendingFetches; + bf != END_BF_QUEUE; + bf=(StgBlockedFetch *)(bf->link)) { + /* the PendingFetches list contains only BLOCKED_FETCH closures */ + ASSERT(get_itbl(bf)->type==BLOCKED_FETCH); + + /* + * Find the target at the end of the indirection chain, and + * process it in much the same fashion as the original target + * of the fetch. Though we hope to find graph here, we could + * find a black hole (of any flavor) or even a FetchMe. 
+     */
+    closure = bf->node;
+    /*
+       HACK 312: bf->node may have been evacuated since filling it; follow
+       the evacuee in this case; the proper way to handle this is to
+       traverse the blocking queue and update the node fields of
+       BLOCKED_FETCH entries when evacuating a BLACKHOLE_BQ, FETCH_ME_BQ
+       or RBH (but it's late and I'm tired)
+    */
+    if (get_itbl(closure)->type == EVACUATED)
+      closure = ((StgEvacuated *)closure)->evacuee;
+
+    while ((next = IS_INDIRECTION(closure)) != NULL) { closure = next; }
+
+    ip = get_itbl(closure);
+    if (ip->type == FETCH_ME) {
+      /* Forward the Fetch to someone else */
+      rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+      rga.payload.gc.slot = bf->ga.payload.gc.slot;
+      rga.weight = bf->ga.weight;
+
+      sendFetch(((StgFetchMe *)closure)->ga, &rga, 0 /* load */);
+
+      IF_PAR_DEBUG(forward,
+		   belch("__ processFetches: Forwarding fetch from %lx to %lx",
+			 mytid, rga.payload.gc.gtid));
+
+    } else if (IS_BLACK_HOLE(closure)) {
+      IF_PAR_DEBUG(verbose,
+		   belch("__ processFetches: trying to send a BLACK_HOLE => doing a blockFetch on closure %p (%s)",
+			 closure, info_type(closure)));
+      bf->node = closure;
+      blockFetch(bf, closure);
+    } else {
+      /* We now have some local graph to send back */
+      nat size;
+
+      packBuffer = gumPackBuffer;
+      IF_PAR_DEBUG(verbose,
+		   belch("__ processFetches: PackNearbyGraph of closure %p (%s)",
+			 closure, info_type(closure)));
+
+      if ((packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size)) == NULL) {
+	// Put current BF back on list
+	bf->link = (StgBlockingQueueElement *)PendingFetches;
+	PendingFetches = (StgBlockedFetch *)bf;
+	// ToDo: check that nothing more has to be done to prepare for GC!
+	GarbageCollect(GetRoots);
+	bf = PendingFetches;
+	PendingFetches = (StgBlockedFetch *)(bf->link);
+	closure = bf->node;
+	packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size);
+	ASSERT(packBuffer != (rtsPackBuffer *)NULL);
+      }
+      rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+      rga.payload.gc.slot = bf->ga.payload.gc.slot;
+      rga.weight = bf->ga.weight;
+
+      sendResume(&rga, size, packBuffer);
+    }
+  }
+  PendingFetches = END_BF_QUEUE;
+}
+
+#if 0
+/*
+  Alternatively to sending fetch messages directly from the FETCH_ME_entry
+  code we could just store the data about the remote data in a global
+  variable and send the fetch request from the main scheduling loop (similar
+  to processFetches above). This would save an expensive STGCALL in the entry
+  code because we have to go back to the scheduler anyway.
+*/
+//@cindex processFetches
+void
+processTheRealFetches(void) {
+  StgBlockedFetch *bf;
+  StgClosure *closure, *next;
+
+  IF_PAR_DEBUG(verbose,
+	       belch("__ processTheRealFetches: ");
+	       printGA(&theGlobalFromGA);
+	       printGA(&theGlobalToGA));
+
+  ASSERT(theGlobalFromGA.payload.gc.gtid != 0 &&
+	 theGlobalToGA.payload.gc.gtid != 0);
+
+  /* the old version did this in the FETCH_ME entry code */
+  sendFetch(&theGlobalFromGA, &theGlobalToGA, 0/*load*/);
+
+#if DEBUG
+  theGlobalFromGA.payload.gc.gtid = 0;
+  theGlobalToGA.payload.gc.gtid = 0;
+#endif /* DEBUG */
+}
+#endif
+
+
+/*
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
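+ *
+ * In outline (see the body below):
+ *   fish came home (origPE == mytid)      -> outstandingFishes--, done;
+ *   a local spark can be packed           -> sendSchedule(origPE, ...);
+ *   no spark, age < FISH_LIFE_EXPECTANCY  -> sendFish(choosePE(), origPE, age+1, ...);
+ *   no spark, fish too old                -> sendFish(origPE, origPE, age+1, ...).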
+ */
+//@cindex processFish
+static void
+processFish(void)
+{
+  GlobalTaskId origPE;
+  int age, history, hunger;
+  rtsSpark spark;
+  static rtsPackBuffer *packBuffer;
+
+  unpackFish(&origPE, &age, &history, &hunger);
+
+  if (origPE == mytid) {
+    //fishing = rtsFalse;                   // fish has come home
+    outstandingFishes--;
+    last_fish_arrived_at = CURRENT_TIME;  // remember time (see schedule fct)
+    return;                               // that's all
+  }
+
+  ASSERT(origPE != mytid);
+  IF_PAR_DEBUG(fish,
+	       belch("$$ [%x] processing fish; %d sparks available",
+		     mytid, spark_queue_len(ADVISORY_POOL)));
+  while ((spark = findLocalSpark(rtsTrue)) != NULL) {
+      nat size;
+      // StgClosure *graph;
+
+      packBuffer = gumPackBuffer;
+      ASSERT(closure_SHOULD_SPARK((StgClosure *)spark));
+      if ((packBuffer = PackNearbyGraph(spark, END_TSO_QUEUE, &size)) == NULL) {
+	IF_PAR_DEBUG(fish,
+		     belch("$$ GC while trying to satisfy FISH via PackNearbyGraph of node %p",
+			   (StgClosure *)spark));
+	GarbageCollect(GetRoots);
+	/* Now go back and try again */
+      } else {
+	IF_PAR_DEBUG(fish,
+		     belch("$$ [%x] Replying to FISH from %x by sending graph @ %p (%s)",
+			   mytid, origPE,
+			   (StgClosure *)spark, info_type((StgClosure *)spark)));
+	sendSchedule(origPE, size, packBuffer);
+	disposeSpark(spark);
+	break;
+      }
+  }
+  if (spark == (rtsSpark)NULL) {
+    IF_PAR_DEBUG(fish,
+		 belch("$$ [%x] No sparks available for FISH from %x",
+		       mytid, origPE));
+    /* We have no sparks to give */
+    if (age < FISH_LIFE_EXPECTANCY)
+      /* and the fish is still young, send it to another PE to look for work */
+      sendFish(choosePE(), origPE,
+	       (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+
+    /* otherwise, send it home to die */
+    else
+      sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+  }
+}  /* processFish */
+
+/*
+ * processFetch either returns the requested data (if available)
+ * or blocks the remote blocking queue on a black hole (if not).
+ */
+
+//@cindex processFetch
+static void
+processFetch(void)
+{
+  globalAddr ga, rga;
+  int load;
+  StgClosure *closure;
+  StgInfoTable *ip;
+
+  unpackFetch(&ga, &rga, &load);
+  IF_PAR_DEBUG(fetch,
+	       belch("%% [%x] Rcvd Fetch for ((%x, %d, 0)), Resume ((%x, %d, %x)) (load %d) from %x",
+		     mytid,
+		     ga.payload.gc.gtid, ga.payload.gc.slot,
+		     rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight, load,
+		     rga.payload.gc.gtid));
+
+  closure = GALAlookup(&ga);
+  ASSERT(closure != (StgClosure *)NULL);
+  ip = get_itbl(closure);
+  if (ip->type == FETCH_ME) {
+    /* Forward the Fetch to someone else */
+    sendFetch(((StgFetchMe *)closure)->ga, &rga, load);
+  } else if (rga.payload.gc.gtid == mytid) {
+    /* Our own FETCH forwarded back around to us */
+    StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)GALAlookup(&rga);
+
+    IF_PAR_DEBUG(fetch,
+		 belch("%% [%x] Fetch returned to sending PE; closure=%p (%s); receiver=%p (%s)",
+		       mytid, closure, info_type(closure), fmbq, info_type(fmbq)));
+    /* We may have already discovered that the fetch target is our own. */
+    if ((StgClosure *)fmbq != closure)
+      CommonUp((StgClosure *)fmbq, closure);
+    (void) addWeight(&rga);
+  } else if (IS_BLACK_HOLE(closure)) {
+    /* This includes RBH's and FMBQ's */
+    StgBlockedFetch *bf;
+
+    ASSERT(GALAlookup(&rga) == NULL);
+
+    /* If we're hitting a BH or RBH or FMBQ we have to put a BLOCKED_FETCH
+       closure into the BQ in order to denote that when updating this node
+       the result should be sent to the originator of this fetch message.
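+       The BLOCKED_FETCH closure is allocated by createBlockedFetch (see
+       below) and remembers the requester's GA (rga); once the node is
+       finally updated, awaken_blocked_queue moves the entry to
+       PendingFetches and processFetches turns it into a RESUME message.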
*/ + bf = (StgBlockedFetch *)createBlockedFetch(ga, rga); + blockFetch(bf, closure); + + IF_PAR_DEBUG(fetch, + belch("%% [%x] Blocking Fetch ((%x, %d, %x)) on %p (%s)", + mytid, + rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight, + closure, info_type(closure))); + } else { + /* The target of the FetchMe is some local graph */ + nat size; + // StgClosure *graph; + rtsPackBuffer *buffer = (rtsPackBuffer *)NULL; + + if ((buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size)) == NULL) { + GarbageCollect(GetRoots); + closure = GALAlookup(&ga); + buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size); + ASSERT(buffer != (rtsPackBuffer *)NULL); + } + sendResume(&rga, size, buffer); + } +} + +/* + * processFree unpacks a FREE message and adds the weights to our GAs. + */ +//@cindex processFree +static void +processFree(void) +{ + int nelem; + static StgWord *buffer; + int i; + globalAddr ga; + + buffer = (StgWord *)gumPackBuffer; + unpackFree(&nelem, buffer); + IF_PAR_DEBUG(free, + belch("!! [%x] Rcvd Free (%d GAs)", mytid, nelem / 2)); + + ga.payload.gc.gtid = mytid; + for (i = 0; i < nelem;) { + ga.weight = (rtsWeight) buffer[i++]; + ga.payload.gc.slot = (int) buffer[i++]; + IF_PAR_DEBUG(free, + fprintf(stderr, "!! [%x] Processing free ", mytid); + printGA(&ga); + fputc('\n', stderr); + ); + (void) addWeight(&ga); + } +} + +/* + * processResume unpacks a RESUME message into the graph, filling in + * the LA -> GA, and GA -> LA tables. Threads blocked on the original + * FetchMe (now a blocking queue) are awakened, and the blocking queue + * is converted into an indirection. Finally it sends an ACK in response + * which contains any newly allocated GAs. + */ + +//@cindex processResume +static void +processResume(GlobalTaskId sender) +{ + int nelem; + nat nGAs; + static rtsPackBuffer *packBuffer; + StgClosure *newGraph, *old; + globalAddr lga; + globalAddr *gagamap; + + packBuffer = gumPackBuffer; + unpackResume(&lga, &nelem, (StgPtr)packBuffer); + + IF_PAR_DEBUG(resume, + fprintf(stderr, "[] [%x] Rcvd Resume for ", mytid); + printGA(&lga); + fputc('\n', stderr); + PrintPacket((rtsPackBuffer *)packBuffer)); + + /* + * We always unpack the incoming graph, even if we've received the + * requested node in some other data packet (and already awakened + * the blocking queue). + if (SAVE_Hp + packBuffer[0] >= SAVE_HpLim) { + ReallyPerformThreadGC(packBuffer[0], rtsFalse); + SAVE_Hp -= packBuffer[0]; + } + */ + + // ToDo: Check for GC here !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + /* Do this *after* GC; we don't want to release the object early! */ + + if (lga.weight > 0) + (void) addWeight(&lga); + + old = GALAlookup(&lga); + + if (RtsFlags.ParFlags.ParStats.Full) { + // StgTSO *tso = END_TSO_QUEUE; + StgBlockingQueueElement *bqe; + + /* Write REPLY events to the log file, indicating that the remote + data has arrived */ + if (get_itbl(old)->type == FETCH_ME_BQ || + get_itbl(old)->type == RBH) + for (bqe = ((StgFetchMeBlockingQueue *)old)->blocking_queue; + bqe->link != END_BQ_QUEUE; + bqe = bqe->link) + if (get_itbl((StgClosure *)bqe)->type == TSO) + DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(sender), + GR_REPLY, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, + 0, spark_queue_len(ADVISORY_POOL)); + } + + newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs); + ASSERT(newGraph != NULL); + + /* + * Sometimes, unpacking will common up the resumee with the + * incoming graph, but if it hasn't, we'd better do so now. 
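+   *
+   * (CommonUp turns the old FETCH_ME_BQ into, in effect, an indirection
+   * to newGraph, waking the threads that were blocked on it; cf. the
+   * comment at the head of processResume above.)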
+ */ + + if (get_itbl(old)->type == FETCH_ME_BQ) + CommonUp(old, newGraph); + + IF_PAR_DEBUG(resume, + DebugPrintGAGAMap(gagamap, nGAs)); + + sendAck(sender, nGAs, gagamap); +} + +/* + * processSchedule unpacks a SCHEDULE message into the graph, filling + * in the LA -> GA, and GA -> LA tables. The root of the graph is added to + * the local spark queue. Finally it sends an ACK in response + * which contains any newly allocated GAs. + */ +//@cindex processSchedule +static void +processSchedule(GlobalTaskId sender) +{ + nat nelem, space_required, nGAs; + rtsBool success; + static rtsPackBuffer *packBuffer; + StgClosure *newGraph; + globalAddr *gagamap; + + packBuffer = gumPackBuffer; /* HWL */ + unpackSchedule(&nelem, packBuffer); + + IF_PAR_DEBUG(schedule, + belch("-- [%x] Rcvd Schedule (%d elems)", mytid, nelem); + PrintPacket(packBuffer)); + + /* + * For now, the graph is a closure to be sparked as an advisory + * spark, but in future it may be a complete spark with + * required/advisory status, priority etc. + */ + + /* + space_required = packBuffer[0]; + if (SAVE_Hp + space_required >= SAVE_HpLim) { + ReallyPerformThreadGC(space_required, rtsFalse); + SAVE_Hp -= space_required; + } + */ + // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!1 + newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs); + ASSERT(newGraph != NULL); + success = add_to_spark_queue(newGraph, rtsFalse); + + IF_PAR_DEBUG(pack, + if (success) + belch("+* added spark to unpacked graph %p; %d sparks available on [%x]", + newGraph, spark_queue_len(ADVISORY_POOL), mytid); + else + belch("+* received non-sparkable closure %p; nothing added to spark pool; %d sparks available on [%x]", + newGraph, spark_queue_len(ADVISORY_POOL), mytid); + belch("-* Unpacked graph with root at %p (%s):", + newGraph, info_type(newGraph)); + PrintGraph(newGraph, 0)); + + IF_PAR_DEBUG(pack, + DebugPrintGAGAMap(gagamap, nGAs)); + + if (nGAs > 0) + sendAck(sender, nGAs, gagamap); + + //fishing = rtsFalse; + ASSERT(outstandingFishes>0); + outstandingFishes--; +} + +/* + * processAck unpacks an ACK, and uses the GAGA map to convert RBH's + * (which represent shared thunks that have been shipped) into fetch-mes + * to remote GAs. + */ +//@cindex processAck +static void +processAck(void) +{ + nat nGAs; + globalAddr *gaga; + globalAddr gagamap[256]; // ToDo: elim magic constant!! MAX_GAS * 2];?? + + unpackAck(&nGAs, gagamap); + + IF_PAR_DEBUG(ack, + belch(",, [%x] Rcvd Ack (%d pairs)", mytid, nGAs); + DebugPrintGAGAMap(gagamap, nGAs)); + + /* + * For each (oldGA, newGA) pair, set the GA of the corresponding + * thunk to the newGA, convert the thunk to a FetchMe, and return + * the weight from the oldGA. + */ + for (gaga = gagamap; gaga < gagamap + nGAs * 2; gaga += 2) { + StgClosure *old_closure = GALAlookup(gaga); + StgClosure *new_closure = GALAlookup(gaga + 1); + + ASSERT(old_closure != NULL); + if (new_closure == NULL) { + /* We don't have this closure, so we make a fetchme for it */ + globalAddr *ga = setRemoteGA(old_closure, gaga + 1, rtsTrue); + + /* convertToFetchMe should be done unconditionally here. + Currently, we assign GAs to CONSTRs, too, (a bit of a hack), + so we have to check whether it is an RBH before converting + + ASSERT(get_itbl(old_closure)==RBH); + */ + if (get_itbl(old_closure)->type==RBH) + convertToFetchMe(old_closure, ga); + } else { + /* + * Oops...we've got this one already; update the RBH to + * point to the object we already know about, whatever it + * happens to be. 
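+       * (Again done via CommonUp, which also wakes up anything that
+       * blocked on old_closure in the meantime.)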
+       */
+      CommonUp(old_closure, new_closure);
+
+      /*
+       * Increase the weight of the object by the amount just
+       * received in the second part of the ACK pair.
+       */
+      (void) addWeight(gaga + 1);
+    }
+    (void) addWeight(gaga);
+  }
+}
+
+//@node GUM Message Processor, Miscellaneous Functions, Message-Processing Functions, High Level Communications Routines
+//@subsection GUM Message Processor
+
+/*
+ * GUM Message Processor
+
+ * processMessages processes any messages that have arrived, calling
+ * appropriate routines depending on the message tag
+ * (opcode). N.B. Unless profiling it assumes that there {\em ARE} messages
+ * present and performs a blocking receive! During profiling it
+ * busy-waits in order to record idle time.
+ */
+
+//@cindex processMessages
+void
+processMessages(void)
+{
+  rtsPacket packet;
+  OpCode opcode;
+  GlobalTaskId task;
+
+  do {
+    packet = GetPacket();  /* Get next message; block until one available */
+    getOpcodeAndSender(packet, &opcode, &task);
+
+    switch (opcode) {
+    case PP_FINISH:
+      IF_PAR_DEBUG(verbose,
+		   belch("== [%x] received FINISH", mytid));
+      /* setting this global variable eventually terminates the main
+         scheduling loop for this PE and causes a shut-down, sending
+         PP_FINISH to SysMan */
+      GlobalStopPending = rtsTrue;
+      break;
+
+    case PP_FETCH:
+      processFetch();
+      break;
+
+    case PP_RESUME:
+      processResume(task);
+      break;
+
+    case PP_ACK:
+      processAck();
+      break;
+
+    case PP_FISH:
+      processFish();
+      break;
+
+    case PP_FREE:
+      processFree();
+      break;
+
+    case PP_SCHEDULE:
+      processSchedule(task);
+      break;
+
+    default:
+      /* Anything we're not prepared to deal with. */
+      barf("Task %x: Unexpected opcode %x from %x",
+	   mytid, opcode, task);
+    } /* switch */
+
+  } while (PacketsWaiting());	/* While there are messages: process them */
+}  /* processMessages */
+
+//@node Miscellaneous Functions, Index, GUM Message Processor, High Level Communications Routines
+//@subsection Miscellaneous Functions
+
+/*
+ * ChoosePE selects a GlobalTaskId from the array of PEs 'at random'.
+ * Important properties: + * - it varies during execution, even if the PE is idle + * - it's different for each PE + * - we never send a fish to ourselves + */ +extern long lrand48 (void); + +//@cindex choosePE +GlobalTaskId +choosePE(void) +{ + long temp; + + temp = lrand48() % nPEs; + if (allPEs[temp] == mytid) { /* Never send a FISH to yourself */ + temp = (temp + 1) % nPEs; + } + return allPEs[temp]; +} + +/* + * allocate a BLOCKED_FETCH closure and fill it with the relevant fields + * of the ga argument; called from processFetch when the local closure is + * under evaluation + */ +//@cindex createBlockedFetch +StgClosure * +createBlockedFetch (globalAddr ga, globalAddr rga) +{ + StgBlockedFetch *bf; + StgClosure *closure; + + closure = GALAlookup(&ga); + if ((bf = (StgBlockedFetch *)allocate(FIXED_HS + sizeofW(StgBlockedFetch))) == NULL) { + GarbageCollect(GetRoots); + closure = GALAlookup(&ga); + bf = (StgBlockedFetch *)allocate(FIXED_HS + sizeofW(StgBlockedFetch)); + // ToDo: check whether really guaranteed to succeed 2nd time around + } + + ASSERT(bf != (StgClosure *)NULL); + SET_INFO((StgClosure *)bf, &BLOCKED_FETCH_info); + // ToDo: check whether other header info is needed + bf->node = closure; + bf->ga.payload.gc.gtid = rga.payload.gc.gtid; + bf->ga.payload.gc.slot = rga.payload.gc.slot; + bf->ga.weight = rga.weight; + // bf->link = NULL; debugging + + IF_PAR_DEBUG(fetch, + fprintf(stderr, "%% [%x] created BF: closure=%p (%s), GA: ", + mytid, closure, info_type(closure)); + printGA(&(bf->ga)); + fputc('\n',stderr)); + return bf; +} + +/* + * waitForTermination enters a loop ignoring spurious messages while + * waiting for the termination sequence to be completed. + */ +//@cindex waitForTermination +void +waitForTermination(void) +{ + do { + rtsPacket p = GetPacket(); + processUnexpected(p); + } while (rtsTrue); +} + +#ifdef DEBUG +//@cindex DebugPrintGAGAMap +void +DebugPrintGAGAMap(globalAddr *gagamap, int nGAs) +{ + int i; + + for (i = 0; i < nGAs; ++i, gagamap += 2) + fprintf(stderr, "gagamap[%d] = ((%x, %d, %x)) -> ((%x, %d, %x))\n", i, + gagamap[0].payload.gc.gtid, gagamap[0].payload.gc.slot, gagamap[0].weight, + gagamap[1].payload.gc.gtid, gagamap[1].payload.gc.slot, gagamap[1].weight); +} +#endif + +//@cindex freeMsgBuffer +static StgWord **freeMsgBuffer = NULL; +//@cindex freeMsgIndex +static int *freeMsgIndex = NULL; + +//@cindex prepareFreeMsgBuffers +void +prepareFreeMsgBuffers(void) +{ + int i; + + /* Allocate the freeMsg buffers just once and then hang onto them. 
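+     Each PE other than ourselves gets its own buffer of
+     RtsFlags.ParFlags.packBufferSize words; freeRemoteGA below appends
+     (weight, slot) pairs to it and flushes it with sendFree when it fills
+     up, and sendFreeMessages flushes whatever is left at the end.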
*/
+ if (freeMsgIndex == NULL) {
+ freeMsgIndex = (int *) stgMallocBytes(nPEs * sizeof(int),
+ "prepareFreeMsgBuffers (Index)");
+ freeMsgBuffer = (StgWord **) stgMallocBytes(nPEs * sizeof(StgWord *),
+ "prepareFreeMsgBuffers (Buffer)");
+
+ for(i = 0; i < nPEs; i++)
+ if (i != thisPE)
+ freeMsgBuffer[i] = (StgWord *) stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+ "prepareFreeMsgBuffers (Buffer #i)");
+ }
+
+ /* Initialize the freeMsg buffer pointers to point to the start of their
+ buffers */
+ for (i = 0; i < nPEs; i++)
+ freeMsgIndex[i] = 0;
+}
+
+//@cindex freeRemoteGA
+void
+freeRemoteGA(int pe, globalAddr *ga)
+{
+ int i;
+
+ ASSERT(GALAlookup(ga) == NULL);
+
+ if ((i = freeMsgIndex[pe]) + 2 >= RtsFlags.ParFlags.packBufferSize) {
+ IF_PAR_DEBUG(free,
+ belch("Filled a free message buffer (sending remaining messages individually)"));
+
+ sendFree(ga->payload.gc.gtid, i, freeMsgBuffer[pe]);
+ i = 0;
+ }
+ freeMsgBuffer[pe][i++] = (StgWord) ga->weight;
+ freeMsgBuffer[pe][i++] = (StgWord) ga->payload.gc.slot;
+ freeMsgIndex[pe] = i;
+
+#ifdef DEBUG
+ ga->weight = 0x0f0f0f0f;
+ ga->payload.gc.gtid = 0x666;
+ ga->payload.gc.slot = 0xdeaddead;
+#endif
+}
+
+//@cindex sendFreeMessages
+void
+sendFreeMessages(void)
+{
+ int i;
+
+ for (i = 0; i < nPEs; i++)
+ if (freeMsgIndex[i] > 0)
+ sendFree(allPEs[i], freeMsgIndex[i], freeMsgBuffer[i]);
+}
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Miscellaneous Functions, High Level Communications Routines
+//@subsection Index
+
+//@index
+//* ACK:: @cindex\s-+ACK
+//* DebugPrintGAGAMap:: @cindex\s-+DebugPrintGAGAMap
+//* FETCH:: @cindex\s-+FETCH
+//* FISH:: @cindex\s-+FISH
+//* FREE:: @cindex\s-+FREE
+//* RESUME:: @cindex\s-+RESUME
+//* SCHEDULE:: @cindex\s-+SCHEDULE
+//* blockFetch:: @cindex\s-+blockFetch
+//* choosePE:: @cindex\s-+choosePE
+//* freeMsgBuffer:: @cindex\s-+freeMsgBuffer
+//* freeMsgIndex:: @cindex\s-+freeMsgIndex
+//* freeRemoteGA:: @cindex\s-+freeRemoteGA
+//* gumPackBuffer:: @cindex\s-+gumPackBuffer
+//* initMoreBuffers:: @cindex\s-+initMoreBuffers
+//* prepareFreeMsgBuffers:: @cindex\s-+prepareFreeMsgBuffers
+//* processAck:: @cindex\s-+processAck
+//* processFetch:: @cindex\s-+processFetch
+//* processFetches:: @cindex\s-+processFetches
+//* processFish:: @cindex\s-+processFish
+//* processFree:: @cindex\s-+processFree
+//* processMessages:: @cindex\s-+processMessages
+//* processResume:: @cindex\s-+processResume
+//* processSchedule:: @cindex\s-+processSchedule
+//* sendAck:: @cindex\s-+sendAck
+//* sendFetch:: @cindex\s-+sendFetch
+//* sendFish:: @cindex\s-+sendFish
+//* sendFree:: @cindex\s-+sendFree
+//* sendFreeMessages:: @cindex\s-+sendFreeMessages
+//* sendResume:: @cindex\s-+sendResume
+//* sendSchedule:: @cindex\s-+sendSchedule
+//* unpackAck:: @cindex\s-+unpackAck
+//* unpackFetch:: @cindex\s-+unpackFetch
+//* unpackFish:: @cindex\s-+unpackFish
+//* unpackFree:: @cindex\s-+unpackFree
+//* unpackResume:: @cindex\s-+unpackResume
+//* unpackSchedule:: @cindex\s-+unpackSchedule
+//* waitForTermination:: @cindex\s-+waitForTermination
+//@end index
diff --git a/ghc/rts/parallel/LLC.h b/ghc/rts/parallel/LLC.h
new file mode 100644
index 0000000..eb63366
--- /dev/null
+++ b/ghc/rts/parallel/LLC.h
@@ -0,0 +1,128 @@
+/* --------------------------------------------------------------------------
+ Time-stamp:
+ $Id: LLC.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $
+
+ Low Level Communications Header (LLC.h)
+
+ Contains the definitions used by the Low-level Communications
+ module of the GUM Haskell runtime environment. 
+ Based on the Graph for PVM implementation. + + Phil Trinder, Glasgow University, 13th Dec 1994 + Adapted for the 4.xx RTS + H-W. Loidl, Heriot-Watt, November 1999 + ----------------------------------------------------------------------- */ + +#ifndef __LLC_H +#define __LLC_H + +#ifdef PAR + +//@node Low Level Communications Header, , , +//@section Low Level Communications Header + +//@menu +//* Includes:: +//* Macros and Constants:: +//* PVM macros:: +//* Externs:: +//@end menu + +//@node Includes, Macros and Constants, Low Level Communications Header, Low Level Communications Header +//@subsection Includes + +#include "Rts.h" +#include "Parallel.h" + +#include "PEOpCodes.h" +#include "pvm3.h" + +//@node Macros and Constants, PVM macros, Includes, Low Level Communications Header +//@subsection Macros and Constants + +#define ANY_TASK (-1) /* receive messages from any task */ +#define ANY_GLOBAL_TASK ANY_TASK +#define ANY_OPCODE (-1) /* receive any opcode */ +#define ALL_GROUP (-1) /* wait for barrier from every group member */ + +#define PEGROUP "PE" + +#define MGRGROUP "MGR" +#define PECTLGROUP "PECTL" + + +#define PETASK "PE" + +//@node PVM macros, Externs, Macros and Constants, Low Level Communications Header +//@subsection PVM macros + +#define sync(gp,op) do { \ + broadcast(gp,op); \ + pvm_barrier(gp,ALL_GROUP); \ + } while(0) + +#define broadcast(gp,op) do { \ + pvm_initsend(PvmDataDefault); \ + pvm_bcast(gp,op); \ + } while(0) + +#define checkComms(c,s) do { \ + if ((c)<0) { \ + pvm_perror(s); \ + stg_exit(EXIT_FAILURE); \ + }} while(0) + +#define _my_gtid pvm_mytid() +#define GetPacket() pvm_recv(ANY_TASK,ANY_OPCODE) +#define PacketsWaiting() (pvm_probe(ANY_TASK,ANY_OPCODE) != 0) + +#define SPARK_THREAD_DESCRIPTOR 1 +#define GLOBAL_THREAD_DESCRIPTOR 2 + +#define _extract_jump_field(v) (v) + +#define MAX_DATA_WORDS_IN_PACKET 1024 + +/* basic PVM packing */ +#define PutArg1(a) pvm_pklong(&(a),1,1) +#define PutArg2(a) pvm_pklong(&(a),1,1) +#define PutArgN(n,a) pvm_pklong(&(a),1,1) +#define PutArgs(b,n) pvm_pklong(b,n,1) + +#define PutLit(l) { int a = l; PutArgN(?,a); } + +/* basic PVM unpacking */ +#define GetArg1(a) pvm_upklong(&(a),1,1) +#define GetArg2(a) pvm_upklong(&(a),1,1) +#define GetArgN(n,a) pvm_upklong(&(a),1,1) +#define GetArgs(b,n) pvm_upklong(b,n,1) + +//@node Externs, , PVM macros, Low Level Communications Header +//@subsection Externs + +/* basic message passing routines */ +extern void sendOp (OpCode,GlobalTaskId), + sendOp1 (OpCode,GlobalTaskId,StgWord), + sendOp2 (OpCode,GlobalTaskId,StgWord,StgWord), + sendOpV (OpCode,GlobalTaskId,int,...), + sendOpN (OpCode,GlobalTaskId,int,StgPtr), + sendOpNV (OpCode,GlobalTaskId,int,StgPtr,int,...); + +/* extracting data out of a packet */ +OpCode getOpcode (rtsPacket p); +void getOpcodeAndSender (rtsPacket p, OpCode *popcode, + GlobalTaskId *psender_id); +GlobalTaskId senderTask (rtsPacket p); +rtsPacket waitForPEOp (OpCode op, GlobalTaskId who); + +/* Init and shutdown routines */ +GlobalTaskId *startUpPE (unsigned nPEs); +void shutDownPE(void); + +/* aux functions */ +char *getOpName (unsigned op); // returns string of opcode +void processUnexpected (rtsPacket); +//void NullException(void); + +#endif /*PAR */ +#endif /*defined __LLC_H */ diff --git a/ghc/rts/parallel/LLComms.c b/ghc/rts/parallel/LLComms.c new file mode 100644 index 0000000..c40ae33 --- /dev/null +++ b/ghc/rts/parallel/LLComms.c @@ -0,0 +1,476 @@ +/* ---------------------------------------------------------------------------- + * Time-stamp: + * $Id: 
LLComms.c,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + * + * GUM Low-Level Inter-Task Communication + * + * This module defines PVM Routines for PE-PE communication. + * P. Trinder, December 5th. 1994. + * Adapted for the new RTS + * P. Trinder, July 1998 + * H-W. Loidl, November 1999 + --------------------------------------------------------------------------- */ + +#ifdef PAR /* whole file */ + +//@node GUM Low-Level Inter-Task Communication, , , +//@section GUM Low-Level Inter-Task Communication + +/* + *This module defines the routines which communicate between PEs. The + *code is based on Kevin Hammond's GRIP RTS. (OpCodes.h defines + *PEOp1 etc. in terms of sendOp1 etc.). + * + *Routine & Arguments + * & + *sendOp & 0 \\ + *sendOp1 & 1 \\ + *sendOp2 & 2 \\ + *sendOpN & vector \\ + *sendOpV & variable \\ + *sendOpNV & variable+ vector \\ + * + *First the standard include files. + */ + +//@menu +//* Macros etc:: +//* Includes:: +//* Auxiliary functions:: +//* Index:: +//@end menu + +//@node Macros etc, Includes, GUM Low-Level Inter-Task Communication, GUM Low-Level Inter-Task Communication +//@subsection Macros etc + +#define NON_POSIX_SOURCE /* so says Solaris */ +#define UNUSED /* nothing */ + +//@node Includes, Auxiliary functions, Macros etc, GUM Low-Level Inter-Task Communication +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "Parallel.h" +#include "ParallelRts.h" +#if defined(DEBUG) +# include "ParallelDebug.h" +#endif +#include "LLC.h" + +#ifdef __STDC__ +#include +#else +#include +#endif + +/* Cannot use std macro when compiling for SysMan */ +/* debugging enabled */ +// #define IF_PAR_DEBUG(c,s) { s; } +/* debugging disabled */ +#define IF_PAR_DEBUG(c,s) /* nothing */ + +//@node Auxiliary functions, Index, Includes, GUM Low-Level Inter-Task Communication +//@subsection Auxiliary functions + +/* + * heapChkCounter tracks the number of heap checks since the last probe. + * Not currently used! We check for messages when a thread is resheduled. + */ +int heapChkCounter = 0; + +/* + * Then some miscellaneous functions. + * getOpName returns the character-string name of any OpCode. + */ + +char *UserPEOpNames[] = { PEOP_NAMES }; + +//@cindex getOpName +char * +getOpName(nat op) +{ + if (op >= MIN_PEOPS && op <= MAX_PEOPS) + return (UserPEOpNames[op - MIN_PEOPS]); + else + return ("Unknown PE OpCode"); +} + +/* + * traceSendOp handles the tracing of messages. + */ + +//@cindex traceSendOp +static void +traceSendOp(OpCode op, GlobalTaskId dest UNUSED, + unsigned int data1 UNUSED, unsigned int data2 UNUSED) +{ + char *OpName; + + OpName = getOpName(op); + IF_PAR_DEBUG(trace, + fprintf(stderr," %s [%x,%x] sent from %x to %x", + OpName, data1, data2, mytid, dest)); +} + +/* + * sendOp sends a 0-argument message with OpCode {\em op} to + * the global task {\em task}. + */ + +//@cindex sendOp +void +sendOp(OpCode op, GlobalTaskId task) +{ + traceSendOp(op, task,0,0); + + pvm_initsend(PvmDataRaw); + pvm_send(task, op); +} + +/* + * sendOp1 sends a 1-argument message with OpCode {\em op} + * to the global task {\em task}. + */ + +//@cindex sendOp1 +void +sendOp1(OpCode op, GlobalTaskId task, StgWord arg1) +{ + traceSendOp(op, task, arg1,0); + + pvm_initsend(PvmDataRaw); + PutArg1(arg1); + pvm_send(task, op); +} + + +/* + * sendOp2 is used by the FP code only. 
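+ *
+ * A hypothetical call site (the OpCode and arguments here are invented
+ * for illustration, not taken from the FP code) might read:
+ *
+ *   sendOp2(PP_STATS_ON, SysManTask, (StgWord)start_time, (StgWord)stop_time);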
+ */ + +//@cindex sendOp2 +void +sendOp2(OpCode op, GlobalTaskId task, StgWord arg1, StgWord arg2) +{ + traceSendOp(op, task, arg1, arg2); + + pvm_initsend(PvmDataRaw); + PutArg1(arg1); + PutArg2(arg2); + pvm_send(task, op); +} + +/* + * + * sendOpV takes a variable number of arguments, as specified by {\em n}. + * For example, + * + * sendOpV( PP_STATS, StatsTask, 3, start_time, stop_time, sparkcount); + */ + +//@cindex sendOpV +void +sendOpV(OpCode op, GlobalTaskId task, int n, ...) +{ + va_list ap; + int i; + StgWord arg; + + va_start(ap, n); + + traceSendOp(op, task, 0, 0); + + pvm_initsend(PvmDataRaw); + + for (i = 0; i < n; ++i) { + arg = va_arg(ap, StgWord); + PutArgN(i, arg); + } + va_end(ap); + + pvm_send(task, op); +} + +/* + * + * sendOpNV takes a variable-size datablock, as specified by {\em + * nelem} and a variable number of arguments, as specified by {\em + * narg}. N.B. The datablock and the additional arguments are contiguous + * and are copied over together. For example, + * + * sendOpNV(PP_RESUME, tsoga.pe, 6, nelem, data, + * (W_) ga.weight, (W_) ga.loc.gc.gtid, (W_) ga.loc.gc.slot, + * (W_) tsoga.weight, (W_) tsoga.loc.gc.gtid, (W_) tsoga.loc.gc.slot); + * + * Important: The variable arguments must all be StgWords. + + sendOpNV(_, tid, m, n, data, x1, ..., xm): + + | n elems + +------------------------------ + | x1 | ... | xm | n | data .... + +------------------------------ + */ + +//@cindex sendOpNV +void +sendOpNV(OpCode op, GlobalTaskId task, int nelem, + StgWord *datablock, int narg, ...) +{ + va_list ap; + int i; + StgWord arg; + + va_start(ap, narg); + + traceSendOp(op, task, 0, 0); + IF_PAR_DEBUG(trace, + fprintf(stderr,"sendOpNV: op = %x (%s), task = %x, narg = %d, nelem = %d", + op, getOpName(op), task, narg, nelem)); + + pvm_initsend(PvmDataRaw); + + for (i = 0; i < narg; ++i) { + arg = va_arg(ap, StgWord); + IF_PAR_DEBUG(trace, + fprintf(stderr,"sendOpNV: arg = %d\n",arg)); + PutArgN(i, arg); + } + arg = (StgWord) nelem; + PutArgN(narg, arg); + +/* for (i=0; i < nelem; ++i) fprintf(stderr, "%d ",datablock[i]); */ +/* fprintf(stderr," in sendOpNV\n");*/ + + PutArgs(datablock, nelem); + va_end(ap); + + pvm_send(task, op); +} + +/* + * sendOpN take a variable size array argument, whose size is given by + * {\em n}. For example, + * + * sendOpN( PP_STATS, StatsTask, 3, stats_array); + */ + +//@cindex sendOpN +void +sendOpN(OpCode op, GlobalTaskId task, int n, StgPtr args) +{ + long arg; + + traceSendOp(op, task, 0, 0); + + pvm_initsend(PvmDataRaw); + arg = (long) n; + PutArgN(0, arg); + PutArgs(args, n); + pvm_send(task, op); +} + +/* + * waitForPEOp waits for a packet from global task {\em who} with the + * OpCode {\em op}. Other OpCodes are handled by processUnexpected. 
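+ *
+ * For example, the PE startup code at the end of this file waits for
+ * the table of PE task ids from SysMan with:
+ *
+ *   addr = waitForPEOp(PP_PETIDS, ANY_GLOBAL_TASK);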
+ */
+//@cindex waitForPEOp
+rtsPacket
+waitForPEOp(OpCode op, GlobalTaskId who)
+{
+ rtsPacket p;
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ rtsBool match;
+
+ do {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"waitForPEOp: op = %x (%s), who = %x\n",
+ op, getOpName(op), who));
+
+ while((p = pvm_recv(ANY_TASK,ANY_OPCODE)) < 0)
+ pvm_perror("waitForPEOp: Waiting for PEOp");
+
+ pvm_bufinfo( p, &nbytes, &opCode, &sender_id );
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"waitForPEOp: received: OpCode = %x (%s), sender_id = %x\n",
+ opCode, getOpName(opCode), sender_id));
+
+ match = (op == ANY_OPCODE || op == opCode) &&
+ (who == ANY_TASK || who == sender_id);
+
+ if (match)
+ return(p);
+
+ /* Handle the unexpected OpCodes */
+ processUnexpected(p);
+
+ } while(rtsTrue);
+}
+
+/*
+ * processUnexpected processes unexpected messages. If the message is a
+ * FINISH it exits the program, and PVM, gracefully.
+ */
+//@cindex processUnexpected
+void
+processUnexpected(rtsPacket packet)
+{
+ OpCode opCode = getOpcode(packet);
+
+ IF_PAR_DEBUG(verbose,
+ GlobalTaskId sender = senderTask(packet);
+ fprintf(stderr,"== [%x] processUnexpected: Received %x (%s), sender %x\n",
+ mytid, opCode, getOpName(opCode), sender));
+
+ switch (opCode) {
+ case PP_FINISH:
+ stg_exit(EXIT_SUCCESS);
+ break;
+
+ /* Anything we're not prepared to deal with. Note that ALL OpCodes
+ are discarded during termination -- this helps prevent bizarre
+ race conditions. */
+ default:
+ if (!GlobalStopPending) {
+ GlobalTaskId errorTask = senderTask(packet);
+
+ fprintf(stderr,"Task %x: Unexpected OpCode %x from %x in processUnexpected",
+ mytid, opCode, errorTask );
+
+ stg_exit(EXIT_FAILURE);
+ }
+ }
+}
+
+//@cindex getOpcode
+OpCode
+getOpcode(rtsPacket p)
+{
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ pvm_bufinfo(p, &nbytes, &opCode, &sender_id);
+ return(opCode);
+}
+
+//@cindex getOpcodeAndSender
+void
+getOpcodeAndSender(rtsPacket p, OpCode *opCodep, GlobalTaskId *senderIdp)
+{
+ int nbytes;
+ pvm_bufinfo(p, &nbytes, opCodep, senderIdp);
+}
+
+//@cindex senderTask
+GlobalTaskId
+senderTask(rtsPacket p)
+{
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ pvm_bufinfo(p, &nbytes, &opCode, &sender_id);
+ return(sender_id);
+}
+
+/*
+ * PEStartUp does the low-level comms-specific startup stuff for a
+ * PE. It initialises the comms system, joins the appropriate groups,
+ * synchronises with the other PEs. Receives and records in a global
+ * variable the task-id of SysMan. If this is the main thread (discovered
+ * in main.lc), identifies itself to SysMan. Finally it receives
+ * from SysMan an array of the Global Task Ids of each PE, which is
+ * returned as the value of the function.
+ */
+
+//@cindex startUpPE
+GlobalTaskId *
+startUpPE(nat nPEs)
+{
+ int i;
+ rtsPacket addr;
+ long *buffer = (long *) stgMallocBytes(sizeof(long) * nPEs,
+ "PEStartUp (buffer)");
+ GlobalTaskId *thePEs = (GlobalTaskId *)
+ stgMallocBytes(sizeof(GlobalTaskId) * nPEs,
+ "PEStartUp (PEs)");
+
+ mytid = _my_gtid; /* Initialise PVM and get task id into global var.*/
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Task id = [%x], No. 
PEs = %d \n", + mytid, mytid, nPEs)); + checkComms(pvm_joingroup(PEGROUP), "PEStartup"); + IF_PAR_DEBUG(verbose, + fprintf(stderr,"== [%x] PEStartup: Joined PEGROUP\n", mytid)); + checkComms(pvm_joingroup(PECTLGROUP), "PEStartup"); + IF_PAR_DEBUG(verbose, + fprintf(stderr,"== [%x] PEStartup: Joined PECTLGROUP\n", mytid)); + checkComms(pvm_barrier(PECTLGROUP, nPEs+1), "PEStartup"); + IF_PAR_DEBUG(verbose, + fprintf(stderr,"== [%x] PEStartup, Passed PECTLGROUP barrier\n", mytid)); + + addr = waitForPEOp(PP_SYSMAN_TID, ANY_GLOBAL_TASK); + SysManTask = senderTask(addr); + if (IAmMainThread) { /* Main Thread Identifies itself to SysMan */ + pvm_initsend(PvmDataDefault); + pvm_send(SysManTask, PP_MAIN_TASK); + } + IF_PAR_DEBUG(verbose, + fprintf(stderr,"== [%x] Thread waits for %s\n", + mytid, getOpName(PP_PETIDS))); + addr = waitForPEOp(PP_PETIDS, ANY_GLOBAL_TASK); + GetArgs(buffer, nPEs); + for (i = 0; i < nPEs; ++i) { + thePEs[i] = (GlobalTaskId) buffer[i]; + IF_PAR_DEBUG(verbose, + fprintf(stderr,"== [%x] PEStartup: PEs[%d] = %x \n", + mytid, i, thePEs[i])); + } + free(buffer); + return thePEs; +} + +/* + * PEShutdown does the low-level comms-specific shutdown stuff for a + * single PE. It leaves the groups and then exits from pvm. + */ +//@cindex shutDownPE +void +shutDownPE(void) +{ + IF_PAR_DEBUG(verbose, + fprintf(stderr, "== [%x] PEshutdown\n", mytid)); + + checkComms(pvm_lvgroup(PEGROUP),"PEShutDown"); + checkComms(pvm_lvgroup(PECTLGROUP),"PEShutDown"); + checkComms(pvm_exit(),"PEShutDown"); +} + +#endif /* PAR -- whole file */ + +//@node Index, , Auxiliary functions, GUM Low-Level Inter-Task Communication +//@subsection Index + +//@index +//* getOpName:: @cindex\s-+getOpName +//* traceSendOp:: @cindex\s-+traceSendOp +//* sendOp:: @cindex\s-+sendOp +//* sendOp1:: @cindex\s-+sendOp1 +//* sendOp2:: @cindex\s-+sendOp2 +//* sendOpV:: @cindex\s-+sendOpV +//* sendOpNV:: @cindex\s-+sendOpNV +//* sendOpN:: @cindex\s-+sendOpN +//* waitForPEOp:: @cindex\s-+waitForPEOp +//* processUnexpected:: @cindex\s-+processUnexpected +//* getOpcode:: @cindex\s-+getOpcode +//* getOpcodeAndSender:: @cindex\s-+getOpcodeAndSender +//* senderTask:: @cindex\s-+senderTask +//* startUpPE:: @cindex\s-+startUpPE +//* shutDownPE:: @cindex\s-+shutDownPE +//@end index diff --git a/ghc/rts/parallel/PEOpCodes.h b/ghc/rts/parallel/PEOpCodes.h new file mode 100644 index 0000000..8380f46 --- /dev/null +++ b/ghc/rts/parallel/PEOpCodes.h @@ -0,0 +1,52 @@ +#ifndef PEOPCODES_H +#define PEOPCODES_H + +/************************************************************************ +* PEOpCodes.h * +* * +* This file contains definitions for all the GUM PE Opcodes * +* It's based on the GRAPH for PVM version * +* Phil Trinder, Glasgow University 8th December 1994 * +* * +************************************************************************/ + +#define REPLY_OK 0x00 + +/*Startup + Shutdown*/ +#define PP_SYSMAN_TID 0x50 +#define PP_MAIN_TASK 0x51 +#define PP_FINISH 0x52 +#define PP_PETIDS 0x53 + +/* Stats stuff */ +#define PP_STATS 0x54 +#define PP_STATS_ON 0x55 +#define PP_STATS_OFF 0x56 + +#define PP_FAIL 0x57 + +/*Garbage Collection*/ +#define PP_GC_INIT 0x58 +#define PP_FULL_SYSTEM 0x59 +#define PP_GC_POLL 0x5a + +/*GUM Messages*/ +#define PP_FETCH 0x5b +#define PP_RESUME 0x5c +#define PP_ACK 0x5d +#define PP_FISH 0x5e +#define PP_SCHEDULE 0x5f +#define PP_FREE 0x60 + +#define MIN_PEOPS 0x50 +#define MAX_PEOPS 0x60 + +#define PEOP_NAMES "Init", "IOInit", \ + "Finish", "PETIDS", \ + "Stats", "Stats_On", "Stats_Off", \ + "Fail", \ + 
"GCInit", "FullSystem", "GCPoll", \ + "Fetch","Resume","ACK","Fish","Schedule", \ + "Free" + +#endif /* PEOPCODES_H */ diff --git a/ghc/rts/parallel/Pack.c b/ghc/rts/parallel/Pack.c new file mode 100644 index 0000000..b5484a1 --- /dev/null +++ b/ghc/rts/parallel/Pack.c @@ -0,0 +1,2614 @@ +/* + Time-stamp: + $Id: Pack.c,v 1.2 2000/01/13 14:34:08 hwloidl Exp $ + + Graph packing and unpacking code for sending it to another processor + and retrieving the original graph structure from the packet. + In the old RTS the code was split into Pack.c and Unpack.c (now deceased) + Used in GUM and GrAnSim. + + The GrAnSim version of the code defines routines for *simulating* the + packing of closures in the same way it is done in the parallel runtime + system. Basically GrAnSim only puts the addresses of the closures to be + transferred into a buffer. This buffer will then be associated with the + event of transferring the graph. When this event is scheduled, the + @UnpackGraph@ routine is called and the buffer can be discarded + afterwards. + + Note that in GranSim we need many buffers, not just one per PE. */ + +//@node Graph packing, , , +//@section Graph packing + +#if defined(PAR) || defined(GRAN) /* whole file */ + +#define _HS (sizeofW(StgHeader)) + +//@menu +//* Includes:: +//* Prototypes:: +//* Global variables:: +//* ADT of Closure Queues:: +//* Initialisation for packing:: +//* Packing Functions:: +//* Low level packing routines:: +//* Unpacking routines:: +//* Aux fcts for packing:: +//* Printing Packet Contents:: +//* End of file:: +//@end menu +//*/ + +//@node Includes, Prototypes, Graph packing, Graph packing +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "ClosureTypes.h" +#include "Storage.h" +#include "Hash.h" +#include "Parallel.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +# if defined(DEBUG) +# include "ParallelDebug.h" +# endif +#include "FetchMe.h" + +/* Which RTS flag should be used to get the size of the pack buffer ? */ +# if defined(PAR) +# define RTS_PACK_BUFFER_SIZE RtsFlags.ParFlags.packBufferSize +# else /* GRAN */ +# define RTS_PACK_BUFFER_SIZE RtsFlags.GranFlags.packBufferSize +# endif + +//@node Prototypes, Global variables, Includes, Graph packing +//@subsection Prototypes +/* + Code declarations. 
+*/ + +//@node ADT of closure queues, Init for packing, Prototypes, Prototypes +//@subsubsection ADT of closure queues + +static inline void AllocClosureQueue(nat size); +static inline void InitClosureQueue(void); +static inline rtsBool QueueEmpty(void); +static inline void QueueClosure(StgClosure *closure); +static inline StgClosure *DeQueueClosure(void); + +//@node Init for packing, Packing routines, ADT of closure queues, Prototypes +//@subsubsection Init for packing + +static void initPacking(void); +# if defined(PAR) +rtsBool initPackBuffer(void); +# elif defined(GRAN) +rtsPackBuffer *InstantiatePackBuffer (void); +static void reallocPackBuffer (void); +# endif + +//@node Packing routines, Low level packing fcts, Init for packing, Prototypes +//@subsubsection Packing routines + +static void PackClosure (StgClosure *closure); + +//@node Low level packing fcts, Unpacking routines, Packing routines, Prototypes +//@subsubsection Low level packing fcts + +# if defined(GRAN) +static inline void Pack (StgClosure *data); +# else +static inline void Pack (StgWord data); + +static void PackPLC (StgPtr addr); +static void PackOffset (int offset); +static void GlobaliseAndPackGA (StgClosure *closure); +# endif + +//@node Unpacking routines, Aux fcts for packing, Low level packing fcts, Prototypes +//@subsubsection Unpacking routines + +# if defined(PAR) +void InitPendingGABuffer(nat size); +void CommonUp(StgClosure *src, StgClosure *dst); +StgClosure *UnpackGraph(rtsPackBuffer *packBuffer, + globalAddr **gamap, + nat *nGAs); +# elif defined(GRAN) +void CommonUp(StgClosure *src, StgClosure *dst); +StgClosure *UnpackGraph(rtsPackBuffer* buffer); +#endif + +//@node Aux fcts for packing, , Unpacking routines, Prototypes +//@subsubsection Aux fcts for packing + +# if defined(PAR) +static void DonePacking(void); +static void AmPacking(StgClosure *closure); +static int OffsetFor(StgClosure *closure); +static rtsBool NotYetPacking(int offset); +static rtsBool RoomToPack (nat size, nat ptrs); + rtsBool isOffset(globalAddr *ga); + rtsBool isFixed(globalAddr *ga); +# elif defined(GRAN) +static void DonePacking(void); +static rtsBool NotYetPacking(StgClosure *closure); +# endif + +//@node Global variables, ADT of Closure Queues, Prototypes, Graph packing +//@subsection Global variables +/* + Static data declarations +*/ + +static nat pack_locn, /* ptr to first free loc in pack buffer */ + clq_size, clq_pos, + buf_id = 1; /* identifier for buffer */ +static nat unpacked_size; +static nat reservedPAsize; /* Space reserved for primitive arrays */ +static rtsBool RoomInBuffer; + +# if defined(GRAN) +/* + The pack buffer + To be pedantic: in GrAnSim we're packing *addresses* of closures, + not the closures themselves. +*/ +static rtsPackBuffer *Bonzo = NULL; /* size: can be set via option */ +# else +static rtsPackBuffer *Bonzo = NULL; /* size: can be set via option */ +# endif + +/* + Bit of a hack for testing if a closure is the root of the graph. This is + set in @PackNearbyGraph@ and tested in @PackClosure@. +*/ + +static nat packed_thunks = 0; +static StgClosure *graph_root; + +# if defined(PAR) +/* + The offset hash table is used during packing to record the location in + the pack buffer of each closure which is packed. 
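+
+  As a sketch of how the table is used by the packing code below (see
+  AmPacking, OffsetFor and NotYetPacking for the real thing):
+
+    clpack_locn = OffsetFor(closure);    /* previous location, if any   */
+    if (!NotYetPacking(clpack_locn)) {
+      PackOffset(clpack_locn);           /* seen before: emit offset GA */
+      return;
+    }
+    AmPacking(closure);                  /* record current pack_locn    */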
+*/ +//@cindex offsetTable +static HashTable *offsetTable; + +//@cindex PendingGABuffer +static globalAddr *PendingGABuffer; +/* is initialised in main; */ +# endif /* PAR */ + +//@node ADT of Closure Queues, Initialisation for packing, Global variables, Graph packing +//@subsection ADT of Closure Queues + +//@menu +//* Closure Queues:: +//* Init routines:: +//* Basic routines:: +//@end menu + +//@node Closure Queues, Init routines, ADT of Closure Queues, ADT of Closure Queues +//@subsubsection Closure Queues +/* + Closure Queues + + These routines manage the closure queue. +*/ + +static nat clq_pos, clq_size; + +static StgClosure **ClosureQueue = NULL; /* HWL: init in main */ + +//@node Init routines, Basic routines, Closure Queues, ADT of Closure Queues +//@subsubsection Init routines + +/* @InitClosureQueue@ initialises the closure queue. */ + +//@cindex AllocClosureQueue +static inline void +AllocClosureQueue(size) +nat size; +{ + ASSERT(ClosureQueue == NULL); + ClosureQueue = (StgClosure**) stgMallocWords(size, "AllocClosureQueue"); +} + +//@cindex InitClosureQueue +static inline void +InitClosureQueue(void) +{ + clq_pos = clq_size = 0; + + if ( ClosureQueue == NULL ) + AllocClosureQueue(RTS_PACK_BUFFER_SIZE); +} + +//@node Basic routines, , Init routines, ADT of Closure Queues +//@subsubsection Basic routines + +/* + QueueEmpty returns rtsTrue if the closure queue is empty; rtsFalse otherwise. +*/ + +//@cindex QueueEmpty +static inline rtsBool +QueueEmpty(void) +{ + return(clq_pos >= clq_size); +} + +/* QueueClosure adds its argument to the closure queue. */ + +//@cindex QueueClosure +static inline void +QueueClosure(closure) +StgClosure *closure; +{ + if(clq_size < RTS_PACK_BUFFER_SIZE ) + ClosureQueue[clq_size++] = closure; + else + barf("Closure Queue Overflow (EnQueueing %p (%s))", + closure, info_type(closure)); +} + +/* DeQueueClosure returns the head of the closure queue. */ + +//@cindex DeQueueClosure +static inline StgClosure * +DeQueueClosure(void) +{ + if(!QueueEmpty()) + return(ClosureQueue[clq_pos++]); + else + return((StgClosure*)NULL); +} + +//@node Initialisation for packing, Packing Functions, ADT of Closure Queues, Graph packing +//@subsection Initialisation for packing +/* + Simple Packing Routines + + About packet sizes in GrAnSim: In GrAnSim we use a malloced block of + gransim_pack_buffer_size words to simulate a packet of pack_buffer_size + words. In the simulated PackBuffer we only keep the addresses of the + closures that would be packed in the parallel system (see Pack). To + decide if a packet overflow occurs pack_buffer_size must be compared + versus unpacked_size (see RoomToPack). Currently, there is no multi + packet strategy implemented, so in the case of an overflow we just stop + adding closures to the closure queue. If an overflow of the simulated + packet occurs, we just realloc some more space for it and carry on as + usual. 
-- HWL */ + +# if defined(GRAN) +rtsPackBuffer * +InstantiatePackBuffer (void) { + extern rtsPackBuffer *Bonzo; + + Bonzo = (rtsPackBuffer *) stgMallocWords(sizeofW(rtsPackBuffer), + "InstantiatePackBuffer: failed to alloc packBuffer"); + Bonzo->size = RtsFlags.GranFlags.packBufferSize_internal; + Bonzo->buffer = (StgWord **) stgMallocWords(RtsFlags.GranFlags.packBufferSize_internal, + "InstantiatePackBuffer: failed to alloc GranSim internal packBuffer"); + /* NB: gransim_pack_buffer_size instead of pack_buffer_size -- HWL */ + /* stgMallocWords is now simple allocate in Storage.c */ + + return (Bonzo); +} + +/* + Reallocate the GranSim internal pack buffer to make room for more closure + pointers. This is independent of the check for packet overflow as in GUM +*/ +static void +reallocPackBuffer (void) { + + ASSERT(pack_locn >= (int)Bonzo->size+sizeofW(rtsPackBuffer)); + + IF_GRAN_DEBUG(packBuffer, + belch("** Increasing size of PackBuffer %p to %d words (PE %u @ %d)\n", + Bonzo, Bonzo->size+REALLOC_SZ, + CurrentProc, CurrentTime[CurrentProc])); + + Bonzo = (rtsPackBuffer*)realloc(Bonzo, + sizeof(StgClosure*)*(REALLOC_SZ + + (int)Bonzo->size + + sizeofW(rtsPackBuffer))) ; + if (Bonzo==(rtsPackBuffer*)NULL) + barf("Failing to realloc %d more words for PackBuffer %p (PE %u @ %d)\n", + REALLOC_SZ, Bonzo, CurrentProc, CurrentTime[CurrentProc]); + + Bonzo->size += REALLOC_SZ; + + ASSERT(pack_locn < Bonzo->size+sizeofW(rtsPackBuffer)); +} +# endif + +# if defined(PAR) +/* @initPacking@ initialises the packing buffer etc. */ +//@cindex initPackBuffer +rtsBool +initPackBuffer(void) +{ + if (Bonzo == NULL) { /* not yet allocated */ + + if ((Bonzo = (rtsPackBuffer *) + stgMallocWords(sizeofW(rtsPackBuffer)+RtsFlags.ParFlags.packBufferSize, + "initPackBuffer")) == NULL) + return rtsFalse; + + InitPendingGABuffer(RtsFlags.ParFlags.packBufferSize); + AllocClosureQueue(RtsFlags.ParFlags.packBufferSize); + } + return rtsTrue; +} +# endif + +static void +initPacking(void) +{ +# if defined(GRAN) + Bonzo = InstantiatePackBuffer(); /* for GrAnSim only -- HWL */ + /* NB: free in UnpackGraph */ +# endif + + Bonzo->id = buf_id++; /* buffer id are only used for debugging! */ + pack_locn = 0; /* the index into the actual pack buffer */ + unpacked_size = 0; /* the size of the whole graph when unpacked */ + reservedPAsize = 0; + RoomInBuffer = rtsTrue; + InitClosureQueue(); + packed_thunks = 0; /* total number of thunks packed so far */ +# if defined(PAR) + offsetTable = allocHashTable(); +# endif +} + +//@node Packing Functions, Low level packing routines, Initialisation for packing, Graph packing +//@subsection Packing Functions + +//@menu +//* Packing Sections of Nearby Graph:: +//* Packing Closures:: +//@end menu + +//@node Packing Sections of Nearby Graph, Packing Closures, Packing Functions, Packing Functions +//@subsubsection Packing Sections of Nearby Graph +/* + Packing Sections of Nearby Graph + + @PackNearbyGraph@ packs a closure and associated graph into a static + buffer (@PackBuffer@). It returns the address of this buffer and the + size of the data packed into the buffer (in its second parameter, + @packBufferSize@). The associated graph is packed in a depth first + manner, hence it uses an explicit queue of closures to be packed rather + than simply using a recursive algorithm. Once the packet is full, + closures (other than primitive arrays) are packed as FetchMes, and their + children are not queued for packing. 
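+
+  A minimal usage sketch (the result points at the static pack buffer
+  owned by this module, so it has to be consumed before the next call):
+
+    nat size;
+    rtsPackBuffer *buffer = PackNearbyGraph(closure, tso, &size);
+    /* buffer->buffer now holds size words of packed graph */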
*/ + +//@cindex PackNearbyGraph + +/* NB: this code is shared between GranSim and GUM; + tso only used in GranSim */ +rtsPackBuffer * +PackNearbyGraph(closure, tso, packBufferSize) +StgClosure* closure; +StgTSO* tso; +nat *packBufferSize; +{ + extern rtsPackBuffer *Bonzo; + /* Ensure enough heap for all possible RBH_Save closures */ + + ASSERT(RTS_PACK_BUFFER_SIZE > 0); + + /* ToDo: check that we have enough heap for the packet + ngoq ngo' + if (Hp + PACK_HEAP_REQUIRED > HpLim) + return NULL; + */ + + initPacking(); +# if defined(GRAN) + graph_root = closure; +# endif + + IF_GRAN_DEBUG(pack, + belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [PE %d]\n demanded by TSO %d (%p) [PE %u]", + Bonzo->id, Bonzo, closure, where_is(closure), + tso->id, tso, where_is((StgClosure*)tso))); + + IF_GRAN_DEBUG(pack, + belch("** PrintGraph of %p is:", closure); + PrintGraph(closure,0)); + + IF_PAR_DEBUG(pack, + belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [%x]\n demanded by TSO %d (%p)", + Bonzo->id, Bonzo, closure, mytid, + tso->id, tso)); + + IF_PAR_DEBUG(pack, + belch("** PrintGraph of %p is:", closure); + belch("** pack_locn=%d", pack_locn); + PrintGraph(closure,0)); + + QueueClosure(closure); + do { + PackClosure(DeQueueClosure()); + } while (!QueueEmpty()); + +# if defined(PAR) + + /* Record how much space is needed to unpack the graph */ + Bonzo->tso = tso; // ToDo: check: used in GUM or only for debugging? + Bonzo->unpacked_size = unpacked_size; + Bonzo->size = pack_locn; + + /* Set the size parameter */ + ASSERT(pack_locn <= RtsFlags.ParFlags.packBufferSize); + *packBufferSize = pack_locn; + +# else /* GRAN */ + + /* Record how much space is needed to unpack the graph */ + // PackBuffer[PACK_FLAG_LOCN] = (P_) MAGIC_PACK_FLAG; for testing + Bonzo->tso = tso; + Bonzo->unpacked_size = unpacked_size; + + // ASSERT(pack_locn <= PackBuffer[PACK_SIZE_LOCN]+PACK_HDR_SIZE); + /* ToDo: Print an earlier, more meaningful message */ + if (pack_locn==0) /* i.e. packet is empty */ + barf("EMPTY PACKET! 
Can't transfer closure %p at all!!\n", + closure); + Bonzo->size = pack_locn; + *packBufferSize = pack_locn; + +# endif + + DonePacking(); /* {GrAnSim}vaD 'ut'Ha' */ + +# if defined(GRAN) + IF_GRAN_DEBUG(pack , + belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d", + Bonzo->id, closure, Bonzo->size, packed_thunks, Bonzo->unpacked_size)); + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.tot_packets++; + globalGranStats.tot_packet_size += pack_locn; + } + + IF_GRAN_DEBUG(pack, PrintPacket(Bonzo)); +# elif defined(PAR) + IF_GRAN_DEBUG(pack , + belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d", + Bonzo->id, closure, Bonzo->size, packed_thunks, Bonzo->unpacked_size); + PrintPacket(Bonzo)); +# endif /* GRAN */ + + return (Bonzo); +} + +//@cindex PackOneNode + +# if defined(GRAN) +/* This version is used when the node is already local */ + +rtsPackBuffer * +PackOneNode(closure, tso, packBufferSize) +StgClosure* closure; +StgTSO* tso; +nat *packBufferSize; +{ + extern rtsPackBuffer *Bonzo; + int i, clpack_locn; + + initPacking(); + + IF_GRAN_DEBUG(pack, + belch("** PackOneNode: %p (%s)[PE %d] requested by TSO %d (%p) [PE %d]", + closure, info_type(closure), + where_is(closure), tso->id, tso, where_is((StgClosure *)tso))); + + Pack(closure); + + /* Record how much space is needed to unpack the graph */ + Bonzo->tso = tso; + Bonzo->unpacked_size = unpacked_size; + + /* Set the size parameter */ + ASSERT(pack_locn <= RTS_PACK_BUFFER_SIZE); + Bonzo->size = pack_locn; + *packBufferSize = pack_locn; + + if (RtsFlags.GranFlags.GranSimStats.Global) { + globalGranStats.tot_packets++; + globalGranStats.tot_packet_size += pack_locn; + } + IF_GRAN_DEBUG(pack, + PrintPacket(Bonzo)); + + return (Bonzo); +} +# endif /* GRAN */ + +#if defined(GRAN) + +/* + PackTSO and PackStkO are entry points for two special kinds of closure + which are used in the parallel RTS. Compared with other closures they + are rather awkward to pack because they don't follow the normal closure + layout (where all pointers occur before all non-pointers). Luckily, + they're only needed when migrating threads between processors. */ + +//@cindex PackTSO +rtsPackBuffer* +PackTSO(tso, packBufferSize) +StgTSO *tso; +nat *packBufferSize; +{ + extern rtsPackBuffer *Bonzo; + IF_GRAN_DEBUG(pack, + belch("** Packing TSO %d (%p)", tso->id, tso)); + *packBufferSize = 0; + // PackBuffer[0] = PackBuffer[1] = 0; ??? 
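+ // GranSim only simulates thread movement, so no TSO contents are
+ // packed here; we just return the (empty) buffer with size 0.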
+ return(Bonzo); +} + +//@cindex PackStkO +rtsPackBuffer* +PackStkO(stko, packBufferSize) +StgPtr stko; +nat *packBufferSize; +{ + extern rtsPackBuffer *Bonzo; + IF_GRAN_DEBUG(pack, + belch("** Packing STKO %p", stko)); + *packBufferSize = 0; + // PackBuffer[0] = PackBuffer[1] = 0; + return(Bonzo); +} + +void +PackFetchMe(StgClosure *closure) +{ + barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!"); +} + +#elif defined(PAR) + +rtsPackBuffer* +PackTSO(tso, packBufferSize) +StgTSO *tso; +nat *packBufferSize; +{ + barf("{PackTSO}Daq Qagh: trying to pack a TSO; thread migrations not supported, yet"); +} + +rtsPackBuffer* +PackStkO(stko, packBufferSize) +StgPtr stko; +nat *packBufferSize; +{ + barf("{PackStkO}Daq Qagh: trying to pack a STKO; thread migrations not supported, yet"); +} + +//@cindex PackFetchMe +void +PackFetchMe(StgClosure *closure) +{ + StgInfoTable *ip; + nat i; + +#if defined(GRAN) + barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!"); +#else + /* Pack a FetchMe closure instead of closure */ + ip = &FETCH_ME_info; + /* this assumes that the info ptr is always the first word in a closure*/ + Pack((StgWord)ip); + for (i = 1; i < _HS; ++i) // pack rest of fixed header + Pack((StgWord)*(((StgPtr)closure)+i)); + + unpacked_size += _HS; // ToDo: check +#endif +} + +#endif + +//@node Packing Closures, , Packing Sections of Nearby Graph, Packing Functions +//@subsubsection Packing Closures +/* + Packing Closures + + @PackClosure@ is the heart of the normal packing code. It packs a single + closure into the pack buffer, skipping over any indirections and + globalising it as necessary, queues any child pointers for further + packing, and turns it into a @FetchMe@ or revertible black hole (@RBH@) + locally if it was a thunk. Before the actual closure is packed, a + suitable global address (GA) is inserted in the pack buffer. There is + always room to pack a fetch-me to the closure (guaranteed by the + RoomToPack calculation), and this is packed if there is no room for the + entire closure. + + Space is allocated for any primitive array children of a closure, and + hence a primitive array can always be packed along with it's parent + closure. */ + +//@cindex PackClosure + +# if defined(PAR) + +void +PackClosure(closure) +StgClosure *closure; +{ + StgInfoTable *info; + StgClosure *indirectee, *rbh; + nat size, ptrs, nonptrs, vhs, i, clpack_locn; + rtsBool is_CONSTR = rtsFalse; + char str[80]; + + ASSERT(closure!=NULL); + indirectee = closure; + do { + /* Don't pack indirection closures */ + closure = indirectee; + indirectee = IS_INDIRECTION(closure); + IF_PAR_DEBUG(pack, + if (indirectee) + belch("** Shorted an indirection (%s) at %p (-> %p)", + info_type(closure), closure, indirectee)); + } while (indirectee); + + clpack_locn = OffsetFor(closure); + + /* If the closure has been packed already, just pack an indirection to it + to guarantee that the graph doesn't become a tree when unpacked */ + if (!NotYetPacking(clpack_locn)) { + StgInfoTable *info; + + PackOffset(clpack_locn); + return; + } + + /* + * PLCs reside on all of the PEs already. Just pack the + * address as a GA (a bit of a kludge, since an address may + * not fit in *any* of the individual GA fields). Const, + * charlike and small intlike closures are converted into + * PLCs. 
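+ *
+ * For example, a CONSTR_CHARLIKE closure is shipped by packing just the
+ * address of the matching static closure (cf. the case below):
+ *
+ *   PackPLC(CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data));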
+ */ + switch (get_itbl(closure)->type) { + +# ifdef DEBUG + // check error cases only in a debugging setup + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + case RET_DYN: + barf("** {Pack}Daq Qagh: found return vector %p (%s) when packing (thread migration not implemented)", + closure, info_type(closure)); + /* never reached */ + + case UPDATE_FRAME: + case STOP_FRAME: + case CATCH_FRAME: + case SEQ_FRAME: + barf("** {Pack}Daq Qagh: found stack frame %p (%s) when packing (thread migration not implemented)", + closure, info_type(closure)); + /* never reached */ + + case TSO: + case BLOCKED_FETCH: + case EVACUATED: + /* something's very wrong */ + barf("** {Pack}Daq Qagh: found %s (%p) when packing", + info_type(closure), closure); + /* never reached */ +# endif + + case CONSTR_CHARLIKE: + IF_PAR_DEBUG(pack, + belch("** Packing a charlike closure %d", + ((StgIntCharlikeClosure*)closure)->data)); + + PackPLC(CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data)); + return; + + case CONSTR_INTLIKE: + { + StgInt val = ((StgIntCharlikeClosure*)closure)->data; + + if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) { + IF_PAR_DEBUG(pack, + belch("** Packing a small intlike %d as a PLC", val)); + PackPLC(INTLIKE_CLOSURE(val)); + return; + } else { + IF_PAR_DEBUG(pack, + belch("** Packing a big intlike %d as a normal closure", + val)); + break; + } + } + + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + /* it's a constructor (i.e. plain data) but we don't know + how many ptrs, non-ptrs there are => use generic code */ + IF_PAR_DEBUG(pack, + belch("** Packing a CONSTR %p (%s) using generic packing with GA", + closure, info_type(closure))); + // is_CONSTR = rtsTrue; + break; + /* fall through to generic packing code */ + + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC:// For now we ship indirections to CAFs: They are + // evaluated on each PE if needed + IF_PAR_DEBUG(pack, + belch("** Packing a %p (%s) as a PLC", + closure, info_type(closure))); + + PackPLC(closure); + return; + + case MVAR: + /* MVARs may not be copied; they are sticky objects in the new RTS */ + /* therefore we treat them just as RBHs etc (what a great system!) */ + IF_PAR_DEBUG(pack, + belch("** Found an MVar at %p (%s)", + closure, info_type(closure))); + /* fall through !! */ + + case THUNK_SELECTOR: // ToDo: fix packing of this strange beast + IF_PAR_DEBUG(pack, + belch("** Found an THUNK_SELECTORE at %p (%s)", + closure, info_type(closure))); + /* fall through !! */ + + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case SE_BLACKHOLE: + case BLACKHOLE: + case RBH: + case FETCH_ME: + case FETCH_ME_BQ: + + /* If it's a (revertible) black-hole, pack a FetchMe closure to it */ + //ASSERT(pack_locn > PACK_HDR_SIZE); + + IF_PAR_DEBUG(pack, + belch("** Packing a BH or FM at %p (%s) of (fixed size %d)", + closure, info_type(closure), _HS)); + + /* Need a GA even when packing a constructed FETCH_ME (cruel world!) */ + GlobaliseAndPackGA(closure); + + PackFetchMe(closure); + return; + + default: +/* IF_PAR_DEBUG(pack, */ +/* belch("** Not a PLC or BH ... 
")); */ + } /* switch */ + + /* get info about basic layout of the closure */ + info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); + + ASSERT(!IS_BLACK_HOLE(closure)); + + IF_PAR_DEBUG(pack, + fprintf(stderr, "** packing %p (%s) (size=%d, ptrs=%d, nonptrs=%d)\n", + closure, info_type(closure), size, ptrs, nonptrs)); + + /* + * Now peek ahead to see whether the closure has any primitive array + * children + */ + /* + ToDo: fix this code -- HWL + for (i = 0; i < ptrs; ++i) { + StgInfoTable * childInfo; + nat childSize, childPtrs, childNonPtrs, childVhs; + + // extract i-th pointer out of closure + childInfo = get_closure_info(((PP_) (closure))[i + FIXED_HS + vhs], + &childSize, &childPtrs, &childNonPtrs, &childVhs, str); + if (IS_BIG_MOTHER(childInfo)) { + reservedPAsize += PACK_GA_SIZE + FIXED_HS + childVhs + childNonPtrs + + childPtrs * PACK_FETCHME_SIZE; + } + } + */ + /* Record the location of the GA */ + AmPacking(closure); + + /* Pack the global address */ + if (!is_CONSTR) { + GlobaliseAndPackGA(closure); + } else { + IF_PAR_DEBUG(pack, + belch("** No GA allocated for CONSTR %p (%s)", + closure, info_type(closure))); + } + + /* + * Pack a fetchme to the closure if it's a black hole, or the buffer is full + * and it isn't a primitive array. N.B. Primitive arrays are always packed + * (because their parents index into them directly) + */ + + // ToDo: pack FMs if no more room available in packet (see below) + if (!(RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs))) + barf("** Qagh: Pack: not enough room in packet to pack closure %p (%s)", + closure, info_type(closure)); + + /* + Has been moved into the switch statement + + if (IS_BLACK_HOLE(closure)) + !(RoomToPack(PACK_GA_SIZE + FIXED_HS + vhs + nonptrs, ptrs) + || IS_BIG_MOTHER(info))) + { + + ASSERT(pack_locn > PACK_HDR_SIZE); + + info = FetchMe_info; + for (i = 0; i < FIXED_HS; ++i) { + if (i == INFO_HDR_POSN) + Pack((StgWord) FetchMe_info); + else + Pack(closure[i]); + } + + unpacked_size += FIXED_HS + FETCHME_CLOSURE_SIZE(dummy); + + } else { + */ + if (info->type == ARR_WORDS || info->type == MUT_ARR_PTRS || + info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR) + belch("** ghuH: found %s; packing of primitive arrays not yet implemented", + info_type(closure)); + + /* At last! A closure we can actually pack! */ + if (ip_MUTABLE(info) && (info->type != FETCH_ME)) + fprintf(stderr, "** ghuH: Replicated a Mutable closure!\n"); + + /* + Remember, the generic closure layout is as follows: + +-------------------------------------------------+ + | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | + +-------------------------------------------------+ + */ + /* pack fixed and variable header */ + for (i = 0; i < _HS + vhs; ++i) + Pack((StgWord)*(((StgPtr)closure)+i)); + + /* register all ptrs for further packing */ + for (i = 0; i < ptrs; ++i) + QueueClosure(((StgClosure *) *(((StgPtr)closure)+(i+_HS+vhs)))); + + /* pack non-ptrs */ + for (i = 0; i < nonptrs; ++i) + Pack((StgWord)*(((StgPtr)closure)+(i+_HS+vhs+ptrs))); + + unpacked_size += _HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size); + + /* + * Record that this is a revertable black hole so that we can fill in + * its address from the fetch reply. Problem: unshared thunks may cause + * space leaks this way, their GAs should be deallocated following an + * ACK. + */ + + // IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) !? 
HWL + if (closure_THUNK(closure) && !closure_UNPOINTED(closure)) { + rbh = convertToRBH(closure); + ASSERT(rbh == closure); // rbh at the same position (minced version) + packed_thunks++; + } +} + +# else /* GRAN */ + +/* Fake the packing of a closure */ + +void +PackClosure(closure) +StgClosure *closure; +{ + StgInfoTable *info, *childInfo; + nat size, ptrs, nonptrs, vhs; + char info_hdr_ty[80]; + nat i; + StgClosure *indirectee, *rbh; + char str[80]; + rtsBool is_mutable, will_be_rbh, no_more_thunks_please; + + is_mutable = rtsFalse; + + /* In GranSim we don't pack and unpack closures -- we just simulate + packing by updating the bitmask. So, the graph structure is unchanged + i.e. we don't short out indirections here. -- HWL */ + + /* Nothing to do with packing but good place to (sanity) check closure; + if the closure is a thunk, it must be unique; otherwise we have copied + work at some point before that which violates one of our main global + assertions in GranSim/GUM */ + ASSERT(!closure_THUNK(closure) || is_unique(closure)); + + IF_GRAN_DEBUG(pack, + belch("** Packing closure %p (%s)", + closure, info_type(closure))); + + if (where_is(closure) != where_is(graph_root)) { + IF_GRAN_DEBUG(pack, + belch("** faking a FETCHME [current PE: %d, closure's PE: %d]", + where_is(graph_root), where_is(closure))); + + /* GUM would pack a FETCHME here; simulate that by increasing the */ + /* unpacked size accordingly but don't pack anything -- HWL */ + unpacked_size += _HS + 2 ; // sizeofW(StgFetchMe); + return; + } + + /* If the closure's not already being packed */ + if (!NotYetPacking(closure)) + /* Don't have to do anything in GrAnSim if closure is already */ + /* packed -- HWL */ + { + IF_GRAN_DEBUG(pack, + belch("** Closure %p is already packed and omitted now!", + closure)); + return; + } + + switch (get_itbl(closure)->type) { + /* ToDo: check for sticky bit here? */ + /* BH-like closures which must not be moved to another PE */ + case CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */ + case SE_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */ + case SE_CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */ + case BLACKHOLE: /* # of ptrs, nptrs: 0,2 */ + case BLACKHOLE_BQ: /* # of ptrs, nptrs: 1,1 */ + case RBH: /* # of ptrs, nptrs: 1,1 */ + /* same for these parallel specific closures */ + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + IF_GRAN_DEBUG(pack, + belch("** Avoid packing BH-like closures (%p, %s)!", + closure, info_type(closure))); + /* Just ignore RBHs i.e. they stay where they are */ + return; + + case THUNK_SELECTOR: + { + StgClosure *sel = ((StgSelector *)closure)->selectee; + + IF_GRAN_DEBUG(pack, + belch("** Avoid packing THUNK_SELECTOR (%p, %s) but queuing %p (%s)!", + closure, info_type(closure), sel, info_type(sel))); + QueueClosure(sel); + IF_GRAN_DEBUG(pack, + belch("** [%p (%s) (Queueing closure) ....]", + sel, info_type(sel))); + } + return; + + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + /* For now we ship indirections to CAFs: + * They are evaluated on each PE if needed */ + IF_GRAN_DEBUG(pack, + belch("** Nothing to pack for %p (%s)!", + closure, info_type(closure))); + // Pack(closure); GUM only + return; + + case CONSTR_CHARLIKE: + case CONSTR_INTLIKE: + IF_GRAN_DEBUG(pack, + belch("** Nothing to pack for %s (%p)!", + closure, info_type(closure))); + // PackPLC(((StgIntCharlikeClosure *)closure)->data); GUM only + return; + + case AP_UPD: + case PAP: + /* partial applications; special treatment necessary? 
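For now they drop through to the generic packing code at the end of this switch. 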
*/ + break; + + case CAF_UNENTERED: /* # of ptrs, nptrs: 1,3 */ + case CAF_ENTERED: /* # of ptrs, nptrs: 0,4 (allegedly bogus!!) */ + /* CAFs; special treatment necessary? */ + break; + + case MVAR: + barf("{PackClosure}Daq Qagh: found an MVAR (%p, %s); ToDo: implement proper treatment of MVARs", + closure, info_type(closure)); + + case ARR_WORDS: + case MUT_VAR: + case MUT_ARR_PTRS: + case MUT_ARR_PTRS_FROZEN: + /* Mutable objects; require special treatment to ship all data */ + is_mutable = rtsTrue; + break; + + case WEAK: + case FOREIGN: + case STABLE_NAME: + /* weak pointers and other FFI objects */ + barf("{PackClosure}Daq Qagh: found an FFI object (%p, %s); FFI not yet supported by GranSim, sorry", + closure, info_type(closure)); + + case TSO: + /* parallel objects */ + barf("{PackClosure}Daq Qagh: found a TSO when packing (%p, %s); thread migration not yet implemented, sorry", + closure, info_type(closure)); + + case BCO: + /* Hugs objects (i.e. closures used by the interpreter) */ + barf("{PackClosure}Daq Qagh: found a Hugs closure when packing (%p, %s); GranSim not yet integrated with Hugs, sorry", + closure, info_type(closure)); + + case IND: /* # of ptrs, nptrs: 1,0 */ + case IND_STATIC: /* # of ptrs, nptrs: 1,0 */ + case IND_PERM: /* # of ptrs, nptrs: 1,1 */ + case IND_OLDGEN: /* # of ptrs, nptrs: 1,1 */ + case IND_OLDGEN_PERM: /* # of ptrs, nptrs: 1,1 */ + /* we shouldn't find an indirection here, because we have shorted them + out at the beginning of this functions already. + */ + break; + /* should be: + barf("{PackClosure}Daq Qagh: found indirection when packing (%p, %s)", + closure, info_type(closure)); + */ + + case UPDATE_FRAME: + case CATCH_FRAME: + case SEQ_FRAME: + case STOP_FRAME: + /* stack frames; should never be found when packing for now; + once we support thread migration these have to be covered properly + */ + barf("{PackClosure}Daq Qagh: found stack frame when packing (%p, %s)", + closure, info_type(closure)); + + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + case RET_DYN: + /* vectored returns; should never be found when packing; */ + barf("{PackClosure}Daq Qagh: found vectored return (%p, %s)", + closure, info_type(closure)); + + case INVALID_OBJECT: + barf("{PackClosure}Daq Qagh: found Invalid object (%p, %s)", + closure, info_type(closure)); + + default: + /* + Here we know that the closure is a CONSTR, FUN or THUNK (maybe + a specialised version with wired in #ptr/#nptr info; currently + we treat these specialised versions like the generic version) + */ + } /* switch */ + + /* Otherwise it's not Fixed */ + + info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); + will_be_rbh = closure_THUNK(closure) && !closure_UNPOINTED(closure); + + IF_GRAN_DEBUG(pack, + belch("** Info on closure %p (%s): size=%d; ptrs=%d", + closure, info_type(closure), + size, ptrs, + (will_be_rbh) ? 
"will become RBH" : "will NOT become RBH")); + + // check whether IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) -- HWL + no_more_thunks_please = + (RtsFlags.GranFlags.ThunksToPack>0) && + (packed_thunks>=RtsFlags.GranFlags.ThunksToPack); + + /* + should be covered by get_closure_info + if (info->type == FETCH_ME || info->type == FETCH_ME_BQ || + info->type == BLACKHOLE || info->type == RBH ) + size = ptrs = nonptrs = vhs = 0; + */ + /* Now peek ahead to see whether the closure has any primitive */ + /* array children */ + /* + ToDo: fix this code + for (i = 0; i < ptrs; ++i) { + P_ childInfo; + W_ childSize, childPtrs, childNonPtrs, childVhs; + + childInfo = get_closure_info(((StgPtrPtr) (closure))[i + FIXED_HS + vhs], + &childSize, &childPtrs, &childNonPtrs, + &childVhs, junk_str); + if (IS_BIG_MOTHER(childInfo)) { + reservedPAsize += PACK_GA_SIZE + FIXED_HS + + childVhs + childNonPtrs + + childPtrs * PACK_FETCHME_SIZE; + PAsize += PACK_GA_SIZE + FIXED_HS + childSize; + PAptrs += childPtrs; + } + } + */ + /* Don't pack anything (GrAnSim) if it's a black hole, or the buffer + * is full and it isn't a primitive array. N.B. Primitive arrays are + * always packed (because their parents index into them directly) */ + + if (IS_BLACK_HOLE(closure)) + /* + ToDo: fix this code + || + !(RoomToPack(PACK_GA_SIZE + FIXED_HS + vhs + nonptrs, ptrs) + || IS_BIG_MOTHER(info))) + */ + return; + + /* At last! A closure we can actually pack! */ + + if (closure_MUTABLE(closure)) // not nec. && (info->type != FETCHME)) + belch("ghuH: Replicated a Mutable closure!"); + + if (RtsFlags.GranFlags.GranSimStats.Global && + no_more_thunks_please && will_be_rbh) { + globalGranStats.tot_cuts++; + if ( RtsFlags.GranFlags.Debug.pack ) + belch("** PackClosure (w/ ThunksToPack=%d): Cutting tree with root at %#x\n", + RtsFlags.GranFlags.ThunksToPack, closure); + } else if (will_be_rbh || (closure==graph_root) ) { + packed_thunks++; + globalGranStats.tot_thunks++; + } + + if (no_more_thunks_please && will_be_rbh) + return; /* don't pack anything */ + + /* actual PACKING done here -- HWL */ + Pack(closure); + for (i = 0; i < ptrs; ++i) { + /* extract i-th pointer from closure */ + QueueClosure((StgClosure *)payloadPtr(closure,i)); + IF_GRAN_DEBUG(pack, + belch("** [%p (%s) (Queueing closure) ....]", + payloadPtr(closure,i), info_type(payloadPtr(closure,i)))); + } + + /* + for packing words (GUM only) do something like this: + + for (i = 0; i < ptrs; ++i) { + Pack(payloadWord(obj,i+j)); + } + */ + /* Turn thunk into a revertible black hole. */ + if (will_be_rbh) { + rbh = convertToRBH(closure); + ASSERT(rbh != NULL); + } +} +# endif /* PAR */ + +//@node Low level packing routines, Unpacking routines, Packing Functions, Graph packing +//@subsection Low level packing routines + +/* + @Pack@ is the basic packing routine. It just writes a word of data into + the pack buffer and increments the pack location. */ + +//@cindex Pack + +# if defined(PAR) +static inline void +Pack(data) +StgWord data; +{ + ASSERT(pack_locn < RtsFlags.ParFlags.packBufferSize); + Bonzo->buffer[pack_locn++] = data; +} +#endif + +#if defined(GRAN) +static inline void +Pack(closure) +StgClosure *closure; +{ + StgInfoTable *info; + nat size, ptrs, nonptrs, vhs; + char str[80]; + + /* This checks the size of the GrAnSim internal pack buffer. 
The simulated + pack buffer is checked via RoomToPack (as in GUM) */ + if (pack_locn >= (int)Bonzo->size+sizeofW(rtsPackBuffer)) + reallocPackBuffer(); + + if (closure==(StgClosure*)NULL) + belch("Qagh {Pack}Daq: Trying to pack 0"); + Bonzo->buffer[pack_locn++] = closure; + /* ASSERT: Data is a closure in GrAnSim here */ + info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); + unpacked_size += _HS + (size < MIN_UPD_SIZE ? + MIN_UPD_SIZE : + size); +} +# endif /* GRAN */ + +/* + If a closure is local, make it global. Then, divide its weight for + export. The GA is then packed into the pack buffer. */ + +# if defined(PAR) + +static void +GlobaliseAndPackGA(closure) +StgClosure *closure; +{ + globalAddr *ga; + globalAddr packGA; + + if ((ga = LAGAlookup(closure)) == NULL) + ga = makeGlobal(closure, rtsTrue); + splitWeight(&packGA, ga); + ASSERT(packGA.weight > 0); + + IF_PAR_DEBUG(pack, + fprintf(stderr, "** Globalising closure %p (%s) with GA", + closure, info_type(closure)); + printGA(&packGA); + fputc('\n', stderr)); + + + Pack((StgWord) packGA.weight); + Pack((StgWord) packGA.payload.gc.gtid); + Pack((StgWord) packGA.payload.gc.slot); +} + +/* + @PackPLC@ makes up a bogus GA for a PLC. Weight 0 implies that a PLC + address follows instead of PE, slot. */ + +//@cindex PackPLC + +static void +PackPLC(addr) +StgPtr addr; +{ + Pack(0L); /* weight */ + Pack((StgWord) addr); /* address */ +} + +/* + @PackOffset@ packs a special GA value that will be interpreted as an + offset to a closure in the pack buffer. This is used to avoid unfolding + the graph structure into a tree. */ + +static void +PackOffset(offset) +int offset; +{ + IF_PAR_DEBUG(pack, + belch("** Packing Offset %d at pack location %u", + offset, pack_locn)); + Pack(1L); /* weight */ + Pack(0L); /* pe */ + Pack(offset); /* slot/offset */ +} +# endif /* PAR */ + +//@node Unpacking routines, Aux fcts for packing, Low level packing routines, Graph packing +//@subsection Unpacking routines + +/* + This was formerly in the (now deceased) module Unpack.c + + Unpacking closures which have been exported to remote processors + + This module defines routines for unpacking closures in the parallel + runtime system (GUM). + + In the case of GrAnSim, this module defines routines for *simulating* the + unpacking of closures as it is done in the parallel runtime system. +*/ + +//@node GUM code, Local Definitions, Unpacking routines, Unpacking routines +//@subsubsection GUM code + +#if defined(PAR) + +//@cindex InitPendingGABuffer +void +InitPendingGABuffer(size) +nat size; +{ + PendingGABuffer = (globalAddr *) + stgMallocBytes(size*2*sizeof(globalAddr), + "InitPendingGABuffer"); +} + +/* + @CommonUp@ commons up two closures which we have discovered to be + variants of the same object. One is made an indirection to the other. 
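(In the code below src is overwritten with an indirection to dst via
+  UPD_IND, which also wakes up any queue blocked on src.) 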
*/ + +//@cindex CommonUp +void +CommonUp(StgClosure *src, StgClosure *dst) +{ + StgBlockingQueueElement *bqe; + + ASSERT(src != (StgClosure *)NULL && dst != (StgClosure *)NULL); + ASSERT(src != dst); + + IF_PAR_DEBUG(verbose, + belch("__ CommonUp %p (%s) with %p (%s)", + src, info_type(src), dst, info_type(dst))); + + switch (get_itbl(src)->type) { + case BLACKHOLE_BQ: + bqe = ((StgBlockingQueue *)src)->blocking_queue; + break; + + case FETCH_ME_BQ: + bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue; + break; + + case RBH: + bqe = ((StgRBH *)src)->blocking_queue; + break; + + case BLACKHOLE: + case FETCH_ME: + bqe = END_BQ_QUEUE; + break; + + default: + /* Don't common up anything else */ + return; + } + /* NB: this also awakens the blocking queue for src */ + UPD_IND(src, dst); + // updateWithIndirection(src, dst); + /* + ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst))); + if (bqe != END_BQ_QUEUE) + awaken_blocked_queue(bqe, src); + */ +} + +/* + @UnpackGraph@ unpacks the graph contained in a message buffer. It + returns a pointer to the new graph. The @gamap@ parameter is set to + point to an array of (oldGA,newGA) pairs which were created as a result + of unpacking the buffer; @nGAs@ is set to the number of GA pairs which + were created. + + The format of graph in the pack buffer is as defined in @Pack.lc@. */ + +//@cindex UnpackGraph +StgClosure * +UnpackGraph(packBuffer, gamap, nGAs) +rtsPackBuffer *packBuffer; +globalAddr **gamap; +nat *nGAs; +{ + nat size, ptrs, nonptrs, vhs; + StgWord **buffer, **bufptr, **slotptr; + globalAddr ga, *gaga; + StgClosure *closure, *existing, + *graphroot, *graph, *parent; + StgInfoTable *ip, *oldip; + nat bufsize, i, + pptr = 0, pptrs = 0, pvhs; + rtsBool hasGA; + char str[80]; + + initPackBuffer(); /* in case it isn't already init'd */ + graphroot = (StgClosure *)NULL; + + gaga = PendingGABuffer; + + InitClosureQueue(); + + /* Unpack the header */ + bufsize = packBuffer->size; + buffer = packBuffer->buffer; + bufptr = buffer; + + /* allocate heap */ + if (bufsize > 0) { + graph = allocate(bufsize); + ASSERT(graph != NULL); + } + + parent = (StgClosure *)NULL; + + do { + /* This is where we will ultimately save the closure's address */ + slotptr = bufptr; + + /* First, unpack the next GA or PLC */ + ga.weight = (rtsWeight) *bufptr++; + + if (ga.weight > 0) { + ga.payload.gc.gtid = (GlobalTaskId) *bufptr++; + ga.payload.gc.slot = (int) *bufptr++; + } else { + ga.payload.plc = (StgPtr) *bufptr++; + } + + /* Now unpack the closure body, if there is one */ + if (isFixed(&ga)) { + /* No more to unpack; just set closure to local address */ + IF_PAR_DEBUG(pack, + belch("_* Unpacked PLC at %x", ga.payload.plc)); + hasGA = rtsFalse; + closure = ga.payload.plc; + } else if (isOffset(&ga)) { + /* No more to unpack; just set closure to cached address */ + IF_PAR_DEBUG(pack, + belch("_* Unpacked indirection to %p (was offset %x)", + (StgClosure *) buffer[ga.payload.gc.slot], + ga.payload.gc.slot)); + ASSERT(parent != (StgClosure *)NULL); + hasGA = rtsFalse; + closure = (StgClosure *) buffer[ga.payload.gc.slot]; + } else { + /* Now we have to build something. */ + hasGA = rtsTrue; + + ASSERT(bufsize > 0); + + /* + * Close your eyes. You don't want to see where we're looking. You + * can't get closure info until you've unpacked the variable header, + * but you don't know how big it is until you've got closure info. 
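+   * (Concretely: get_closure_info is applied to the raw bufptr below,
+   * which is only sound because the first _HS words of a packed closure
+   * coincide with the fixed header of a heap closure.)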
+ * So...we trust that the closure in the buffer is organized the + * same way as they will be in the heap...at least up through the + * end of the variable header. + */ + ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str); + + /* + Remember, the generic closure layout is as follows: + +-------------------------------------------------+ + | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | + +-------------------------------------------------+ + */ + /* Fill in the fixed header */ + for (i = 0; i < _HS; i++) + ((StgPtr)graph)[i] = (StgWord)*bufptr++; + + if (ip->type == FETCH_ME) + size = ptrs = nonptrs = vhs = 0; + + /* Fill in the packed variable header */ + for (i = 0; i < vhs; i++) + ((StgPtr)graph)[_HS + i] = (StgWord)*bufptr++; + + /* Pointers will be filled in later */ + + /* Fill in the packed non-pointers */ + for (i = 0; i < nonptrs; i++) + ((StgPtr)graph)[_HS + i + vhs + ptrs] = (StgWord)*bufptr++; + + /* Indirections are never packed */ + // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE); + + /* Add to queue for processing */ + QueueClosure(graph); + + /* + * Common up the new closure with any existing closure having the same + * GA + */ + + if ((existing = GALAlookup(&ga)) == NULL) { + globalAddr *newGA; + /* Just keep the new object */ + IF_PAR_DEBUG(pack, + belch("_* Unpacking new GA ((%x, %d, %x))", + ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight)); + + closure = graph; + newGA = setRemoteGA(graph, &ga, rtsTrue); + if (ip->type == FETCH_ME) + // FETCHME_GA(closure) = newGA; + ((StgFetchMe *)closure)->ga = newGA; + } else { + /* Two closures, one global name. Someone loses */ + oldip = get_itbl(existing); + + if ((oldip->type == FETCH_ME || + // ToDo: don't pack a GA for these in the first place + oldip->type == CONSTR || + oldip->type == CONSTR_1_0 || + oldip->type == CONSTR_0_1 || + oldip->type == CONSTR_2_0 || + oldip->type == CONSTR_1_1 || + oldip->type == CONSTR_0_2 || + IS_BLACK_HOLE(existing)) && + ip->type != FETCH_ME) { + + /* What we had wasn't worth keeping */ + closure = graph; + CommonUp(existing, graph); + } else { + StgWord ty; + + /* + * Either we already had something worthwhile by this name or + * the new thing is just another FetchMe. However, the thing we + * just unpacked has to be left as-is, or the child unpacking + * code will fail. Remember that the way pointer words are + * filled in depends on the info pointers of the parents being + * the same as when they were packed. + */ + IF_PAR_DEBUG(pack, + belch("_* Unpacking old GA ((%x, %d, %x)), keeping %#lx", + ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight, + existing)); + + closure = existing; + // HACK + ty = get_itbl(closure)->type; + if (ty == CONSTR || + ty == CONSTR_1_0 || + ty == CONSTR_0_1 || + ty == CONSTR_2_0 || + ty == CONSTR_1_1 || + ty == CONSTR_0_2) + CommonUp(closure, graph); + + } + /* Pool the total weight in the stored ga */ + (void) addWeight(&ga); + } + + /* Sort out the global address mapping */ + if (hasGA || // (ip_THUNK(ip) && !ip_UNPOINTED(ip)) || + (ip_MUTABLE(ip) && ip->type != FETCH_ME)) { + /* Make up new GAs for single-copy closures */ + globalAddr *newGA = makeGlobal(closure, rtsTrue); + + // keep this assertion! + // ASSERT(closure == graph); + + /* Create an old GA to new GA mapping */ + *gaga++ = ga; + splitWeight(gaga, newGA); + ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1)); + gaga++; + } + graph += _HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size); + } + + /* + * Set parent pointer to point to chosen closure. 
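+   * (The slot filled in is ((StgPtr)parent)[_HS + pvhs + pptr], i.e. the
+   * pptr-th pointer word after the parent's fixed and variable headers;
+   * see the assignment below.)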
If we're at the top of + * the graph (our parent is NULL), then we want to arrange to return the + * chosen closure to our caller (possibly in place of the allocated graph + * root.) + */ + if (parent == NULL) + graphroot = closure; + else + ((StgPtr)parent)[_HS + pvhs + pptr] = (StgWord) closure; + + /* Save closure pointer for resolving offsets */ + *slotptr = (StgWord) closure; + + /* Locate next parent pointer */ + pptr++; + while (pptr + 1 > pptrs) { + parent = DeQueueClosure(); + + if (parent == NULL) + break; + else { + (void) get_closure_info(parent, &size, &pptrs, &nonptrs, + &pvhs, str); + pptr = 0; + } + } + } while (parent != NULL); + + //ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp); + + *gamap = PendingGABuffer; + *nGAs = (gaga - PendingGABuffer) / 2; + + /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */ + ASSERT(graphroot!=NULL); + return (graphroot); +} +#endif /* PAR */ + +//@node GranSim Code, , Local Definitions, Unpacking routines +//@subsubsection GranSim Code + +/* + For GrAnSim: No actual unpacking should be necessary. We just + have to walk over the graph and set the bitmasks appropriately. + Since we use RBHs similarly to GUM but without an ACK message/event + we have to revert the RBH from within the UnpackGraph routine (good luck!) + -- HWL +*/ + +#if defined(GRAN) +void +CommonUp(StgClosure *src, StgClosure *dst) +{ + barf("CommonUp: should never be entered in a GranSim setup"); +} + +StgClosure* +UnpackGraph(buffer) +rtsPackBuffer* buffer; +{ + nat size, ptrs, nonptrs, vhs, + bufptr = 0; + StgClosure *closure, *graphroot, *graph; + StgInfoTable *ip; + StgWord bufsize, unpackedsize, + pptr = 0, pptrs = 0, pvhs; + StgTSO* tso; + char str[240], str1[80]; + int i; + + bufptr = 0; + graphroot = buffer->buffer[0]; + + tso = buffer->tso; + + /* Unpack the header */ + unpackedsize = buffer->unpacked_size; + bufsize = buffer->size; + + IF_GRAN_DEBUG(pack, + belch("<<< Unpacking <<%d>> (buffer @ %p):\n (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]", + buffer->id, buffer, graphroot, where_is(graphroot), + bufsize, tso->id, tso, + where_is((StgClosure *)tso))); + + do { + closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */ + + /* Actually only ip is needed; rest is useful for TESTING -- HWL */ + ip = get_closure_info(closure, + &size, &ptrs, &nonptrs, &vhs, str); + + IF_GRAN_DEBUG(pack, + sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ", + closure, (closure_HNF(closure) ? 
"NF" : "__"), + PROCS(closure))); + + if (get_itbl(closure)->type == RBH) { + /* if it's an RBH, we have to revert it into a normal closure, thereby + awakening the blocking queue; not that this is code currently not + needed in GUM, but it should be added with the new features in + GdH (and the implementation of an NACK message) + */ + // closure->header.gran.procs = PE_NUMBER(CurrentProc); + SET_GRAN_HDR(closure, PE_NUMBER(CurrentProc)); /* Move node */ + + IF_GRAN_DEBUG(pack, + strcat(str, " (converting RBH) ")); + + convertFromRBH(closure); /* In GUM that's done by convertToFetchMe */ + + IF_GRAN_DEBUG(pack, + belch(":: closure %p (%s) is a RBH; after reverting: IP=%p", + closure, info_type(closure), get_itbl(closure))); + } else if (IS_BLACK_HOLE(closure)) { + IF_GRAN_DEBUG(pack, + belch(":: closure %p (%s) is a BH; copying node to %d", + closure, info_type(closure), CurrentProc)); + closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */ + } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) { + if (closure_HNF(closure)) { + IF_GRAN_DEBUG(pack, + belch(":: closure %p (%s) is a HNF; copying node to %d", + closure, info_type(closure), CurrentProc)); + closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */ + } else { + IF_GRAN_DEBUG(pack, + belch(":: closure %p (%s) is no (R)BH or HNF; moving node to %d", + closure, info_type(closure), CurrentProc)); + closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */ + } + } + + IF_GRAN_DEBUG(pack, + sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1)); + IF_GRAN_DEBUG(pack, belch(str)); + + } while (bufptrsize) ; /* (parent != NULL); */ + + /* In GrAnSim we allocate pack buffers dynamically! -- HWL */ + free(buffer->buffer); + free(buffer); + + IF_GRAN_DEBUG(pack, + belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0)); + + return (graphroot); +} +#endif /* GRAN */ + +//@node Aux fcts for packing, Printing Packet Contents, Unpacking routines, Graph packing +//@subsection Aux fcts for packing + +//@menu +//* Offset table:: +//* Packet size:: +//* Types of Global Addresses:: +//* Closure Info:: +//@end menu + +//@node Offset table, Packet size, Aux fcts for packing, Aux fcts for packing +//@subsubsection Offset table + +/* + DonePacking is called when we've finished packing. It releases memory + etc. */ + +//@cindex DonePacking + +# if defined(PAR) + +static void +DonePacking(void) +{ + freeHashTable(offsetTable, NULL); + offsetTable = NULL; +} + +/* + AmPacking records that the closure is being packed. Note the abuse of + the data field in the hash table -- this saves calling @malloc@! */ + +//@cindex AmPacking + +static void +AmPacking(closure) +StgClosure *closure; +{ +/* IF_PAR_DEBUG(pack, */ +/* fprintf(stderr, "** AmPacking %p (%s)(IP %p) at %u\n", */ +/* closure, info_type(closure), get_itbl(closure), pack_locn)); */ + + insertHashTable(offsetTable, (StgWord) closure, (void *) (StgWord) pack_locn); +} + +/* + OffsetFor returns an offset for a closure which is already being packed. */ + +//@cindex OffsetFor + +static int +OffsetFor(closure) +StgClosure *closure; +{ + return (int) (StgWord) lookupHashTable(offsetTable, (StgWord) closure); +} + +/* + NotYetPacking determines whether the closure's already being packed. + Offsets $<$ @PACK_HDR_SIZE@ (e.g. 0) mean no. */ + +//@cindex NotYetPacking + +static rtsBool +NotYetPacking(offset) +int offset; +{ + return(offset == 0); // ToDo: what if root is found again?? 
+}
+
+# else /* GRAN */
+
+static void
+DonePacking(void)
+{
+  /* nothing */
+}
+
+/*
+   NotYetPacking searches through the whole pack buffer for closure.  */
+
+static rtsBool
+NotYetPacking(closure)
+StgClosure *closure;
+{ nat i;
+  rtsBool found = rtsFalse;
+
+  for (i=0; (i<pack_locn) && !found; i++)
+    found = Bonzo->buffer[i]==closure;
+
+  return (!found);
+}
+# endif
+
+//@node Packet size, Types of Global Addresses, Offset table, Aux fcts for packing
+//@subsubsection Packet size
+
+/*
+  RoomToPack determines whether there's room to pack the closure into
+  the pack buffer based on
+
+  o how full the buffer is already,
+  o the closure's size and number of pointers (which must be packed as GAs),
+  o the size and number of pointers held by any primitive arrays that it
+    points to
+
+  It has a *side-effect* (naughty, naughty) in assigning RoomInBuffer
+  to rtsFalse.
+*/
+
+//@cindex RoomToPack
+static rtsBool
+RoomToPack(size, ptrs)
+nat size, ptrs;
+{
+# if defined(PAR)
+  if (RoomInBuffer &&
+      (pack_locn + reservedPAsize + size +
+       ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE >= RTS_PACK_BUFFER_SIZE))
+    {
+      IF_PAR_DEBUG(pack,
+		   fprintf(stderr, "Buffer full\n"));
+
+      RoomInBuffer = rtsFalse;
+    }
+# else /* GRAN */
+  if (RoomInBuffer &&
+      (unpacked_size + reservedPAsize + size +
+       ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE >= RTS_PACK_BUFFER_SIZE))
+    {
+      IF_GRAN_DEBUG(packBuffer,
+		    fprintf(stderr, "Buffer full\n"));
+      RoomInBuffer = rtsFalse;
+    }
+# endif
+  return (RoomInBuffer);
+}
+
+//@node Types of Global Addresses, Closure Info, Packet size, Aux fcts for packing
+//@subsubsection Types of Global Addresses
+
+/*
+  Types of Global Addresses
+
+  These routines determine whether a GA is one of a number of special types
+  of GA.
+*/
+
+# if defined(PAR)
+//@cindex isOffset
+rtsBool
+isOffset(ga)
+globalAddr *ga;
+{
+    return (ga->weight == 1 && ga->payload.gc.gtid == 0);
+}
+
+//@cindex isFixed
+rtsBool
+isFixed(ga)
+globalAddr *ga;
+{
+    return (ga->weight == 0);
+}
+# endif
+
+//@node Closure Info, , Types of Global Addresses, Aux fcts for packing
+//@subsubsection Closure Info
+
+/*
+   Closure Info
+
+   @get_closure_info@ determines the size, number of pointers etc. for this
+   type of closure -- see @SMInfoTables.lh@ for the legal info. types etc.
+
+[Can someone please keep this function up to date.  I keep needing it
+ (or something similar) for interpretive code, and it keeps
+ bit-rotting.  {\em It really belongs somewhere else too}.
KH @@ 17/2/95] */ + +#if 0 + +// {Parallel.h}Daq ngoqvam vIroQpu' + +# if defined(GRAN) || defined(PAR) +/* extracting specific info out of closure; currently only used in GRAN -- HWL */ +//@cindex get_closure_info +StgInfoTable* +get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty) +StgClosure* node; +nat *size, *ptrs, *nonptrs, *vhs; +char *info_hdr_ty; +{ + StgInfoTable *info; + + info = get_itbl(node); + /* the switch shouldn't be necessary, really; just use default case */ + switch (info->type) { +#if 0 + case CONSTR_1_0: + case THUNK_1_0: + case FUN_1_0: + *size = sizeW_fromITBL(info); + *ptrs = (nat) 1; // (info->layout.payload.ptrs); + *nonptrs = (nat) 0; // (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return info; + + case CONSTR_0_1: + case THUNK_0_1: + case FUN_0_1: + *size = sizeW_fromITBL(info); + *ptrs = (nat) 0; // (info->layout.payload.ptrs); + *nonptrs = (nat) 1; // (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return info; + + case CONSTR_2_0: + case THUNK_2_0: + case FUN_2_0: + *size = sizeW_fromITBL(info); + *ptrs = (nat) 2; // (info->layout.payload.ptrs); + *nonptrs = (nat) 0; // (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return info; + + case CONSTR_1_1: + case THUNK_1_1: + case FUN_1_1: + *size = sizeW_fromITBL(info); + *ptrs = (nat) 1; // (info->layout.payload.ptrs); + *nonptrs = (nat) 1; // (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return info; + + case CONSTR_0_2: + case THUNK_0_2: + case FUN_0_2: + *size = sizeW_fromITBL(info); + *ptrs = (nat) 0; // (info->layout.payload.ptrs); + *nonptrs = (nat) 2; // (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return info; +#endif + case RBH: + { + StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to + *size = sizeW_fromITBL(rip); + *ptrs = (nat) (rip->layout.payload.ptrs); + *nonptrs = (nat) (rip->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return rip; // NB: we return the reverted info ptr for a RBH!!!!!! + } + + default: + *size = sizeW_fromITBL(info); + *ptrs = (nat) (info->layout.payload.ptrs); + *nonptrs = (nat) (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown + info_hdr_type(node, info_hdr_ty); + return info; + } +} + +//@cindex IS_BLACK_HOLE +rtsBool +IS_BLACK_HOLE(StgClosure* node) +{ + StgInfoTable *info; + info = get_itbl(node); + return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse); +} + +//@cindex IS_INDIRECTION +StgClosure * +IS_INDIRECTION(StgClosure* node) +{ + StgInfoTable *info; + info = get_itbl(node); + switch (info->type) { + case IND: + case IND_OLDGEN: + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_STATIC: + /* relies on indirectee being at same place for all these closure types */ + return (((StgInd*)node) -> indirectee); + default: + return NULL; + } +} + +/* +rtsBool +IS_THUNK(StgClosure* node) +{ + StgInfoTable *info; + info = get_itbl(node); + return ((info->type == THUNK || + info->type == THUNK_STATIC || + info->type == THUNK_SELECTOR) ? 
rtsTrue : rtsFalse); +} +*/ + +# endif /* GRAN */ +#endif /* 0 */ + +# if 0 +/* ngoq ngo' */ + +P_ +get_closure_info(closure, size, ptrs, nonptrs, vhs, type) +P_ closure; +W_ *size, *ptrs, *nonptrs, *vhs; +char *type; +{ + P_ ip = (P_) INFO_PTR(closure); + + if (closure==NULL) { + fprintf(stderr, "Qagh {get_closure_info}Daq: NULL closure\n"); + *size = *ptrs = *nonptrs = *vhs = 0; + strcpy(type,"ERROR in get_closure_info"); + return; + } else if (closure==PrelBase_Z91Z93_closure) { + /* fprintf(stderr, "Qagh {get_closure_info}Daq: PrelBase_Z91Z93_closure closure\n"); */ + *size = *ptrs = *nonptrs = *vhs = 0; + strcpy(type,"PrelBase_Z91Z93_closure"); + return; + }; + + ip = (P_) INFO_PTR(closure); + + switch (INFO_TYPE(ip)) { + case INFO_SPEC_U_TYPE: + case INFO_SPEC_S_TYPE: + case INFO_SPEC_N_TYPE: + *size = SPEC_CLOSURE_SIZE(closure); + *ptrs = SPEC_CLOSURE_NoPTRS(closure); + *nonptrs = SPEC_CLOSURE_NoNONPTRS(closure); + *vhs = 0 /*SPEC_VHS*/; + strcpy(type,"SPEC"); + break; + + case INFO_GEN_U_TYPE: + case INFO_GEN_S_TYPE: + case INFO_GEN_N_TYPE: + *size = GEN_CLOSURE_SIZE(closure); + *ptrs = GEN_CLOSURE_NoPTRS(closure); + *nonptrs = GEN_CLOSURE_NoNONPTRS(closure); + *vhs = GEN_VHS; + strcpy(type,"GEN"); + break; + + case INFO_DYN_TYPE: + *size = DYN_CLOSURE_SIZE(closure); + *ptrs = DYN_CLOSURE_NoPTRS(closure); + *nonptrs = DYN_CLOSURE_NoNONPTRS(closure); + *vhs = DYN_VHS; + strcpy(type,"DYN"); + break; + + case INFO_TUPLE_TYPE: + *size = TUPLE_CLOSURE_SIZE(closure); + *ptrs = TUPLE_CLOSURE_NoPTRS(closure); + *nonptrs = TUPLE_CLOSURE_NoNONPTRS(closure); + *vhs = TUPLE_VHS; + strcpy(type,"TUPLE"); + break; + + case INFO_DATA_TYPE: + *size = DATA_CLOSURE_SIZE(closure); + *ptrs = DATA_CLOSURE_NoPTRS(closure); + *nonptrs = DATA_CLOSURE_NoNONPTRS(closure); + *vhs = DATA_VHS; + strcpy(type,"DATA"); + break; + + case INFO_IMMUTUPLE_TYPE: + case INFO_MUTUPLE_TYPE: + *size = MUTUPLE_CLOSURE_SIZE(closure); + *ptrs = MUTUPLE_CLOSURE_NoPTRS(closure); + *nonptrs = MUTUPLE_CLOSURE_NoNONPTRS(closure); + *vhs = MUTUPLE_VHS; + strcpy(type,"(IM)MUTUPLE"); + break; + + case INFO_STATIC_TYPE: + *size = STATIC_CLOSURE_SIZE(closure); + *ptrs = STATIC_CLOSURE_NoPTRS(closure); + *nonptrs = STATIC_CLOSURE_NoNONPTRS(closure); + *vhs = STATIC_VHS; + strcpy(type,"STATIC"); + break; + + case INFO_CAF_TYPE: + case INFO_IND_TYPE: + *size = IND_CLOSURE_SIZE(closure); + *ptrs = IND_CLOSURE_NoPTRS(closure); + *nonptrs = IND_CLOSURE_NoNONPTRS(closure); + *vhs = IND_VHS; + strcpy(type,"CAF|IND"); + break; + + case INFO_CONST_TYPE: + *size = CONST_CLOSURE_SIZE(closure); + *ptrs = CONST_CLOSURE_NoPTRS(closure); + *nonptrs = CONST_CLOSURE_NoNONPTRS(closure); + *vhs = CONST_VHS; + strcpy(type,"CONST"); + break; + + case INFO_SPEC_RBH_TYPE: + *size = SPEC_RBH_CLOSURE_SIZE(closure); + *ptrs = SPEC_RBH_CLOSURE_NoPTRS(closure); + *nonptrs = SPEC_RBH_CLOSURE_NoNONPTRS(closure); + if (*ptrs <= 2) { + *nonptrs -= (2 - *ptrs); + *ptrs = 1; + } else + *ptrs -= 1; + *vhs = SPEC_RBH_VHS; + strcpy(type,"SPEC_RBH"); + break; + + case INFO_GEN_RBH_TYPE: + *size = GEN_RBH_CLOSURE_SIZE(closure); + *ptrs = GEN_RBH_CLOSURE_NoPTRS(closure); + *nonptrs = GEN_RBH_CLOSURE_NoNONPTRS(closure); + if (*ptrs <= 2) { + *nonptrs -= (2 - *ptrs); + *ptrs = 1; + } else + *ptrs -= 1; + *vhs = GEN_RBH_VHS; + strcpy(type,"GEN_RBH"); + break; + + case INFO_CHARLIKE_TYPE: + *size = CHARLIKE_CLOSURE_SIZE(closure); + *ptrs = CHARLIKE_CLOSURE_NoPTRS(closure); + *nonptrs = CHARLIKE_CLOSURE_NoNONPTRS(closure); + *vhs = CHARLIKE_VHS; + strcpy(type,"CHARLIKE"); + break; 
+ + case INFO_INTLIKE_TYPE: + *size = INTLIKE_CLOSURE_SIZE(closure); + *ptrs = INTLIKE_CLOSURE_NoPTRS(closure); + *nonptrs = INTLIKE_CLOSURE_NoNONPTRS(closure); + *vhs = INTLIKE_VHS; + strcpy(type,"INTLIKE"); + break; + +# if !defined(GRAN) + case INFO_FETCHME_TYPE: + *size = FETCHME_CLOSURE_SIZE(closure); + *ptrs = FETCHME_CLOSURE_NoPTRS(closure); + *nonptrs = FETCHME_CLOSURE_NoNONPTRS(closure); + *vhs = FETCHME_VHS; + strcpy(type,"FETCHME"); + break; + + case INFO_FMBQ_TYPE: + *size = FMBQ_CLOSURE_SIZE(closure); + *ptrs = FMBQ_CLOSURE_NoPTRS(closure); + *nonptrs = FMBQ_CLOSURE_NoNONPTRS(closure); + *vhs = FMBQ_VHS; + strcpy(type,"FMBQ"); + break; +# endif + + case INFO_BQ_TYPE: + *size = BQ_CLOSURE_SIZE(closure); + *ptrs = BQ_CLOSURE_NoPTRS(closure); + *nonptrs = BQ_CLOSURE_NoNONPTRS(closure); + *vhs = BQ_VHS; + strcpy(type,"BQ"); + break; + + case INFO_BH_TYPE: + *size = BH_CLOSURE_SIZE(closure); + *ptrs = BH_CLOSURE_NoPTRS(closure); + *nonptrs = BH_CLOSURE_NoNONPTRS(closure); + *vhs = BH_VHS; + strcpy(type,"BH"); + break; + + case INFO_TSO_TYPE: + *size = 0; /* TSO_CLOSURE_SIZE(closure); */ + *ptrs = 0; /* TSO_CLOSURE_NoPTRS(closure); */ + *nonptrs = 0; /* TSO_CLOSURE_NoNONPTRS(closure); */ + *vhs = TSO_VHS; + strcpy(type,"TSO"); + break; + + case INFO_STKO_TYPE: + *size = 0; + *ptrs = 0; + *nonptrs = 0; + *vhs = STKO_VHS; + strcpy(type,"STKO"); + break; + + default: + fprintf(stderr, "get_closure_info: Unexpected closure type (%lu), closure %lx\n", + INFO_TYPE(ip), (StgWord) closure); + EXIT(EXIT_FAILURE); + } + + return ip; +} +# endif + +# if 0 +// Use allocate in Storage.c instead +/* + @AllocateHeap@ will bump the heap pointer by @size@ words if the space + is available, but it will not perform garbage collection. + ToDo: check whether we can use an existing STG allocation routine -- HWL +*/ + + +//@cindex AllocateHeap +StgPtr +AllocateHeap(size) +nat size; +{ + StgPtr newClosure; + + /* Allocate a new closure */ + if (Hp + size > HpLim) + return NULL; + + newClosure = Hp + 1; + Hp += size; + + return newClosure; +} +# endif + +# if defined(PAR) + +//@cindex doGlobalGC +void +doGlobalGC(void) +{ + fprintf(stderr,"Splat -- we just hit global GC!\n"); + stg_exit(EXIT_FAILURE); + //fishing = rtsFalse; + outstandingFishes--; +} + +# endif /* PAR */ + +//@node Printing Packet Contents, End of file, Aux fcts for packing, Graph packing +//@subsection Printing Packet Contents +/* + Printing Packet Contents + */ + +#if defined(DEBUG) || defined(GRAN_CHECK) + +//@cindex PrintPacket + +#if defined(PAR) +void +PrintPacket(packBuffer) +rtsPackBuffer *packBuffer; +{ + StgClosure *parent, *graphroot, *closure_start; + StgInfoTable *ip, *oldip; + globalAddr ga; + StgWord **buffer, **bufptr, **slotptr; + + nat bufsize; + nat pptr = 0, pptrs = 0, pvhs; + nat unpack_locn = 0; + nat gastart = unpack_locn; + nat closurestart = unpack_locn; + nat i; + nat size, ptrs, nonptrs, vhs; + char str[80]; + + /* NB: this whole routine is more or less a copy of UnpackGraph with all + unpacking components replaced by printing fcts + Long live higher-order fcts! 
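+     A sketch of what such a higher-order refactoring could look like
+     (hypothetical names; not implemented in this module):
+
+       typedef void (*InspectFun)(StgClosure *closure);
+       static StgClosure *TraverseBuffer(rtsPackBuffer *b, InspectFun f);
+
+     UnpackGraph and PrintPacket would then just supply different
+     InspectFuns to the same traversal.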
+ */ + initPackBuffer(); /* in case it isn't already init'd */ + graphroot = (StgClosure *)NULL; + + // gaga = PendingGABuffer; + + InitClosureQueue(); + + /* Unpack the header */ + bufsize = packBuffer->size; + buffer = packBuffer->buffer; + bufptr = buffer; + + /* allocate heap + if (bufsize > 0) { + graph = allocate(bufsize); + ASSERT(graph != NULL); + } + */ + + fprintf(stderr, ".* Printing <<%d>> (buffer @ %p):\n", + packBuffer->id, packBuffer); + fprintf(stderr, ".* size: %d; unpacked_size: %d; tso: %p; buffer: %p\n", + packBuffer->size, packBuffer->unpacked_size, + packBuffer->tso, packBuffer->buffer); + + parent = (StgClosure *)NULL; + + do { + /* This is where we will ultimately save the closure's address */ + slotptr = bufptr; + + /* First, unpack the next GA or PLC */ + ga.weight = (rtsWeight) *bufptr++; + + if (ga.weight > 0) { + ga.payload.gc.gtid = (GlobalTaskId) *bufptr++; + ga.payload.gc.slot = (int) *bufptr++; + } else + ga.payload.plc = (StgPtr) *bufptr++; + + /* Now unpack the closure body, if there is one */ + if (isFixed(&ga)) { + fprintf(stderr, ".* [%u]: PLC @ %p\n", gastart, ga.payload.plc); + // closure = ga.payload.plc; + } else if (isOffset(&ga)) { + fprintf(stderr, ".* [%u]: OFFSET TO [%d]\n", gastart, ga.payload.gc.slot); + // closure = (StgClosure *) buffer[ga.payload.gc.slot]; + } else { + /* Print normal closures */ + + ASSERT(bufsize > 0); + + fprintf(stderr, ".* [%u]: ((%x, %d, %x)) ", gastart, + ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight); + + closure_start = bufptr; + ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str); + + /* + Remember, the generic closure layout is as follows: + +-------------------------------------------------+ + | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | + +-------------------------------------------------+ + */ + /* Print fixed header */ + fprintf(stderr, "FH ["); + for (i = 0; i < _HS; i++) + fprintf(stderr, " %p", *bufptr++); + + if (ip->type == FETCH_ME) + size = ptrs = nonptrs = vhs = 0; + + /* Print variable header */ + fprintf(stderr, "] VH ["); + for (i = 0; i < vhs; i++) + fprintf(stderr, " %p", *bufptr++); + + fprintf(stderr, "] %d PTRS [", ptrs); + + /* Pointers will be filled in later */ + + fprintf(stderr, " ] %d NON-PTRS [", nonptrs); + /* Print non-pointers */ + for (i = 0; i < nonptrs; i++) + fprintf(stderr, " %p", *bufptr++); + + fprintf(stderr, "] (%s)\n", str); + + /* Indirections are never packed */ + // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE); + + /* Add to queue for processing + When just printing the packet we do not have an unpacked closure + in hand, so we feed it the packet entry; + again, this assumes that at least the fixed header of the closure + has the same layout in the packet; also we may not overwrite entries + in the packet (done in Unpack), but for printing that's a bad idea + anyway */ + QueueClosure((StgClosure *)closure_start); + + /* No Common up needed for printing */ + + /* No Sort out the global address mapping for printing */ + + } /* normal closure case */ + + /* Locate next parent pointer */ + pptr++; + while (pptr + 1 > pptrs) { + parent = DeQueueClosure(); + + if (parent == NULL) + break; + else { + (void) get_closure_info(parent, &size, &pptrs, &nonptrs, + &pvhs, str); + pptr = 0; + } + } + } while (parent != NULL); + fprintf(stderr, ".* --- End packet <<%d>> ---\n", packBuffer->id); +} +#else /* GRAN */ +void +PrintPacket(buffer) +rtsPackBuffer *buffer; +{ + // extern char *info_hdr_type(P_ infoptr); /* defined in Threads.lc */ + // extern char 
*display_info_type(P_ infoptr);    /* defined in Threads.lc */
+
+  StgInfoTable *info;
+  nat size, ptrs, nonptrs, vhs;
+  char info_hdr_ty[80];
+  char str1[80], str2[80], junk_str[80];
+
+  /* globalAddr ga; */
+
+  nat bufsize, unpacked_size ;
+  StgClosure *parent;
+  nat pptr = 0, pptrs = 0, pvhs;
+
+  nat unpack_locn = 0;
+  nat gastart = unpack_locn;
+  nat closurestart = unpack_locn;
+
+  StgTSO *tso;
+  StgClosure *closure, *p;
+
+  nat i;
+
+  fprintf(stderr, "*** Printing <<%d>> (buffer @ %p):\n", buffer->id, buffer);
+  fprintf(stderr, "  size: %d; unpacked_size: %d; tso: %d (%p); buffer: %p\n",
+	  buffer->size, buffer->unpacked_size, buffer->tso->id, buffer->tso,
+	  buffer->buffer);
+  fputs("  contents: ", stderr);
+  for (unpack_locn=0; unpack_locn<buffer->size; unpack_locn++) {
+    closure = buffer->buffer[unpack_locn];
+    fprintf(stderr, ", %p (%s)",
+	    closure, info_type(closure));
+  }
+  fputc('\n', stderr);
+
+#if 0
+  /* traverse all elements of the graph; omitted for now, but might be useful */
+  InitClosureQueue();
+
+  tso = buffer->tso;
+
+  /* Unpack the header */
+  unpacked_size = buffer->unpacked_size;
+  bufsize = buffer->size;
+
+  fprintf(stderr, "Packet %p, size %u (unpacked size is %u); demanded by TSO %d (%p)[PE %d]\n--- Begin ---\n",
+	  buffer, bufsize, unpacked_size,
+	  tso->id, tso, where_is((StgClosure*)tso));
+
+  do {
+    closurestart = unpack_locn;
+    closure = buffer->buffer[unpack_locn++];
+
+    fprintf(stderr, "[%u]: (%p) ", closurestart, closure);
+
+    info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str1);
+    strcpy(str2, str1);
+    fprintf(stderr, "(%s|%s) ", str1, str2);
+
+    if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+	IS_BLACK_HOLE(closure))
+      size = ptrs = nonptrs = vhs = 0;
+
+    if (closure_THUNK(closure)) {
+      if (closure_UNPOINTED(closure))
+	fputs("UNPOINTED ", stderr);
+      else
+	fputs("POINTED ", stderr);
+    }
+    if (IS_BLACK_HOLE(closure)) {
+      fputs("BLACK HOLE\n", stderr);
+    } else {
+      /* Fixed header */
+      fprintf(stderr, "FH [");
+      for (i = 0, p = (StgClosure*)&(closure->header); i < _HS; i++, p++)
+	fprintf(stderr, " %p", *p);
+
+      /* Variable header
+      if (vhs > 0) {
+	fprintf(stderr, "] VH [%p", closure->payload[_HS]);
+
+	for (i = 1; i < vhs; i++)
+	  fprintf(stderr, " %p", closure->payload[_HS+i]);
+      }
+      */
+      fprintf(stderr, "] PTRS %u", ptrs);
+
+      /* Non-pointers */
+      if (nonptrs > 0) {
+	fprintf(stderr, " NPTRS [%p", closure->payload[_HS+vhs]);
+
+	for (i = 1; i < nonptrs; i++)
+	  fprintf(stderr, " %p", closure->payload[_HS+vhs+i]);
+
+	putc(']', stderr);
+      }
+      putc('\n', stderr);
+    }
+  } while (unpack_locn<bufsize) ;  /* (parent != NULL); */
+#endif /* 0 */
+}
+#endif /* PAR */
+
+#endif /* DEBUG || GRAN_CHECK */
diff --git a/ghc/rts/parallel/ParInit.c b/ghc/rts/parallel/ParInit.c
new file mode 100644
--- /dev/null
+++ b/ghc/rts/parallel/ParInit.c
+/* --------------------------------------------------------------------------
+   Time-stamp:
+   $Id: ParInit.c,v 1.2 2000/01/13 14:34:08 hwloidl Exp $
+
+   Initialising the parallel RTS
+
+   An extension based on Kevin Hammond's GRAPH for PVM version
+   P. Trinder, January 17th 1995.
+   Adapted for the new RTS
+   P. Trinder, July 1997.
+   H-W. Loidl, November 1999.
+
+   ------------------------------------------------------------------------ */
+
+#ifdef PAR /* whole file */
+
+#define NON_POSIX_SOURCE /* so says Solaris */
+
+//@menu
+//* Includes::
+//* Global variables::
+//* Initialisation Routines::
+//@end menu
+
+//@node Includes, Global variables
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParallelRts.h"
+#include <setjmp.h>
+#include "LLC.h"
+#include "HLC.h"
+
+//@node Global variables, Initialisation Routines, Includes
+//@subsection Global variables
+
+/* Global conditions defined here.
*/ + +rtsBool IAmMainThread = rtsFalse, /* Set for the main thread */ + GlobalStopPending = rtsFalse; /* Terminating */ + +/* Task identifiers for various interesting global tasks. */ + +GlobalTaskId IOTask = 0, /* The IO Task Id */ + SysManTask = 0, /* The System Manager Task Id */ + mytid = 0; /* This PE's Task Id */ + +rtsTime main_start_time; /* When the program started */ +rtsTime main_stop_time; /* When the program finished */ +jmp_buf exit_parallel_system; /* How to abort from the RTS */ + + +//rtsBool fishing = rtsFalse; /* We have no fish out in the stream */ +rtsTime last_fish_arrived_at = 0; /* Time of arrival of most recent fish*/ +nat outstandingFishes = 0; /* Number of active fishes */ + +//@cindex spark queue +/* GranSim: a globally visible array of spark queues */ +rtsSpark *pending_sparks_hd[SPARK_POOLS], /* ptr to start of a spark pool */ + *pending_sparks_tl[SPARK_POOLS], /* ptr to end of a spark pool */ + *pending_sparks_lim[SPARK_POOLS], + *pending_sparks_base[SPARK_POOLS]; + +//@cindex spark_limit +/* max number of sparks permitted on the PE; + see RtsFlags.ParFlags.maxLocalSparks */ +nat spark_limit[SPARK_POOLS]; + +globalAddr theGlobalFromGA, theGlobalToGA; +/* + HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK !! see FETCH_ME_entry + Only used within FETCH_ME_entry as local vars, but they shouldn't + be defined locally in there -- that would move %esp and you'll never + return from STG land. + -- HWL +*/ +globalAddr *rga_GLOBAL; +globalAddr *lga_GLOBAL; +globalAddr fmbqga_GLOBAL; +StgClosure *p_GLOBAL; + +//@cindex PendingFetches +/* A list of fetch reply messages not yet processed; this list is filled + by awaken_blocked_queue and processed by processFetches */ +StgBlockedFetch *PendingFetches = END_BF_QUEUE; + +//@cindex allPEs +GlobalTaskId *allPEs; + +//@cindex nPEs +nat nPEs = 0; + +//@cindex sparksIgnored +nat sparksIgnored = 0, sparksCreated = 0, + threadsIgnored = 0, threadsCreated = 0; + +//@cindex advisory_thread_count +nat advisory_thread_count = 0; + +/* Where to write the log file + This is now in Parallel.c +FILE *gr_file = NULL; +char gr_filename[STATS_FILENAME_MAXLEN]; +*/ + +/* Flag handling. */ + +#if 0 +/* that's now all done via RtsFlags.ParFlags... */ +rtsBool TraceSparks = rtsFalse; /* Enable the spark trace mode */ +rtsBool SparkLocally = rtsFalse; /* Use local threads if possible */ +rtsBool DelaySparks = rtsFalse; /* Use delayed sparking */ +rtsBool LocalSparkStrategy = rtsFalse; /* Either delayed threads or local threads*/ +rtsBool GlobalSparkStrategy = rtsFalse; /* Export all threads */ + +rtsBool DeferGlobalUpdates = rtsFalse; /* Defer updating of global nodes */ +#endif + +//@node Initialisation Routines, , Global variables +//@subsection Initialisation Routines + +/* + par_exit defines how to terminate the program. If the exit code is + non-zero (i.e. an error has occurred), the PE should not halt until + outstanding error messages have been processed. Otherwise, messages + might be sent to non-existent Task Ids. The infinite loop will actually + terminate, since STG_Exception will call myexit\tr{(0)} when + it received a PP_FINISH from the system manager task. 
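+
+  In outline, the normal shutdown (n == 0) performed below is:
+
+     sendOp(PP_FINISH, SysManTask);        // tell the system manager
+     waitForPEOp(PP_FINISH, SysManTask);   // wait for the global FINISH
+     shutDownPE();
+     exit(0);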
+*/
+//@cindex par_exit
+void
+shutdownParallelSystem(StgInt n)
+{
+  belch(" entered shutdownParallelSystem ...");
+  GlobalStopPending = rtsTrue;
+  sendOp(PP_FINISH, SysManTask);
+  if (n != 0)
+    waitForTermination();
+  else
+    waitForPEOp(PP_FINISH, SysManTask);
+  shutDownPE();
+  IF_PAR_DEBUG(verbose,
+	       belch("--++ shutting down PE %lx, %ld sparks created, %ld sparks Ignored, %ld threads created, %ld threads Ignored",
+		     (W_) mytid, sparksCreated, sparksIgnored,
+		     threadsCreated, threadsIgnored));
+  exit(n);
+}
+
+/* Some prototypes */
+void srand48 (long);
+time_t time (time_t *);
+
+//@cindex initParallelSystem
+void
+initParallelSystem(void)
+{
+  belch("entered initParallelSystem ...");
+
+  /* Don't buffer standard channels... */
+  setbuf(stdout,NULL);
+  setbuf(stderr,NULL);
+
+  srand48(time(NULL) * getpid()); /* Initialise random-number generator seed */
+				  /* Used to select target of FISH message */
+
+  theGlobalFromGA.payload.gc.gtid = 0;
+  theGlobalToGA.payload.gc.gtid = 0;
+
+  //IF_PAR_DEBUG(verbose,
+  belch("initPackBuffer ...");
+  if (!initPackBuffer())
+    barf("initPackBuffer");
+
+  // IF_PAR_DEBUG(verbose,
+  belch("initMoreBuffers ...");
+  if (!initMoreBuffers())
+    barf("initMoreBuffers");
+
+  // IF_PAR_DEBUG(verbose,
+  belch("initSparkPools ...");
+  if (!initSparkPools())
+    barf("initSparkPools");
+}
+
+/*
+ * SynchroniseSystem synchronises the reduction task with the system
+ * manager, and initialises the Global address tables (LAGA & GALA)
+ */
+
+//@cindex SynchroniseSystem
+void
+SynchroniseSystem(void)
+{
+  int i;
+
+  fprintf(stderr, "SynchroniseSystem: nPEs=%d\n", nPEs);
+
+  initEachPEHook();                  /* HWL: hook to be execed on each PE */
+
+  fprintf(stderr, "SynchroniseSystem: initParallelSystem\n");
+  initParallelSystem();
+  allPEs = startUpPE(nPEs);
+
+  /* Initialize global address tables */
+  initGAtables();
+
+  /* Record the shortened PE identifiers for the LAGA etc.
tables */ + for (i = 0; i < nPEs; ++i) { + fprintf(stderr, "[%x] registering %d-th PE as %x\n", mytid, i, allPEs[i]); + registerTask(allPEs[i]); + } +} + +#endif /* PAR -- whole file */ + +//@index +//* PendingFetches:: @cindex\s-+PendingFetches +//* SynchroniseSystem:: @cindex\s-+SynchroniseSystem +//* allPEs:: @cindex\s-+allPEs +//* initParallelSystem:: @cindex\s-+initParallelSystem +//* nPEs:: @cindex\s-+nPEs +//* par_exit:: @cindex\s-+par_exit +//* spark queue:: @cindex\s-+spark queue +//* sparksIgnored:: @cindex\s-+sparksIgnored +//@end index diff --git a/ghc/rts/parallel/ParInit.h b/ghc/rts/parallel/ParInit.h new file mode 100644 index 0000000..add7ad9 --- /dev/null +++ b/ghc/rts/parallel/ParInit.h @@ -0,0 +1,19 @@ +/* ----------------------------------------------------------------------------- + * ParInit.h,1 + * + * Phil Trinder + * July 1998 + * + * External Parallel Initialisation Interface + * + * ---------------------------------------------------------------------------*/ + +#ifndef PARINIT_H +#define PARINIT_H + +extern void RunParallelSystem (P_); +extern void initParallelSystem(void); +extern void SynchroniseSystem(void); +extern void par_exit(I_); + +#endif PARINIT_H diff --git a/ghc/rts/parallel/ParTypes.h b/ghc/rts/parallel/ParTypes.h new file mode 100644 index 0000000..b280eae --- /dev/null +++ b/ghc/rts/parallel/ParTypes.h @@ -0,0 +1,39 @@ +/* --------------------------------------------------------------------------- + * Time-stamp: + * $Id: ParTypes.h,v 1.2 2000/01/13 14:34:08 hwloidl Exp $ + * + * Runtime system types for GUM + * + * ------------------------------------------------------------------------- */ + +#ifndef PARTYPES_H +#define PARTYPES_H + +#ifdef PAR /* all of it */ + +// now in Parallel.h +//typedef struct hashtable HashTable; +//typedef struct hashlist HashList; + +/* Global addresses now live in Parallel.h (needed in Closures.h) */ +// gaddr + +// now in Parallel.h +/* (GA, LA) pairs +typedef struct gala { + globalAddr ga; + StgPtr la; + struct gala *next; + rtsBool preferred; +} rtsGaLa; +*/ + +#if defined(GRAN) +typedef unsigned long TIME; +typedef unsigned char Proc; +typedef unsigned char EVTTYPE; +#endif + +#endif /* PAR */ + +#endif /* ! PARTYPES_H */ diff --git a/ghc/rts/parallel/Parallel.c b/ghc/rts/parallel/Parallel.c new file mode 100644 index 0000000..8feb516 --- /dev/null +++ b/ghc/rts/parallel/Parallel.c @@ -0,0 +1,776 @@ +/* + Time-stamp: + + Basic functions for use in either GranSim or GUM. +*/ + +#if defined(GRAN) || defined(PAR) /* whole file */ + +//@menu +//* Includes:: +//* Variables and constants:: +//* Writing to the log-file:: +//* Dumping routines:: +//@end menu + +//@node Includes, Variables and constants +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "GranSimRts.h" +#include "ParallelRts.h" + + +//@node Variables and constants, Writing to the log-file, Includes +//@subsection Variables and constants + +/* Where to write the log file */ +FILE *gr_file = NULL; +char gr_filename[STATS_FILENAME_MAXLEN]; + +//@node Writing to the log-file, Dumping routines, Variables and constants +//@subsection Writing to the log-file +/* + Writing to the log-file + + These routines dump event-based info to the main log-file. + The code for writing log files is shared between GranSim and GUM. +*/ + +/* + * If you're not using GNUC and you're on a 32-bit machine, you're + * probably out of luck here. However, since CONCURRENT currently + * requires GNUC, I'm not too worried about it. 
--JSM + */ + +//@cindex init_gr_simulation +#if defined(GRAN) +void +init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ + nat i; + char *extension = RtsFlags.GranFlags.GranSimStats.Binary ? "gb" : "gr"; + + if (RtsFlags.GranFlags.GranSimStats.Global) + init_gr_stats(); + + /* init global constants for costs of basic operations */ + gran_arith_cost = RtsFlags.GranFlags.Costs.arith_cost; + gran_branch_cost = RtsFlags.GranFlags.Costs.branch_cost; + gran_load_cost = RtsFlags.GranFlags.Costs.load_cost; + gran_store_cost = RtsFlags.GranFlags.Costs.store_cost; + gran_float_cost = RtsFlags.GranFlags.Costs.float_cost; + + if (RtsFlags.GranFlags.GranSimStats.Suppressed) + return; + + if (!RtsFlags.GranFlags.GranSimStats.Full) + return; + + sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension); + + if ((gr_file = fopen(gr_filename, "w")) == NULL) { + barf("Can't open granularity simulation report file %s\n", + gr_filename); + } + + setbuf(gr_file, NULL); // for debugging turn buffering off + + /* write header with program name, options and setup to gr_file */ + fputs("Granularity Simulation for ", gr_file); + for (i = 0; i < prog_argc; ++i) { + fputs(prog_argv[i], gr_file); + fputc(' ', gr_file); + } + + if (rts_argc > 0) { + fputs("+RTS ", gr_file); + + for (i = 0; i < rts_argc; ++i) { + fputs(rts_argv[i], gr_file); + fputc(' ', gr_file); + } + } + + fputs("\nStart time: ", gr_file); + fputs(time_str(), gr_file); /* defined in RtsUtils.c */ + fputc('\n', gr_file); + + fputs("\n\n--------------------\n\n", gr_file); + + fputs("General Parameters:\n\n", gr_file); + + if (RtsFlags.GranFlags.Light) + fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n", + RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair", + RtsFlags.GranFlags.DoThreadMigration?"":"Don't ", + RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"", + RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" : + "Block on Fetch"); + else + fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n", + RtsFlags.GranFlags.proc,RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair", + RtsFlags.GranFlags.DoThreadMigration?"":"Don't ", + RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"", + RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" : + "Block on Fetch"); + + if (RtsFlags.GranFlags.DoBulkFetching) + if (RtsFlags.GranFlags.ThunksToPack) + fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n", + RtsFlags.GranFlags.ThunksToPack, + RtsFlags.GranFlags.packBufferSize); + else + fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n", + RtsFlags.GranFlags.packBufferSize); + else + fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n"); + + fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n", + RtsFlags.GranFlags.FetchStrategy, + RtsFlags.GranFlags.FetchStrategy==0 ? + " block (block-on-fetch)": + RtsFlags.GranFlags.FetchStrategy==1 ? + "only run runnable threads": + RtsFlags.GranFlags.FetchStrategy==2 ? + "create threads only from local sparks": + RtsFlags.GranFlags.FetchStrategy==3 ? + "create threads from local or global sparks": + RtsFlags.GranFlags.FetchStrategy==4 ? 
+ "create sparks and steal threads if necessary": + "unknown"); + + if (RtsFlags.GranFlags.DoPrioritySparking) + fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n"); + + if (RtsFlags.GranFlags.DoPriorityScheduling) + fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n"); + + fprintf(gr_file, "Thread Creation Time %u, Thread Queue Time %u\n", + RtsFlags.GranFlags.Costs.threadcreatetime, + RtsFlags.GranFlags.Costs.threadqueuetime); + fprintf(gr_file, "Thread DeSchedule Time %u, Thread Schedule Time %u\n", + RtsFlags.GranFlags.Costs.threaddescheduletime, + RtsFlags.GranFlags.Costs.threadscheduletime); + fprintf(gr_file, "Thread Context-Switch Time %u\n", + RtsFlags.GranFlags.Costs.threadcontextswitchtime); + fputs("\n\n--------------------\n\n", gr_file); + + fputs("Communication Metrics:\n\n", gr_file); + fprintf(gr_file, + "Latency %u (1st) %u (rest), Fetch %u, Notify %u (Global) %u (Local)\n", + RtsFlags.GranFlags.Costs.latency, + RtsFlags.GranFlags.Costs.additional_latency, + RtsFlags.GranFlags.Costs.fetchtime, + RtsFlags.GranFlags.Costs.gunblocktime, + RtsFlags.GranFlags.Costs.lunblocktime); + fprintf(gr_file, + "Message Creation %u (+ %u after send), Message Read %u\n", + RtsFlags.GranFlags.Costs.mpacktime, + RtsFlags.GranFlags.Costs.mtidytime, + RtsFlags.GranFlags.Costs.munpacktime); + fputs("\n\n--------------------\n\n", gr_file); + + fputs("Instruction Metrics:\n\n", gr_file); + fprintf(gr_file, "Arith %u, Branch %u, Load %u, Store %u, Float %u, Alloc %u\n", + RtsFlags.GranFlags.Costs.arith_cost, + RtsFlags.GranFlags.Costs.branch_cost, + RtsFlags.GranFlags.Costs.load_cost, + RtsFlags.GranFlags.Costs.store_cost, + RtsFlags.GranFlags.Costs.float_cost, + RtsFlags.GranFlags.Costs.heapalloc_cost); + fputs("\n\n++++++++++++++++++++\n\n", gr_file); + +# if 0 + /* binary log files are currently not supported */ + if (RtsFlags.GranFlags.GranSimStats.Binary) + grputw(sizeof(rtsTime)); +# endif + + return (0); +} + +#elif defined(PAR) + +void +init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ + nat i; + char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN]; + char *extension = RtsFlags.ParFlags.ParStats.Binary ? 
"gb" : "gr"; + + sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension); + + if (!RtsFlags.ParFlags.ParStats.Full) + return; + + if ((gr_file = fopen(gr_filename, "w")) == NULL) + barf("Can't open activity report file %s\n", gr_filename); + + setbuf(gr_file, NULL); // for debugging turn buffering off + + /* write header with program name, options and setup to gr_file */ + for (i = 0; i < prog_argc; ++i) { + fputs(prog_argv[i], gr_file); + fputc(' ', gr_file); + } + + if (rts_argc > 0) { + fputs("+RTS ", gr_file); + + for (i = 0; i < rts_argc; ++i) { + fputs(rts_argv[i], gr_file); + fputc(' ', gr_file); + } + } + fputc('\n', gr_file); + + /* record the absolute start time to allow synchronisation of log-files */ + fputs("Start-Time: ", gr_file); + fputs(time_str(), gr_file); + fputc('\n', gr_file); + + startTime = CURRENT_TIME; + ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + fprintf(gr_file, "PE %2u [%s]: TIME\n", thisPE, time_string); + + /* + IF_PAR_DEBUG(verbose, + belch("== Start-time: %ld (%s)", + startTime, time_string)); + */ +# if 0 + ngoq Dogh'q' vImuS + + if (startTime > LL(1000000000)) { + fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE, + (rtsTime) (startTime / LL(1000000000)), + (rtsTime) (startTime % LL(1000000000))); + } else { + fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime); + } + /* binary log files are currently not supported */ + if (RtsFlags.GranFlags.GranSimStats.Binary) + grputw(sizeof(rtsTime)); +# endif + + return; +} +#endif /* PAR */ + +//@cindex end_gr_simulation +#if defined(GRAN) +void +end_gr_simulation(void) +{ + char time_string[TIME_STR_LEN]; + + ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + + if (RtsFlags.GranFlags.GranSimStats.Suppressed) + return; + + /* Print event stats */ + if (RtsFlags.GranFlags.GranSimStats.Global) { + nat i; + + fprintf(stderr,"Total yields: %d\n", + globalGranStats.tot_yields); + + fprintf(stderr,"Total number of threads created: %d ; per PE:\n", + globalGranStats.tot_threads_created); + for (i=0; i0) + fprintf(stderr," Number of closure queue overflows: %u\n", + closure_queue_overflows); + */ + } + } /* RtsFlags.GranFlags.GranSimStats.Global */ + +# if defined(GRAN_COUNT) +# error "GRAN_COUNT not supported; should be parallel ticky profiling, really" + fprintf(stderr,"Update count statistics:\n"); + fprintf(stderr," Total number of updates: %u\n",nUPDs); + fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n", + nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ); + fprintf(stderr," Number of PAPs: %u\n",nPAPs); +# endif + + fprintf(stderr, "Simulation finished after @ %s @ cycles. %d sparks created, %d sparks ignored. Check %s for details.\n", + time_string, sparksCreated, sparksIgnored, gr_filename); + + if (RtsFlags.GranFlags.GranSimStats.Full) + fclose(gr_file); +} + +#elif defined(PAR) + +/* + Under GUM we print only one line. +*/ +void +end_gr_simulation(void) +{ + char time_string[TIME_STR_LEN]; + + ullong_format_string(CURRENT_TIME-startTime, time_string, rtsFalse/*no commas!*/); + + fprintf(stderr, "Computation finished after @ %s @ ms. %d sparks created, %d sparks ignored. 
Check %s for details.\n", + time_string, sparksCreated, sparksIgnored, gr_filename); + + if (RtsFlags.ParFlags.ParStats.Full) + fclose(gr_file); +} +#endif /* PAR */ + +//@node Dumping routines, , Writing to the log-file +//@subsection Dumping routines + +//@cindex DumpGranEvent +void +DumpGranEvent(name, tso) +GranEventType name; +StgTSO *tso; +{ + DumpRawGranEvent(CURRENT_PROC, (PEs)0, name, tso, END_TSO_QUEUE, (StgInt)0, (StgInt)0); +} + +//@cindex DumpRawGranEvent +void +DumpRawGranEvent(proc, p, name, tso, node, sparkname, len) +PEs proc, p; /* proc ... where it happens; p ... where node lives */ +GranEventType name; +StgTSO *tso; +StgClosure *node; +StgInt sparkname, len; +{ + FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1 + StgWord id; + char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN]; + ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/); + + output_file = gr_file; + ASSERT(output_file!=NULL); +# if defined(GRAN) + IF_DEBUG(gran, + fprintf(stderr, "GRAN: Dumping info to file with handle %#x\n", output_file)) + + if (RtsFlags.GranFlags.GranSimStats.Suppressed) + return; +# endif + + id = tso == NULL ? -1 : tso->id; + if (node==stgCast(StgClosure*,&END_TSO_QUEUE_closure)) + strcpy(node_str,"________"); /* "END_TSO_QUEUE"); */ + else + sprintf(node_str,"0x%-6lx",node); + + if (name > GR_EVENT_MAX) + name = GR_EVENT_MAX; + + if (BINARY_STATS) + barf("binary log files not yet supported"); +#if 0 + /* ToDo: fix code for writing binary GrAnSim statistics */ + switch (name) { + case GR_START: + case GR_STARTQ: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); + grputw((StgWord)node); + break; + case GR_FETCH: + case GR_REPLY: + case GR_BLOCK: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); /* this line is bound to */ + grputw(id); /* do the wrong thing */ + break; + default: + grputw(name); + grputw(proc); + abort(); /* die please: a single word */ + /* doesn't represent long long times */ + grputw(TIME_ON_PROC(proc)); + grputw((StgWord)node); + } +#endif + else /* !BINARY_STATS */ + switch (name) { + case GR_START: + case GR_STARTQ: + fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\t[sparks %u]\n", + proc,time_string,gran_event_names[name], + id,node_str,sparkname,len); + break; + case GR_FETCH: + case GR_REPLY: + case GR_BLOCK: + case GR_STOLEN: + case GR_STOLENQ: + fprintf(output_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n", + proc, time_string, gran_event_names[name], + id,node_str,p); + break; + case GR_RESUME: + case GR_RESUMEQ: + case GR_SCHEDULE: + case GR_DESCHEDULE: + fprintf(output_file,"PE %2u [%s]: %-9s\t%lx \n", + proc,time_string,gran_event_names[name],id); + break; + case GR_STEALING: + fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t \t(by %2u)\n", + proc,time_string,gran_event_names[name],id,p); + break; + case GR_ALLOC: + fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n", + proc,time_string,gran_event_names[name],id,len); + break; + default: + fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", + proc,time_string,gran_event_names[name],id,node_str,len); + } +} + +//@cindex DumpGranInfo +void +DumpEndEvent(proc, tso, mandatory_thread) +PEs proc; +StgTSO *tso; +rtsBool mandatory_thread; +{ + FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1 + char time_string[TIME_STR_LEN]; 
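+  /* A sample of the line emitted below in the non-binary case (field
+     values are made up; the format string is in the fprintf below):
+       PE  1 [1234]: END 2a, SN 0, ST 100, EXP F, BB 3, HA 0, RT 17, BT 2 (1), FT 0 (0), LS 1, GS 0, MY T
+  */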
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + + output_file = gr_file; + ASSERT(output_file!=NULL); +#if defined(GRAN) + if (RtsFlags.GranFlags.GranSimStats.Suppressed) + return; +#endif + + if (BINARY_STATS) { + barf("binary log files not yet supported"); +#if 0 + grputw(GR_END); + grputw(proc); + abort(); /* die please: a single word doesn't represent long long times */ + grputw(CURRENT_TIME); /* this line is bound to fail */ + grputw(tso->id); +#ifdef PAR + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); + grputw(0); +#else + grputw(tso->gran.sparkname); + grputw(tso->gran.startedat); + grputw(tso->gran.exported); + grputw(tso->gran.basicblocks); + grputw(tso->gran.allocs); + grputw(tso->gran.exectime); + grputw(tso->gran.blocktime); + grputw(tso->gran.blockcount); + grputw(tso->gran.fetchtime); + grputw(tso->gran.fetchcount); + grputw(tso->gran.localsparks); + grputw(tso->gran.globalsparks); +#endif + grputw(mandatory_thread); +#endif /* 0 */ + } else { + + /* + * NB: DumpGranEvent cannot be used because PE may be wrong + * (as well as the extra info) + */ + fprintf(output_file, "PE %2u [%s]: END %lx, SN %u, ST %lu, EXP %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u), LS %u, GS %u, MY %c\n" + ,proc + ,time_string + ,tso->id +#if defined(GRAN) + ,tso->gran.sparkname + ,tso->gran.startedat + ,tso->gran.exported ? 'T' : 'F' + ,tso->gran.basicblocks + ,tso->gran.allocs + ,tso->gran.exectime + ,tso->gran.blocktime + ,tso->gran.blockcount + ,tso->gran.fetchtime + ,tso->gran.fetchcount + ,tso->gran.localsparks + ,tso->gran.globalsparks +#elif defined(PAR) + ,tso->par.sparkname + ,tso->par.startedat + ,tso->par.exported ? 'T' : 'F' + ,tso->par.basicblocks + ,tso->par.allocs + ,tso->par.exectime + ,tso->par.blocktime + ,tso->par.blockcount + ,tso->par.fetchtime + ,tso->par.fetchcount + ,tso->par.localsparks + ,tso->par.globalsparks +#endif + ,mandatory_thread ? 'T' : 'F' + ); + } +} + +//@cindex DumpTSO +void +DumpTSO(tso) +StgTSO *tso; +{ + FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1 + + output_file = gr_file; + ASSERT(output_file!=NULL); + fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %u, LINK 0x%lx, TYPE %s\n" + ,tso +#if defined(GRAN) + ,tso->gran.sparkname +#elif defined(PAR) + ,tso->par.sparkname +#endif + ,tso->id + ,tso->link + ,/*tso->state==T_MAIN?"MAIN": + TSO_TYPE(tso)==T_FAIL?"FAIL": + TSO_TYPE(tso)==T_REQUIRED?"REQUIRED": + TSO_TYPE(tso)==T_ADVISORY?"ADVISORY": + */ + "???" + ); + + fprintf(output_file,"TSO %lx: SN %u, ST %u, GBL %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u) LS %u, GS %u\n" + ,tso->id +#if defined(GRAN) + ,tso->gran.sparkname + ,tso->gran.startedat + ,tso->gran.exported?'T':'F' + ,tso->gran.basicblocks + ,tso->gran.allocs + ,tso->gran.exectime + ,tso->gran.blocktime + ,tso->gran.blockcount + ,tso->gran.fetchtime + ,tso->gran.fetchcount + ,tso->gran.localsparks + ,tso->gran.globalsparks +#elif defined(PAR) + ,tso->par.sparkname + ,tso->par.startedat + ,tso->par.exported?'T':'F' + ,tso->par.basicblocks + ,tso->par.allocs + ,tso->par.exectime + ,tso->par.blocktime + ,tso->par.blockcount + ,tso->par.fetchtime + ,tso->par.fetchcount + ,tso->par.localsparks + ,tso->par.globalsparks +#endif + ); +} + +#if 0 +/* + ToDo: fix binary output of log files, and support new log file format. +*/ +/* + Output a terminate event and an 8-byte time. 
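+   E.g. v = 0x0102030405060708 is written as the byte sequence
+   01 02 03 04 05 06 07 08 (most significant byte first); with a 4-byte
+   rtsTime the high four bytes are written as zeros.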
+*/ + +//@cindex grterminate +void +grterminate(v) +rtsTime v; +{ + if (!BINARY_STATS) + barf("grterminate: binary statistics not enabled\n"); + +# if defined(GRAN) + if (RtsFlags.GranFlags.GranSimStats.Suppressed) + return; +# endif + + DumpGranEvent(GR_TERMINATE, stgCast(StgTSO*,&END_TSO_QUEUE_closure)); + + if (sizeof(rtsTime) == 4) { + putc('\0', gr_file); + putc('\0', gr_file); + putc('\0', gr_file); + putc('\0', gr_file); + } else { + putc(v >> 56l, gr_file); + putc((v >> 48l) & 0xffl, gr_file); + putc((v >> 40l) & 0xffl, gr_file); + putc((v >> 32l) & 0xffl, gr_file); + } + putc((v >> 24l) & 0xffl, gr_file); + putc((v >> 16l) & 0xffl, gr_file); + putc((v >> 8l) & 0xffl, gr_file); + putc(v & 0xffl, gr_file); +} + +/* + Length-coded output: first 3 bits contain length coding + + 00x 1 byte + 01x 2 bytes + 10x 4 bytes + 110 8 bytes + 111 5 or 9 bytes +*/ + +//@cindex grputw +void +grputw(v) +rtsTime v; +{ + if (!BINARY_STATS) + barf("grputw: binary statistics not enabled\n"); + +# if defined(GRAN) + if (RtsFlags.GranFlags.GranSimStats.Suppressed) + return; +# endif + + if (v <= 0x3fl) { /* length v = 1 byte */ + fputc(v & 0x3f, gr_file); + } else if (v <= 0x3fffl) { /* length v = 2 byte */ + fputc((v >> 8l) | 0x40l, gr_file); + fputc(v & 0xffl, gr_file); + } else if (v <= 0x3fffffffl) { /* length v = 4 byte */ + fputc((v >> 24l) | 0x80l, gr_file); + fputc((v >> 16l) & 0xffl, gr_file); + fputc((v >> 8l) & 0xffl, gr_file); + fputc(v & 0xffl, gr_file); + } else if (sizeof(TIME) == 4) { + fputc(0x70, gr_file); + fputc((v >> 24l) & 0xffl, gr_file); + fputc((v >> 16l) & 0xffl, gr_file); + fputc((v >> 8l) & 0xffl, gr_file); + fputc(v & 0xffl, gr_file); + } else { + if (v <= 0x3fffffffffffffl) + putc((v >> 56l) | 0x60l, gr_file); + else { + putc(0x70, gr_file); + putc((v >> 56l) & 0xffl, gr_file); + } + + putc((v >> 48l) & 0xffl, gr_file); + putc((v >> 40l) & 0xffl, gr_file); + putc((v >> 32l) & 0xffl, gr_file); + putc((v >> 24l) & 0xffl, gr_file); + putc((v >> 16l) & 0xffl, gr_file); + putc((v >> 8l) & 0xffl, gr_file); + putc(v & 0xffl, gr_file); + } +} +#endif /* 0 */ + +#endif /* GRAN || PAR whole file */ diff --git a/ghc/rts/parallel/ParallelDebug.c b/ghc/rts/parallel/ParallelDebug.c new file mode 100644 index 0000000..2924b51 --- /dev/null +++ b/ghc/rts/parallel/ParallelDebug.c @@ -0,0 +1,1390 @@ +/* + Time-stamp: + +Various debugging routines for GranSim and GUM +*/ + +#if defined(GRAN) || defined(PAR) /* whole file */ + +//@node Debugging routines for GranSim and GUM, , , +//@section Debugging routines for GranSim and GUM + +//@menu +//* Includes:: +//* Constants and Variables:: +//* Closures:: +//* Threads:: +//* Events:: +//* Sparks:: +//* Processors:: +//* Shortcuts:: +//* Printing info type:: +//* Printing Pack:et Contents:: +//* End of File:: +//@end menu +//*/ + +//@node Includes, Prototypes, Debugging routines for GranSim and GUM, Debugging routines for GranSim and GUM +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +#include "StgMiscClosures.h" +# if defined(DEBUG) +# include "ParallelDebug.h" +# endif + +//@node Prototypes, Constants and Variables, Includes, Debugging routines for GranSim and GUM +//@subsection Prototypes +/* +rtsBool isOffset(globalAddr *ga); +rtsBool isFixed(globalAddr *ga); +*/ +//@node Constants and Variables, Closures, Prototypes, Debugging routines for GranSim and GUM +//@subsection Constants and Variables + +/* Names as strings; needed by get_closure_info in ClosureMacros.h -- HWL */ 
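+/* (Indexed by closure type, e.g. closure_type_names[get_itbl(node)->type];
+    this table has to be kept in sync with the closure type constants.) */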
+static char *closure_type_names[] = { + "INVALID_OBJECT", /* 0 */ + "CONSTR", /* 1 */ + "CONSTR_1_0", /* 2 */ + "CONSTR_0_1", /* 3 */ + "CONSTR_2_0", /* 4 */ + "CONSTR_1_1", /* 5 */ + "CONSTR_0_2", /* 6 */ + "CONSTR_INTLIKE", /* 7 */ + "CONSTR_CHARLIKE", /* 8 */ + "CONSTR_STATIC", /* 9 */ + "CONSTR_NOCAF_STATIC", /* 10 */ + "FUN", /* 11 */ + "FUN_1_0", /* 12 */ + "FUN_0_1", /* 13 */ + "FUN_2_0", /* 14 */ + "FUN_1_1", /* 15 */ + "FUN_0_2", /* 16 */ + "FUN_STATIC", /* 17 */ + "THUNK", /* 18 */ + "THUNK_1_0", /* 19 */ + "THUNK_0_1", /* 20 */ + "THUNK_2_0", /* 21 */ + "THUNK_1_1", /* 22 */ + "THUNK_0_2", /* 23 */ + "THUNK_STATIC", /* 24 */ + "THUNK_SELECTOR", /* 25 */ + "BCO", /* 26 */ + "AP_UPD", /* 27 */ + "PAP", /* 28 */ + "IND", /* 29 */ + "IND_OLDGEN", /* 30 */ + "IND_PERM", /* 31 */ + "IND_OLDGEN_PERM", /* 32 */ + "IND_STATIC", /* 33 */ + "CAF_UNENTERED", /* 34 */ + "CAF_ENTERED", /* 35 */ + "CAF_BLACKHOLE", /* 36 */ + "RET_BCO", /* 37 */ + "RET_SMALL", /* 38 */ + "RET_VEC_SMALL", /* 39 */ + "RET_BIG", /* 40 */ + "RET_VEC_BIG", /* 41 */ + "RET_DYN", /* 42 */ + "UPDATE_FRAME", /* 43 */ + "CATCH_FRAME", /* 44 */ + "STOP_FRAME", /* 45 */ + "SEQ_FRAME", /* 46 */ + "BLACKHOLE", /* 47 */ + "BLACKHOLE_BQ", /* 48 */ + "SE_BLACKHOLE", /* 49 */ + "SE_CAF_BLACKHOLE", /* 50 */ + "MVAR", /* 51 */ + "ARR_WORDS", /* 52 */ + "MUT_ARR_PTRS", /* 53 */ + "MUT_ARR_PTRS_FROZEN", /* 54 */ + "MUT_VAR", /* 55 */ + "WEAK", /* 56 */ + "FOREIGN", /* 57 */ + "STABLE_NAME", /* 58 */ + "TSO", /* 59 */ + "BLOCKED_FETCH", /* 60 */ + "FETCH_ME", /* 61 */ + "EVACUATED", /* 62 */ + "N_CLOSURE_TYPES", /* 63 */ + "FETCH_ME_BQ", /* 64 */ + "RBH" /* 65 */ +}; + + +#if defined(GRAN) && defined(GRAN_CHECK) +//@node Closures, Threads, Constants and Variables, Debugging routines for GranSim and GUM +//@subsection Closures + +void +G_PRINT_NODE(node) +StgClosure* node; +{ + StgInfoTable *info_ptr; + StgTSO* bqe; + nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0; + char info_hdr_ty[80], info_ty[80]; + + if (node==NULL) { + fprintf(stderr,"NULL\n"); + return; + } else if (node==END_TSO_QUEUE) { + fprintf(stderr,"END_TSO_QUEUE\n"); + return; + } + /* size_and_ptrs(node,&size,&ptrs); */ + info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_hdr_ty); + + /* vhs = var_hdr_size(node); */ + display_info_type(info_ptr,info_ty); + + fprintf(stderr,"Node: 0x%lx", node); + +#if defined(PAR) + fprintf(stderr," [GA: 0x%lx]",GA(node)); +#endif + +#if defined(USE_COST_CENTRES) + fprintf(stderr," [CC: 0x%lx]",CC_HDR(node)); +#endif + +#if defined(GRAN) + fprintf(stderr," [Bitmask: 0%lo]",PROCS(node)); +#endif + + if (info_ptr->type==TSO) + fprintf(stderr," TSO: 0x%lx (%x) IP: 0x%lx (%s), type %s \n ", + (StgTSO*)node, ((StgTSO*)node)->id, info_ptr, info_hdr_ty, info_ty); + else + fprintf(stderr," IP: 0x%lx (%s), type %s \n VHS: %d, size: %ld, ptrs:%ld, nonptrs: %ld\n ", + info_ptr,info_hdr_ty,info_ty,vhs,size,ptrs,nonptrs); + + /* For now, we ignore the variable header */ + + fprintf(stderr," Ptrs: "); + for(i=0; i < ptrs; ++i) + { + if ( (i+1) % 6 == 0) + fprintf(stderr,"\n "); + fprintf(stderr," 0x%lx[P]",node->payload[i]); + }; + + fprintf(stderr," Data: "); + for(i=0; i < nonptrs; ++i) + { + if( (i+1) % 6 == 0) + fprintf(stderr,"\n "); + fprintf(stderr," %lu[D]",node->payload[ptrs+i]); + } + fprintf(stderr, "\n"); + + + switch (info_ptr->type) + { + case TSO: + fprintf(stderr,"\n TSO_LINK: %#lx", + ((StgTSO*)node)->link); + break; + + case BLACKHOLE: + case RBH: + bqe = ((StgBlockingQueue*)node)->blocking_queue; + 
fprintf(stderr," BQ of %#lx: ", node); + G_PRINT_BQ(bqe); + break; + case FETCH_ME: + case FETCH_ME_BQ: + printf("Panic: found FETCH_ME or FETCH_ME_BQ Infotable in GrAnSim system.\n"); + break; + default: + /* do nothing */ + } +} + +void +G_PPN(node) /* Extracted from PrintPacket in Pack.lc */ +StgClosure* node; +{ + StgInfoTable *info ; + nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0, locn = 0; + char info_type[80]; + + /* size_and_ptrs(node,&size,&ptrs); */ + info = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type); + + if (info->type == FETCH_ME || info->type == FETCH_ME_BQ || + info->type == BLACKHOLE || info->type == RBH ) + size = ptrs = nonptrs = vhs = 0; + + if (closure_THUNK(node)) { + if (!closure_UNPOINTED(node)) + fputs("SHARED ", stderr); + else + fputs("UNSHARED ", stderr); + } + if (info->type==BLACKHOLE) { + fputs("BLACK HOLE\n", stderr); + } else { + /* Fixed header */ + fprintf(stderr, "(%s) FH [%#lx", info_type, node[locn++]); + for (i = 1; i < FIXED_HS; i++) + fprintf(stderr, " %#lx", node[locn++]); + + /* Variable header */ + if (vhs > 0) { + fprintf(stderr, "] VH [%#lx", node->payload[0]); + + for (i = 1; i < vhs; i++) + fprintf(stderr, " %#lx", node->payload[i]); + } + + fprintf(stderr, "] PTRS %u", ptrs); + + /* Non-pointers */ + if (nonptrs > 0) { + fprintf(stderr, " NPTRS [%#lx", node->payload[ptrs]); + + for (i = 1; i < nonptrs; i++) + fprintf(stderr, " %#lx", node->payload[ptrs+i]); + + putc(']', stderr); + } + putc('\n', stderr); + } + +} + +#if 0 +// ToDo: fix this!! -- HWL +void +G_INFO_TABLE(node) +StgClosure *node; +{ + StgInfoTable *info_ptr; + nat size = 0, ptrs = 0, nonptrs = 0, vhs = 0; + char info_type[80], hdr_type[80]; + + info_hdr_type(info_ptr, hdr_type); + + // get_itbl(node); + info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type); + fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n", + info_type,info_ptr,(W_) ENTRY_CODE(info_ptr), + size, ptrs); + // INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr)); + + if (closure_THUNK(node) && !closure_UNPOINTED(node) ) { + fprintf(stderr," RBH InfoPtr: %#lx\n", + RBH_INFOPTR(info_ptr)); + } + +#if defined(PAR) + fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr)); +#endif + +#if defined(USE_COST_CENTRES) + fprintf(stderr,"Cost Centre (???): 0x%lx\n",INFO_CAT(info_ptr)); +#endif + +#if defined(_INFO_COPYING) + fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n", + INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr)); +#endif + +#if defined(_INFO_COMPACTING) + fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n", + (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr)); + fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t", + (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr)); +#if 0 /* avoid INFO_TYPE */ + if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE) + fprintf(stderr,"plus specialised code\n"); + else + fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr)); +#endif /* 0 */ +#endif /* _INFO_COMPACTING */ +} +#endif /* 0 */ + +//@cindex G_PRINT_BQ +void +G_PRINT_BQ(node) +StgClosure* node; +{ + StgInfoTable *info; + StgTSO *tso, *last; + char str[80], str0[80]; + + fprintf(stderr,"\n[PE %d] @ %lu BQ: ", + CurrentProc,CurrentTime[CurrentProc]); + if ( node == (StgClosure*)NULL ) { + fprintf(stderr," NULL.\n"); + return; + } + if ( node == END_TSO_QUEUE ) { + fprintf(stderr," _|_\n"); + return; + } + tso = ((StgBlockingQueue*)node)->blocking_queue; + while 
(node != END_TSO_QUEUE) { + PEs proc; + + /* Find where the tso lives */ + proc = where_is(node); + info = get_itbl(node); + + switch (info->type) { + case TSO: + strcpy(str0,"TSO"); + break; + case BLOCKED_FETCH: + strcpy(str0,"BLOCKED_FETCH"); + break; + default: + strcpy(str0,"???"); + break; + } + + if(proc == CurrentProc) + fprintf(stderr," %#lx (%x) L %s,", + node, ((StgBlockingQueue*)node)->blocking_queue, str0); + else + fprintf(stderr," %#lx (%x) G (PE %d) %s,", + node, ((StgBlockingQueue*)node)->blocking_queue, proc, str0); + + last = tso; + tso = last->link; + } + if ( tso == END_TSO_QUEUE ) + fprintf(stderr," _|_\n"); +} + +//@node Threads, Events, Closures, Debugging routines for GranSim and GUM +//@subsection Threads + +void +G_CURR_THREADQ(verbose) +StgInt verbose; +{ + fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc); + G_THREADQ(run_queue_hd, verbose); +} + +void +G_THREADQ(closure, verbose) +StgTSO* closure; +StgInt verbose; +{ + StgTSO* x; + + fprintf(stderr,"Thread Queue: "); + for (x=closure; x!=END_TSO_QUEUE; x=x->link) + if (verbose) + G_TSO(x,0); + else + fprintf(stderr," %#lx",x); + + if (closure==END_TSO_QUEUE) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +void +G_TSO(closure,verbose) +StgTSO* closure; +StgInt verbose; +{ + + if (closure==END_TSO_QUEUE) { + fprintf(stderr,"TSO at %#lx is END_TSO_QUEUE!\n"); + return; + } + + if ( verbose & 0x08 ) { /* short info */ + fprintf(stderr,"[TSO @ %#lx, PE %d]: Id: %#lx, Link: %#lx\n", + closure,where_is(closure), + closure->id,closure->link); + return; + } + + fprintf(stderr,"TSO at %#lx has the following contents:\n", + closure); + + fprintf(stderr,"> Id: \t%#lx",closure->id); + // fprintf(stderr,"\tstate: \t%#lx",closure->state); + fprintf(stderr,"\twhatNext: \t%#lx",closure->whatNext); + fprintf(stderr,"\tlink: \t%#lx\n",closure->link); + // fprintf(stderr,"\tType: \t%s\n",type_name[TSO_TYPE(closure)]); + fprintf(stderr,">PRI: \t%#lx", closure->gran.pri); + fprintf(stderr,"\tMAGIC: \t%#lx %s\n", closure->gran.magic, + (closure->gran.magic==TSO_MAGIC ? 
"it IS a TSO" : "THIS IS NO TSO!!")); + if ( verbose & 0x04 ) { + fprintf(stderr, "Stack: stack @ %#lx (stack_size: %u; max_stack_size: %u)\n", + closure->stack, closure->stack_size, closure->max_stack_size); + fprintf(stderr, " sp: %#lx, su: %#lx, splim: %#lx\n", + closure->sp, closure->su, closure->splim); + } + // fprintf(stderr,"\n"); + if (verbose & 0x01) { + // fprintf(stderr,"} LOCKED: \t%#lx",closure->locked); + fprintf(stderr,"} SPARKNAME: \t%#lx\n", closure->gran.sparkname); + fprintf(stderr,"} STARTEDAT: \t%#lx", closure->gran.startedat); + fprintf(stderr,"\tEXPORTED: \t%#lx\n", closure->gran.exported); + fprintf(stderr,"} BASICBLOCKS: \t%#lx", closure->gran.basicblocks); + fprintf(stderr,"\tALLOCS: \t%#lx\n", closure->gran.allocs); + fprintf(stderr,"} EXECTIME: \t%#lx", closure->gran.exectime); + fprintf(stderr,"\tFETCHTIME: \t%#lx\n", closure->gran.fetchtime); + fprintf(stderr,"} FETCHCOUNT: \t%#lx", closure->gran.fetchcount); + fprintf(stderr,"\tBLOCKTIME: \t%#lx\n", closure->gran.blocktime); + fprintf(stderr,"} BLOCKCOUNT: \t%#lx", closure->gran.blockcount); + fprintf(stderr,"\tBLOCKEDAT: \t%#lx\n", closure->gran.blockedat); + fprintf(stderr,"} GLOBALSPARKS:\t%#lx", closure->gran.globalsparks); + fprintf(stderr,"\tLOCALSPARKS:\t%#lx\n", closure->gran.localsparks); + } + if ( verbose & 0x02 ) { + fprintf(stderr,"BQ that starts with this TSO: "); + G_PRINT_BQ(closure); + } +} + +//@node Events, Sparks, Threads, Debugging routines for GranSim and GUM +//@subsection Events + +void +G_EVENT(event, verbose) +rtsEventQ event; +StgInt verbose; +{ + if (verbose) { + print_event(event); + }else{ + fprintf(stderr," %#lx",event); + } +} + +void +G_EVENTQ(verbose) +StgInt verbose; +{ + extern rtsEventQ EventHd; + rtsEventQ x; + + fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd); + for (x=EventHd; x!=NULL; x=x->next) { + G_EVENT(x,verbose); + } + if (EventHd==NULL) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +void +G_PE_EQ(pe,verbose) +PEs pe; +StgInt verbose; +{ + extern rtsEventQ EventHd; + rtsEventQ x; + + fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd); + for (x=EventHd; x!=NULL; x=x->next) { + if (x->proc==pe) + G_EVENT(x,verbose); + } + if (EventHd==NULL) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +//@node Sparks, Processors, Events, Debugging routines for GranSim and GUM +//@subsection Sparks + +void +G_SPARK(spark, verbose) +rtsSparkQ spark; +StgInt verbose; +{ + if (spark==(rtsSpark*)NULL) { + belch("G_SPARK: NULL spark; aborting"); + return; + } + if (verbose) + print_spark(spark); + else + fprintf(stderr," %#lx",spark); +} + +void +G_SPARKQ(spark,verbose) +rtsSparkQ spark; +StgInt verbose; +{ + rtsSparkQ x; + + if (spark==(rtsSpark*)NULL) { + belch("G_SPARKQ: NULL spark; aborting"); + return; + } + + fprintf(stderr,"RtsSparkQ (hd @%#lx):\n",spark); + for (x=spark; x!=NULL; x=x->next) { + G_SPARK(x,verbose); + } + if (spark==NULL) + fprintf(stderr,"NIL\n"); + else + fprintf(stderr,"\n"); +} + +void +G_CURR_SPARKQ(verbose) +StgInt verbose; +{ + G_SPARKQ(pending_sparks_hd,verbose); +} + +//@node Processors, Shortcuts, Sparks, Debugging routines for GranSim and GUM +//@subsection Processors + +void +G_PROC(proc,verbose) +StgInt proc; +StgInt verbose; +{ + extern rtsEventQ EventHd; + extern char *proc_status_names[]; + + fprintf(stderr,"Status of proc %d at time %d (%#lx): %s (%s)\n", + proc,CurrentTime[proc],CurrentTime[proc], + (CurrentProc==proc)?"ACTIVE":"INACTIVE", + proc_status_names[procStatus[proc]]); + 
G_THREADQ(run_queue_hds[proc],verbose & 0x2);
+  if ( (CurrentProc==proc) )
+    G_TSO(CurrentTSO,1);
+
+  if (EventHd!=NULL)
+    fprintf(stderr,"Next event (%s) is on proc %d\n",
+            event_names[EventHd->evttype],EventHd->proc);
+
+  if (verbose & 0x1) {
+    fprintf(stderr,"\nREQUIRED sparks: ");
+    G_SPARKQ(pending_sparks_hds[proc],1);
+    fprintf(stderr,"\nADVISORY sparks: ");
+    G_SPARKQ(pending_sparks_hds[proc],1);
+  }
+}
+
+//@node Shortcuts, Printing info type, Processors, Debugging routines for GranSim and GUM
+//@subsection Shortcuts
+
+/* Debug Processor */
+void
+GP(proc)
+StgInt proc;
+{ G_PROC(proc,1);
+}
+
+/* Debug Current Processor */
+void
+GCP(){ G_PROC(CurrentProc,2); }
+
+/* Debug TSO */
+void
+GT(StgPtr tso){
+  G_TSO(tso,1);
+}
+
+/* Debug CurrentTSO */
+void
+GCT(){
+  fprintf(stderr,"Current Proc: %d\n",CurrentProc);
+  G_TSO(CurrentTSO,1);
+}
+
+/* Shorthand for debugging event queue */
+void
+GEQ() { G_EVENTQ(1); }
+
+/* Shorthand for debugging thread queue of a processor */
+void
+GTQ(PEs p) { G_THREADQ(run_queue_hds[p],1); }
+
+/* Shorthand for debugging thread queue of current processor */
+void
+GCTQ() { G_THREADQ(run_queue_hds[CurrentProc],1); }
+
+/* Shorthand for debugging spark queue of a processor */
+void
+GSQ(PEs p) { G_SPARKQ(pending_sparks_hds[p],1); }
+
+/* Shorthand for debugging spark queue of current processor */
+void
+GCSQ() { G_CURR_SPARKQ(1); }
+
+/* Shorthand for printing a node */
+void
+GN(StgPtr node) { G_PRINT_NODE(node); }
+
+/* Shorthand for printing info table */
+#if 0
+// ToDo: fix -- HWL
+void
+GIT(StgPtr node) { G_INFO_TABLE(node); }
+#endif
+
+void
+printThreadQPtrs(void)
+{
+  PEs p;
+  for (p=0; p<RtsFlags.GranFlags.proc; p++)
+    fprintf(stderr, ", PE %d: %p", p, run_queue_hds[p]);
+  fputs("\n", stderr);
+}
+
+//@node Printing info type, Printing Packet Contents, Shortcuts, Debugging routines for GranSim and GUM
+//@subsection Printing info type
+
+char *
+info_type(StgClosure *closure){
+  return closure_type_names[get_itbl(closure)->type];
+}
+
+char *
+info_type_by_ip(StgInfoTable *ip){
+  return closure_type_names[ip->type];
+}
+
+void
+info_hdr_type(StgClosure *closure, char *res){
+  strcpy(res,closure_type_names[get_itbl(closure)->type]);
+}
+
+//@node Printing Packet Contents, End of File, Printing info type, Debugging routines for GranSim and GUM
+//@subsection Printing Packet Contents
+
+/*
+  PrintPacket is in Pack.c because it makes use of closure queues
+*/
+
+#if defined(GRAN) || defined(PAR)
+
+/*
+  Print graph rooted at q. The structure of this recursive printing routine
+  should be the same as in the graph traversals when packing a graph in
+  GUM. Thus, it demonstrates the structure of such a generic graph
+  traversal, and in particular, how to extract pointer and non-pointer info
+  from the multitude of different heap objects available.
+
+  {evacuate}Daq ngoqvam nIHlu'pu'!!
+*/
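+
+/*
+  A minimal usage sketch (editorial, hypothetical -- `dump_root' is not part
+  of this patch): PrintGraph is typically invoked on a graph root with an
+  initial indentation level of 0, and info_type above gives a printable name
+  for the root's closure type.
+*/
+#if 0 /* sketch only */
+static void
+dump_root(StgClosure *root)
+{
+  if (root == (StgClosure *)NULL) {
+    fprintf(stderr, "dump_root: NULL graph root\n");
+    return;
+  }
+  fprintf(stderr, "Graph rooted at %p (%s):\n", root, info_type(root));
+  PrintGraph(root, 0);   /* recursively print the transitive closure */
+}
+#endif /* 0 (sketch) */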
+
+void
+PrintGraph(StgClosure *p, int indent_level)
+{
+  StgPtr x, q;
+  rtsBool printed = rtsFalse;
+  nat i, j;
+  const StgInfoTable *info;
+
+  q = p;                        /* save ptr to object */
+
+  /* indentation */
+  for (j=0; j<indent_level; j++)
+    fputs(" ", stderr);
+
+  info = get_itbl((StgClosure *)p);
+
+  switch (info -> type) {
+
+    case BCO:
+    {
+        StgBCO* bco = stgCast(StgBCO*,p);
+        nat i;
+        fprintf(stderr, "BCO (%p) with %d pointers\n", p, bco->n_ptrs);
+        for (i = 0; i < bco->n_ptrs; i++) {
+            // bcoConstCPtr(bco,i) =
+            PrintGraph(bcoConstCPtr(bco,i), indent_level+1);
+        }
+        // p += bco_sizeW(bco);
+        break;
+    }
+
+    case MVAR:
+        /* treat MVars specially, because we don't want to PrintGraph the
+         * mut_link field in the middle of the closure.
+         */
+    {
+        StgMVar *mvar = ((StgMVar *)p);
+        // evac_gen = 0;
+        fprintf(stderr, "MVAR (%p) with 3 pointers (head, tail, value)\n", p);
+        // (StgClosure *)mvar->head =
+        PrintGraph((StgClosure *)mvar->head, indent_level+1);
+        // (StgClosure *)mvar->tail =
+        PrintGraph((StgClosure *)mvar->tail, indent_level+1);
+        //(StgClosure *)mvar->value =
+        PrintGraph((StgClosure *)mvar->value, indent_level+1);
+        // p += sizeofW(StgMVar);
+        // evac_gen = saved_evac_gen;
+        break;
+    }
+
+    case THUNK_2_0:
+      if (!printed) {
+        fprintf(stderr, "THUNK_2_0 (%p) with 2 pointers\n", p);
+        printed = rtsTrue;
+      }
+    case FUN_2_0:
+      if (!printed) {
+        fprintf(stderr, "FUN_2_0 (%p) with 2 pointers\n", p);
+        printed = rtsTrue;
+      }
+      // scavenge_srt(info);
+    case CONSTR_2_0:
+      if (!printed) {
+        fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p);
+        printed = rtsTrue;
+      }
+      // ((StgClosure *)p)->payload[0] =
+      PrintGraph(((StgClosure *)p)->payload[0],
+                 indent_level+1);
+      // ((StgClosure *)p)->payload[1] =
+      PrintGraph(((StgClosure *)p)->payload[1],
+                 indent_level+1);
+      // p += sizeofW(StgHeader) + 2;
+      break;
+
+    case THUNK_1_0:
+      // scavenge_srt(info);
+      fprintf(stderr, "THUNK_1_0 (%p) with 1 pointer\n", p);
+      // ((StgClosure *)p)->payload[0] =
+      PrintGraph(((StgClosure *)p)->payload[0],
+                 indent_level+1);
+      // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+      break;
+
+    case FUN_1_0:
+      if (!printed) {
+        fprintf(stderr, "FUN_1_0 (%p) with 1 pointer\n", p);
+        printed = rtsTrue;
+      }
+      // scavenge_srt(info);
+    case CONSTR_1_0:
+      if (!printed) {
+        fprintf(stderr, "CONSTR_1_0 (%p) with 1 pointer\n", p);
+        printed = rtsTrue;
+      }
+      // ((StgClosure *)p)->payload[0] =
+      PrintGraph(((StgClosure *)p)->payload[0],
+                 indent_level+1);
+      // p += sizeofW(StgHeader) + 1;
+      break;
+
+    case THUNK_0_1:
+      fprintf(stderr, "THUNK_0_1 (%p) with 0 pointers\n", p);
+      // scavenge_srt(info);
+      // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+      break;
+
+    case FUN_0_1:
+      if (!printed) {
+        fprintf(stderr, "FUN_0_1 (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+      //scavenge_srt(info);
+    case CONSTR_0_1:
+      if (!printed) {
+        fprintf(stderr, "CONSTR_0_1 (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+      //p += sizeofW(StgHeader) + 1;
+      break;
+
+    case THUNK_0_2:
+      if (!printed) {
+        fprintf(stderr, "THUNK_0_2 (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+    case FUN_0_2:
+      if (!printed) {
+        fprintf(stderr, "FUN_0_2 (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+      // scavenge_srt(info);
+    case CONSTR_0_2:
+      if (!printed) {
+        fprintf(stderr, "CONSTR_0_2 (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+      // p += sizeofW(StgHeader) + 2;
+      break;
+
+    case THUNK_1_1:
+      if (!printed) {
+        fprintf(stderr, "THUNK_1_1 (%p) with 1 pointer\n", p);
+        printed = rtsTrue;
+      }
+    case FUN_1_1:
+      if (!printed) {
+        fprintf(stderr, "FUN_1_1 (%p) with 1 pointer\n", p);
+        printed = rtsTrue;
+      }
+      // scavenge_srt(info);
+    case CONSTR_1_1:
+      if (!printed) {
+        fprintf(stderr, "CONSTR_1_1 (%p) with 1 pointer\n", p);
+        printed = rtsTrue;
+      }
+      // ((StgClosure *)p)->payload[0] =
+      PrintGraph(((StgClosure *)p)->payload[0],
+                 indent_level+1);
+      // p += sizeofW(StgHeader) + 2;
+      break;
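+
+    /* NB: the paired cases above (e.g. THUNK_2_0 falling through to
+       CONSTR_2_0) share their payload-printing code; the `printed' flag
+       makes sure that only the case that actually matched prints the
+       closure type.  The generic FUN/THUNK/CONSTR cases below use the
+       same trick. */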
+
+    case FUN:
+      if (!printed) {
+        fprintf(stderr, "FUN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+        printed = rtsTrue;
+      }
+      /* fall through */
+
+    case THUNK:
+      if (!printed) {
+        fprintf(stderr, "THUNK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+        printed = rtsTrue;
+      }
+      // scavenge_srt(info);
+      /* fall through */
+
+    case CONSTR:
+      if (!printed) {
+        fprintf(stderr, "CONSTR (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+        printed = rtsTrue;
+      }
+      /* basically same as loop in STABLE_NAME case */
+      for (i=0; i<info->layout.payload.ptrs; i++)
+        PrintGraph(((StgClosure *)p)->payload[i],
+                   indent_level+1);
+      break;
+      /* NOT fall through */
+
+    case WEAK:
+      if (!printed) {
+        fprintf(stderr, "WEAK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+        printed = rtsTrue;
+      }
+      /* fall through */
+
+    case FOREIGN:
+      if (!printed) {
+        fprintf(stderr, "FOREIGN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+        printed = rtsTrue;
+      }
+      /* fall through */
+
+    case STABLE_NAME:
+    {
+      StgPtr end;
+
+      if (!printed) {
+        fprintf(stderr, "STABLE_NAME (%p) with %d pointers (not followed!)\n",
+                p, info->layout.payload.ptrs);
+        printed = rtsTrue;
+      }
+      end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+      for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+        // (StgClosure *)*p =
+        //PrintGraph((StgClosure *)*p, indent_level+1);
+        fprintf(stderr, ", %p", *p);
+      }
+      //fputs("\n", stderr);
+      // p += info->layout.payload.nptrs;
+      break;
+    }
+
+    case IND_PERM:
+      //if (step->gen->no != 0) {
+      // SET_INFO(((StgClosure *)p), &IND_OLDGEN_PERM_info);
+      //}
+      if (!printed) {
+        fprintf(stderr, "IND_PERM (%p) with indirection to %p\n",
+                p, ((StgIndOldGen *)p)->indirectee);
+        printed = rtsTrue;
+      }
+      /* fall through */
+
+    case IND_OLDGEN_PERM:
+      if (!printed) {
+        fprintf(stderr, "IND_OLDGEN_PERM (%p) with indirection to %p\n",
+                p, ((StgIndOldGen *)p)->indirectee);
+        printed = rtsTrue;
+      }
+      // ((StgIndOldGen *)p)->indirectee =
+      PrintGraph(((StgIndOldGen *)p)->indirectee,
+                 indent_level+1);
+      //if (failed_to_evac) {
+      //  failed_to_evac = rtsFalse;
+      //  recordOldToNewPtrs((StgMutClosure *)p);
+      //}
+      // p += sizeofW(StgIndOldGen);
+      break;
+
+    case CAF_UNENTERED:
+    {
+        StgCAF *caf = (StgCAF *)p;
+
+        fprintf(stderr, "CAF_UNENTERED (%p) pointing to %p\n", p, caf->body);
+        PrintGraph(caf->body, indent_level+1);
+        //if (failed_to_evac) {
+        //  failed_to_evac = rtsFalse;
+        //  recordOldToNewPtrs((StgMutClosure *)p);
+        //} else {
+        //  caf->mut_link = NULL;
+        //}
+        //p += sizeofW(StgCAF);
+        break;
+    }
+
+    case CAF_ENTERED:
+    {
+        StgCAF *caf = (StgCAF *)p;
+
+        fprintf(stderr, "CAF_ENTERED (%p) pointing to %p and %p\n",
+                p, caf->body, caf->value);
+        // caf->body =
+        PrintGraph(caf->body, indent_level+1);
+        //caf->value =
+        PrintGraph(caf->value, indent_level+1);
+        //if (failed_to_evac) {
+        //  failed_to_evac = rtsFalse;
+        //  recordOldToNewPtrs((StgMutClosure *)p);
+        //} else {
+        //  caf->mut_link = NULL;
+        //}
+        //p += sizeofW(StgCAF);
+        break;
+    }
+
+    case MUT_VAR:
+      /* ignore MUT_CONSs */
+      fprintf(stderr, "MUT_VAR (%p) pointing to %p\n", p, ((StgMutVar *)p)->var);
+      if (((StgMutVar *)p)->header.info != &MUT_CONS_info) {
+        //evac_gen = 0;
+        PrintGraph(((StgMutVar *)p)->var, indent_level+1);
+        //evac_gen = saved_evac_gen;
+      }
+      //p += sizeofW(StgMutVar);
+      break;
+
+    case CAF_BLACKHOLE:
+      if (!printed) {
+        fprintf(stderr, "CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+    case SE_CAF_BLACKHOLE:
+      if (!printed) {
+        fprintf(stderr, "SE_CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+    case SE_BLACKHOLE:
+      if (!printed) {
+        fprintf(stderr, "SE_BLACKHOLE (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+    case BLACKHOLE:
+      if (!printed) {
+        fprintf(stderr, "BLACKHOLE (%p) with 0 pointers\n", p);
+        printed = rtsTrue;
+      }
+      //p += BLACKHOLE_sizeW();
+      break;
+
+    case BLACKHOLE_BQ:
+    {
+      StgBlockingQueue *bh = (StgBlockingQueue *)p;
+      // (StgClosure 
*)bh->blocking_queue = + fprintf(stderr, "BLACKHOLE_BQ (%p) pointing to %p\n", + p, (StgClosure *)bh->blocking_queue); + PrintGraph((StgClosure *)bh->blocking_queue, indent_level+1); + //if (failed_to_evac) { + // failed_to_evac = rtsFalse; + // recordMutable((StgMutClosure *)bh); + //} + // p += BLACKHOLE_sizeW(); + break; + } + + case THUNK_SELECTOR: + { + StgSelector *s = (StgSelector *)p; + fprintf(stderr, "THUNK_SELECTOR (%p) pointing to %p\n", + p, s->selectee); + PrintGraph(s->selectee, indent_level+1); + // p += THUNK_SELECTOR_sizeW(); + break; + } + + case IND: + fprintf(stderr, "IND (%p) pointing to %p\n", p, ((StgInd*)p)->indirectee); + PrintGraph(((StgInd*)p)->indirectee, indent_level+1); + break; + + case IND_OLDGEN: + fprintf(stderr, "IND_OLDGEN (%p) pointing to %p\n", + p, ((StgIndOldGen*)p)->indirectee); + PrintGraph(((StgIndOldGen*)p)->indirectee, indent_level+1); + break; + + case CONSTR_INTLIKE: + fprintf(stderr, "CONSTR_INTLIKE (%p) with 0 pointers\n", p); + break; + case CONSTR_CHARLIKE: + fprintf(stderr, "CONSTR_CHARLIKE (%p) with 0 pointers\n", p); + break; + case CONSTR_STATIC: + fprintf(stderr, "CONSTR_STATIC (%p) with 0 pointers\n", p); + break; + case CONSTR_NOCAF_STATIC: + fprintf(stderr, "CONSTR_NOCAF_STATIC (%p) with 0 pointers\n", p); + break; + case THUNK_STATIC: + fprintf(stderr, "THUNK_STATIC (%p) with 0 pointers\n", p); + break; + case FUN_STATIC: + fprintf(stderr, "FUN_STATIC (%p) with 0 pointers\n", p); + break; + case IND_STATIC: + fprintf(stderr, "IND_STATIC (%p) with 0 pointers\n", p); + break; + + case RET_BCO: + fprintf(stderr, "RET_BCO (%p) with 0 pointers\n", p); + break; + case RET_SMALL: + fprintf(stderr, "RET_SMALL (%p) with 0 pointers\n", p); + break; + case RET_VEC_SMALL: + fprintf(stderr, "RET_VEC_SMALL (%p) with 0 pointers\n", p); + break; + case RET_BIG: + fprintf(stderr, "RET_BIG (%p) with 0 pointers\n", p); + break; + case RET_VEC_BIG: + fprintf(stderr, "RET_VEC_BIG (%p) with 0 pointers\n", p); + break; + case RET_DYN: + fprintf(stderr, "RET_DYN (%p) with 0 pointers\n", p); + break; + case UPDATE_FRAME: + fprintf(stderr, "UPDATE_FRAME (%p) with 0 pointers\n", p); + break; + case STOP_FRAME: + fprintf(stderr, "STOP_FRAME (%p) with 0 pointers\n", p); + break; + case CATCH_FRAME: + fprintf(stderr, "CATCH_FRAME (%p) with 0 pointers\n", p); + break; + case SEQ_FRAME: + fprintf(stderr, "SEQ_FRAME (%p) with 0 pointers\n", p); + break; + + case AP_UPD: /* same as PAPs */ + fprintf(stderr, "AP_UPD (%p) with 0 pointers\n", p); + case PAP: + /* Treat a PAP just like a section of stack, not forgetting to + * PrintGraph the function pointer too... 
+       */
+    {
+        StgPAP* pap = stgCast(StgPAP*,p);
+
+        fprintf(stderr, "PAP (%p) pointing to %p\n", p, pap->fun);
+        // pap->fun =
+        PrintGraph(pap->fun, indent_level+1);
+        //scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args);
+        //p += pap_sizeW(pap);
+        break;
+    }
+
+    case ARR_WORDS:
+      fprintf(stderr, "ARR_WORDS (%p) with 0 pointers\n", p);
+      /* nothing to follow */
+      //p += arr_words_sizeW(stgCast(StgArrWords*,p));
+      break;
+
+    case MUT_ARR_PTRS:
+      /* follow everything */
+    {
+        StgPtr next;
+
+        fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)\n",
+                p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+        // evac_gen = 0;           /* repeatedly mutable */
+        next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+        for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+            // (StgClosure *)*p =
+            //  PrintGraph((StgClosure *)*p, indent_level+1);
+          fprintf(stderr, ", %p", *p);
+        }
+        fputs("\n", stderr);
+        //evac_gen = saved_evac_gen;
+        break;
+    }
+
+    case MUT_ARR_PTRS_FROZEN:
+      /* follow everything */
+    {
+        StgPtr start = p, next;
+
+        fprintf(stderr, "MUT_ARR_PTRS_FROZEN (%p) with %d pointers (not followed)",
+                p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+        next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+        for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+            // (StgClosure *)*p =
+            //  PrintGraph((StgClosure *)*p, indent_level+1);
+          fprintf(stderr, ", %p", *p);
+        }
+        fputs("\n", stderr);
+        //if (failed_to_evac) {
+          /* we can do this easier... */
+        //  recordMutable((StgMutClosure *)start);
+        //  failed_to_evac = rtsFalse;
+        //}
+        break;
+    }
+
+    case TSO:
+    {
+        StgTSO *tso;
+
+        tso = (StgTSO *)p;
+        fprintf(stderr, "TSO (%p) with link field %p\n", p, (StgClosure *)tso->link);
+        // evac_gen = 0;
+        /* chase the link field for any TSOs on the same queue */
+        // (StgClosure *)tso->link =
+        PrintGraph((StgClosure *)tso->link, indent_level+1);
+        //if (tso->blocked_on) {
+        //  tso->blocked_on = PrintGraph(tso->blocked_on);
+        //}
+        /* scavenge this thread's stack */
+        //scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
+        //evac_gen = saved_evac_gen;
+        //p += tso_sizeW(tso);
+        break;
+    }
+
+#if defined(GRAN) || defined(PAR)
+    case RBH:
+    {
+      StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p));
+      //if (LOOKS_LIKE_GHC_INFO(rip))
+      //  fprintf(stderr, "RBH (%p) with 0 pointers (reverted type=%s)\n",
+      //          p, info_type_by_ip(rip));
+      //else
+        fprintf(stderr, "RBH (%p) with 0 pointers (reverted IP=%x)\n",
+                p, rip);
+    }
+      break;
+#endif
+#if defined(PAR)
+    case BLOCKED_FETCH:
+      fprintf(stderr, "BLOCKED_FETCH (%p) with 0 pointers (link=%p)\n",
+              p, ((StgBlockedFetch *)p)->link);
+      break;
+    case FETCH_ME:
+      fprintf(stderr, "FETCH_ME (%p) with 0 pointers\n", p);
+      break;
+    case FETCH_ME_BQ:
+      fprintf(stderr, "FETCH_ME_BQ (%p) with 0 pointers (blocking_queue=%p)\n",
+              p, ((StgFetchMeBlockingQueue *)p)->blocking_queue);
+      break;
+#endif
+    case EVACUATED:
+      fprintf(stderr, "EVACUATED (%p) with 0 pointers (evacuee=%p)\n",
+              p, ((StgEvacuated *)p)->evacuee);
+      break;
+
+    default:
+      barf("PrintGraph: unknown closure %d (%s)",
+           info -> type, info_type_by_ip((StgInfoTable *)info));
+    }
+
+   /* If we didn't manage to promote all the objects pointed to by
+    * the current object, then we have to designate this object as
+    * mutable (because it contains old-to-new generation pointers).
+ */ + //if (failed_to_evac) { + // mkMutCons((StgClosure *)q, &generations[evac_gen]); + // failed_to_evac = rtsFalse; + //} +} + +#endif /* GRAN */ + +#endif /* GRAN || PAR */ +//@node End of File, , Printing Packet Contents, Debugging routines for GranSim and GUM +//@subsection End of File diff --git a/ghc/rts/parallel/ParallelDebug.h b/ghc/rts/parallel/ParallelDebug.h new file mode 100644 index 0000000..62f2232 --- /dev/null +++ b/ghc/rts/parallel/ParallelDebug.h @@ -0,0 +1,49 @@ +/* + Time-stamp: + + Prototypes of all parallel debugging functions. + */ + +#ifndef PARALLEL_DEBUG_H +#define PARALLEL_DEBUG_H + +#if defined(GRAN) // || defined(PAR) +void G_PRINT_NODE(StgClosure* node); +void G_PPN(StgClosure* node); +void G_INFO_TABLE(StgClosure* node); +void G_CURR_THREADQ(StgInt verbose); +void G_THREADQ(StgTSO* closure, StgInt verbose); +void G_TSO(StgTSO* closure, StgInt verbose); +void G_EVENT(rtsEventQ event, StgInt verbose); +void G_EVENTQ(StgInt verbose); +void G_PE_EQ(PEs pe, StgInt verbose); +void G_SPARK(rtsSparkQ spark, StgInt verbose); +void G_SPARKQ(rtsSparkQ spark, StgInt verbose); +void G_CURR_SPARKQ(StgInt verbose); +void G_PROC(StgInt proc, StgInt verbose); +void GP(StgInt proc); +void GCP(void); +void GT(StgPtr tso); +void GCT(void); +void GEQ(void); +void GTQ(PEs p); +void GCTQ(void); +void GSQ(PEs p); +void GCSQ(void); +void GN(StgPtr node); +void GIT(StgPtr node); +#endif + +#if defined(GRAN) || defined(PAR) + +char *display_info_type(StgClosure *closure, char *str); +void info_hdr_type(StgClosure *closure, char *res); +char *info_type(StgClosure *closure); +char *info_type_by_ip(StgInfoTable *ip); + +void PrintPacket(rtsPackBuffer *buffer); +void PrintGraph(StgClosure *p, int indent_level); + +#endif /* GRAN || PAR */ + +#endif /* PARALLEL_DEBUG_H */ diff --git a/ghc/rts/parallel/ParallelRts.h b/ghc/rts/parallel/ParallelRts.h new file mode 100644 index 0000000..e139541 --- /dev/null +++ b/ghc/rts/parallel/ParallelRts.h @@ -0,0 +1,294 @@ +/* -------------------------------------------------------------------------- + Time-stamp: + $Id: ParallelRts.h,v 1.2 2000/01/13 14:34:09 hwloidl Exp $ + + Variables and functions specific to the parallel RTS (i.e. 
GUM or GranSim) + ----------------------------------------------------------------------- */ + +#ifndef PARALLEL_RTS_H +#define PARALLEL_RTS_H + +#if defined(GRAN) || defined(PAR) + +//@menu +//* Packing routines:: +//* Spark handling routines:: +//* GC routines:: +//* Debugging routines:: +//* Generating .gr profiles:: +//* Common macros:: +//* Index:: +//@end menu + +#ifndef GRAN +// Dummy def for NO_PRI if not in GranSim +#define NO_PRI 0 +#endif + +//@node Packing routines, Spark handling routines +//@subsection Packing routines + +#if defined(GRAN) +/* Statistics info */ +extern nat tot_packets, tot_packet_size, tot_cuts, tot_thunks; +#endif + +#if defined(GRAN) +/* Pack.c */ +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, + nat *packBufferSize); +rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, + nat *packBufferSize); +rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize); +rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize); +void PackFetchMe(StgClosure *closure); + +/* Unpack.c */ +StgClosure* UnpackGraph(rtsPackBuffer* buffer); +void InitPendingGABuffer(nat size); + +/* RBH.c */ +StgClosure *convertToRBH(StgClosure *closure); +void convertFromRBH(StgClosure *closure); + +/* General closure predicates */ +/* + {Parallel.h}Daq ngoqvam vIroQpu' + +StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty); +rtsBool IS_BLACK_HOLE(StgClosure* node); +StgClosure *IS_INDIRECTION(StgClosure* node); +rtsBool IS_THUNK(StgClosure* closure); +*/ + +#elif defined(PAR) + +/* Pack.c */ +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, + nat *packBufferSize); + +rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize); +rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize); +void PackFetchMe(StgClosure *closure); + +/* Unpack.c */ +void CommonUp(StgClosure *src, StgClosure *dst); +StgClosure *UnpackGraph(rtsPackBuffer *buffer, globalAddr **gamap, + nat *nGAs); + +/* RBH.c */ +StgClosure *convertToRBH(StgClosure *closure); +void convertToFetchMe(StgRBH *rbh, globalAddr *ga); + +/* General closure predicates */ +/* + {Parallel.h}Daq ngoqvam vIroQpu' + +StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty); +rtsBool IS_BLACK_HOLE(StgClosure* node); +StgClosure *IS_INDIRECTION(StgClosure* node); +rtsBool IS_THUNK(StgClosure* closure); +*/ + +#endif + +/* this routine should be moved to a more general module; currently in Pack.c +StgInfoTable* get_closure_info(StgClosure* node, + nat *size, nat *ptrs, nat *nonptrs, nat *vhs, + char *info_hdr_ty); +*/ +void doGlobalGC(void); + +//@node Spark handling routines, GC routines, Packing routines +//@subsection Spark handling routines + +/* now in ../Sparks.c */ + +#if 0 + +#if defined(PAR) + +rtsSpark findLocalSpark(rtsBool forexport); +StgTSO* activateSpark (rtsSpark spark); +void disposeSpark(rtsSpark spark); +rtsBool add_to_spark_queue(StgClosure *closure, rtsBool required); +rtsBool initSparkPools (void); + +nat spark_queue_len(nat pool); +void markSparkQueue(void); +void print_sparkq(void); + +#elif defined(GRAN) + +void findLocalSpark (rtsEvent *event, + rtsBool *found_res, rtsSparkQ *spark_res); +rtsBool activateSpark (rtsEvent *event, rtsSparkQ spark); +rtsSpark *newSpark (StgClosure *node, StgInt name, StgInt gran_info, + StgInt size_info, StgInt par_info, StgInt local); +void disposeSpark(rtsSpark *spark); +void disposeSparkQ(rtsSparkQ spark); +void add_to_spark_queue(rtsSpark 
*spark); +void print_spark(rtsSpark *spark); +nat spark_queue_len(PEs proc); +rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too); +void markSparkQueue(void); +void print_sparkq(PEs proc); +void print_sparkq_stats(void); + +#endif +#endif /* 0 */ + +//@node GC routines, Debugging routines, Spark handling routines +//@subsection GC routines + +#if defined(PAR) +/* HLComms.c */ +void freeRemoteGA(int pe, globalAddr *ga); +void sendFreeMessages(void); + +/* Global.c */ +void markLocalGAs(rtsBool full); +void RebuildGAtables(rtsBool full); +void RebuildLAGAtable(void); +#endif + +//@node Debugging routines, Generating .gr profiles, GC routines +//@subsection Debugging routines + +#if defined(PAR) +void printGA (globalAddr *ga); +void printGALA (GALA *gala); +void printLAGAtable(void); +#endif + +//@node Generating .gr profiles, Common macros, Debugging routines +//@subsection Generating .gr profiles + +#define STATS_FILENAME_MAXLEN 128 + +/* Where to write the log file */ +//@cindex gr_file +//@cindex gr_filename +extern FILE *gr_file; +extern char gr_filename[STATS_FILENAME_MAXLEN]; + +//@cindex init_gr_simulation +//@cindex end_gr_simulation +void init_gr_simulation(int rts_argc, char *rts_argv[], + int prog_argc, char *prog_argv[]); +void end_gr_simulation(void); + +//@node Common macros, Index, Generating .gr profiles +//@subsection Common macros + +/* + extracting specific info out of a closure; used in packing (GranSim, GUM) +*/ +//@cindex get_closure_info +static inline StgInfoTable* +get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty) +StgClosure* node; +nat *size, *ptrs, *nonptrs, *vhs; +char *info_hdr_ty; +{ + StgInfoTable *info; + + info = get_itbl(node); + /* the switch shouldn't be necessary, really; just use default case */ + switch (info->type) { + case RBH: + { + StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to + *size = sizeW_fromITBL(rip); + *ptrs = (nat) (rip->layout.payload.ptrs); + *nonptrs = (nat) (rip->layout.payload.nptrs); + *vhs = (nat) 0; // unknown +#if 0 /* DEBUG */ + info_hdr_type(node, info_hdr_ty); +#else + strcpy(info_hdr_ty, "UNKNOWN"); +#endif + return rip; // NB: we return the reverted info ptr for a RBH!!!!!! + } + + default: + *size = sizeW_fromITBL(info); + *ptrs = (nat) (info->layout.payload.ptrs); + *nonptrs = (nat) (info->layout.payload.nptrs); + *vhs = (nat) 0; // unknown +#if 0 /* DEBUG */ + info_hdr_type(node, info_hdr_ty); +#else + strcpy(info_hdr_ty, "UNKNOWN"); +#endif + return info; + } +} + +//@cindex IS_BLACK_HOLE +static inline rtsBool +IS_BLACK_HOLE(StgClosure* node) +{ + StgInfoTable *info; + switch (get_itbl(node)->type) { + case BLACKHOLE: + case BLACKHOLE_BQ: + case RBH: + case FETCH_ME: + case FETCH_ME_BQ: + return rtsTrue; + default: + return rtsFalse; + } +//return ((info->type == BLACKHOLE || info->type == RBH) ? 
rtsTrue : rtsFalse); +} + +//@cindex IS_INDIRECTION +static inline StgClosure * +IS_INDIRECTION(StgClosure* node) +{ + StgInfoTable *info; + info = get_itbl(node); + switch (info->type) { + case IND: + case IND_OLDGEN: + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_STATIC: + /* relies on indirectee being at same place for all these closure types */ + return (((StgInd*)node) -> indirectee); + default: + return NULL; + } +} + +//@cindex unwindInd +static inline StgClosure * +UNWIND_IND (StgClosure *closure) +{ + StgClosure *next; + + while ((next = IS_INDIRECTION((StgClosure *)closure)) != NULL) + closure = next; + + ASSERT(next==(StgClosure *)NULL); + return closure; +} + +#endif /* defined(PAR) || defined(GRAN) */ + +#endif /* PARALLEL_RTS_H */ + +//@node Index, , Common macros +//@subsection Index + +//@index +//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE +//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION +//* end_gr_simulation:: @cindex\s-+end_gr_simulation +//* get_closure_info:: @cindex\s-+get_closure_info +//* gr_file:: @cindex\s-+gr_file +//* gr_filename:: @cindex\s-+gr_filename +//* init_gr_simulation:: @cindex\s-+init_gr_simulation +//* unwindInd:: @cindex\s-+unwindInd +//@end index diff --git a/ghc/rts/parallel/RBH.c b/ghc/rts/parallel/RBH.c new file mode 100644 index 0000000..faf2591 --- /dev/null +++ b/ghc/rts/parallel/RBH.c @@ -0,0 +1,338 @@ +/* + Time-stamp: + + Revertible Black Hole Manipulation. + Used in GUM and GranSim during the packing of closures. These black holes + must be revertible because a GC might occur while the packet is being + transmitted. In this case all RBHs have to be reverted. + */ + +#if defined(PAR) || defined(GRAN) /* whole file */ + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +# if defined(DEBUG) +# include "ParallelDebug.h" +# endif +#include "Storage.h" // for recordMutable +#include "StgMacros.h" // inlined IS_... fcts + +/* + Turn a closure into a revertible black hole. After the conversion, the + first two words of the closure (after the fixed header, of course) will + be a link to the mutables list (if appropriate for the garbage + collector), and a pointer to the blocking queue. The blocking queue is + terminated by a 2-word SPEC closure which holds the original contents of + the first two words of the closure. +*/ + +//@menu +//* Externs and prototypes:: +//* Conversion Functions:: +//* Index:: +//@end menu +//*/ + +//@node Externs and prototypes, Conversion Functions +//@section Externs and prototypes + +EXTFUN(RBH_Save_0_info); +EXTFUN(RBH_Save_1_info); +EXTFUN(RBH_Save_2_info); + +//@node Conversion Functions, Index, Externs and prototypes +//@section Conversion Functions + +/* + A closure is turned into an RBH upon packing it (see PackClosure in Pack.c). + This is needed in case we have to do a GC before the packet is turned + into a graph on the PE receiving the packet. +*/ +//@cindex convertToRBH +StgClosure * +convertToRBH(closure) +StgClosure *closure; +{ + StgRBHSave *rbh_save; + StgInfoTable *info_ptr, *rbh_info_ptr, *old_info; + nat size, ptrs, nonptrs, vhs; + char str[80]; + + /* + Closure layout before this routine runs amuck: + +------------------- + | HEADER | DATA ... + +------------------- + | FIXED_HS | + */ + /* + Turn closure into an RBH. This is done by modifying the info_ptr, + grabbing the info_ptr of the RBH for this closure out of its + ITBL. 
Additionally, we have to save the words from the closure, which
+    will hold the link to the blocking queue.  For this purpose we use the
+    RBH_Save_N closures, with N being the number of pointers for this
+    closure.  */
+  IF_GRAN_DEBUG(pack,
+                belch(":*   Converting closure %p (%s) into an RBH",
+                      closure, info_type(closure)));
+  IF_PAR_DEBUG(pack,
+                belch(":*   Converting closure %p (%s) into an RBH",
+                      closure, info_type(closure)));
+
+  ASSERT(closure_THUNK(closure));
+
+  IF_GRAN_DEBUG(pack,
+                old_info = get_itbl(closure));
+
+  /* Allocate a new closure to hold the data ripped out of closure */
+  if ((rbh_save = (StgRBHSave *)allocate(FIXED_HS + 2)) == NULL)
+    return NULL;  /* have to Garbage Collect; check that in the caller! */
+
+  info_ptr = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+  ASSERT(size >= MIN_UPD_SIZE);
+
+  /* Fill in the RBH_Save closure with the original data from closure */
+  rbh_save->payload[0] = (StgPtr) ((StgRBH *)closure)->blocking_queue;
+  rbh_save->payload[1] = (StgPtr) ((StgRBH *)closure)->mut_link;
+
+  /* Set the info_ptr for the rbh_Save closure according to the number of
+     pointers in the original */
+
+  rbh_info_ptr = (StgInfoTable *) (ptrs == 0 ? &RBH_Save_0_info :
+                                   ptrs == 1 ? &RBH_Save_1_info :
+                                   &RBH_Save_2_info);
+  SET_INFO(rbh_save, rbh_info_ptr);
+  /* same bitmask as the original closure */
+  SET_GRAN_HDR(rbh_save, PROCS(closure));
+
+  /* Init the blocking queue of the RBH and have it point to the saved data */
+  ((StgRBH *)closure)->blocking_queue = (StgBlockingQueueElement *)rbh_save;
+
+  ASSERT(LOOKS_LIKE_GHC_INFO(RBH_INFOPTR(get_itbl(closure))));
+  /* Turn the closure into a RBH; a great system, indeed! */
+  SET_INFO(closure, RBH_INFOPTR(get_itbl(closure)));
+
+  /*
+    add closure to the mutable list!
+    do this after having turned the closure into an RBH, because an
+    RBH is mutable but the thing it was previously isn't
+  */
+  //recordMutable((StgMutClosure *)closure);
+
+  //IF_GRAN_DEBUG(pack,
+                /* sanity check; make sure that reverting the RBH yields the
+                   orig closure, again */
+  //ASSERT(REVERT_INFOPTR(get_itbl(closure))==old_info));
+
+  /*
+    Closure layout after this routine has run amuck:
+      +---------------------
+      | RBH-HEADER |   |   | ...
+      +--------------|---|--
+        | FIXED_HS  |   v
+                    |   Mutable-list ie another StgMutClosure
+                    v
+                    +---------
+                    | RBH_SAVE with 0-2 words of DATA
+                    +---------
+  */
+
+  return closure;
+}
+
+/*
+  An RBH closure is turned into a FETCH_ME when receiving an ACK message
+  indicating that the transferred closure has been unpacked on the other PE
+  (see processAck in HLComms.c). The ACK also contains the new GA of the
+  closure to which the FETCH_ME closure has to point.
+
+  Converting a closure to a FetchMe is trivial, unless the closure has
+  acquired a blocking queue. If that has happened, we first have to awaken
+  the blocking queue. What a nuisance!  Fortunately, @AwakenBlockingQueue@
+  should now know what to do.
+
+  A note on GrAnSim: In GrAnSim we don't have FetchMe closures. However,
+  we have to turn an RBH back to its original form when the simulated
+  transfer of the closure has been finished. Therefore we need the
+  @convertFromRBH@ routine below. After converting the RBH back to its
+  original form and awakening all TSOs, the first TSO will reenter the
+  closure which is now local and carry on merrily reducing it (the other
+  TSO will be less merrily blocked on the now local closure; we're costing
+  the difference between local and global blocks in the BQ code).  -- HWL
+*/
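+
+/*
+  Caller-side sketch (editorial, hypothetical -- the real call site is the
+  packing code in Pack.c; `tryToRBH' is an assumed name): the NULL return of
+  convertToRBH signals that allocate() failed and the caller has to garbage
+  collect before retrying, as noted above.
+*/
+#if 0 /* sketch only */
+static rtsBool
+tryToRBH(StgClosure *closure)
+{
+  if (convertToRBH(closure) == NULL) {
+    /* heap exhausted: caller must garbage collect and retry the packing */
+    return rtsFalse;
+  }
+  return rtsTrue;   /* closure is now a revertible black hole */
+}
+#endif /* 0 (sketch) */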
+
+# if defined(PAR)
+
+EXTFUN(FETCH_ME_info);
+
+//@cindex convertToFetchMe
+void
+convertToFetchMe(rbh, ga)
+StgRBH *rbh;
+globalAddr *ga;
+{
+  // StgInfoTable *ip = get_itbl(rbh);
+  StgBlockingQueueElement *bqe = rbh->blocking_queue;
+
+  ASSERT(get_itbl(rbh)->type==RBH);
+
+  IF_PAR_DEBUG(pack,
+               belch(":*   Converting RBH %p (%s) into a FETCH_ME for GA ((%x, %d, %x))",
+                     rbh, info_type(rbh),
+                     ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight));
+
+  /* put closure on mutables list, while it is still a RBH */
+  //recordMutable((StgMutClosure *)rbh);
+
+  /* actually turn it into a FETCH_ME */
+  SET_INFO((StgClosure *)rbh, &FETCH_ME_info);
+
+  /* set the global pointer in the FETCH_ME closure to the given value */
+  ((StgFetchMe *)rbh)->ga = ga;
+
+  IF_PAR_DEBUG(pack,
+               if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+                 belch(":*   Awakening non-empty BQ of RBH closure %p (first TSO is %d (%p)",
+                       rbh, ((StgTSO *)bqe)->id, ((StgTSO *)bqe)));
+
+  /* awaken all TSOs and BLOCKED_FETCHES on the blocking queue */
+  if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+    awaken_blocked_queue(bqe, (StgClosure *)rbh);
+}
+# else  /* GRAN */
+/* Prototype */
+// void UnlinkFromMUT(StgPtr closure);
+
+/*
+  This routine in fact reverts the RBH into its original form; this code
+  should be of interest for GUM, too, but is not needed in the current version.
+  convertFromRBH is called where GUM uses convertToFetchMe.
+*/
+void
+convertFromRBH(closure)
+StgClosure *closure;
+{
+  StgBlockingQueueElement *bqe = ((StgRBH*)closure)->blocking_queue;
+  char str[NODE_STR_LEN];                                 // debugging only
+  StgInfoTable *rip = REVERT_INFOPTR(get_itbl(closure));  // debugging only
+
+  IF_GRAN_DEBUG(pack,
+                if (get_itbl(bqe)->type==TSO)
+                  sprintf(str, "%d (%p)",
+                          ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
+                else
+                  strcpy(str, "empty");
+                belch(":*   Reverting RBH %p (%s) into a ??? closure again; BQ start: %s",
+                      closure, info_type(closure), str));
+
+  ASSERT(get_itbl(closure)->type==RBH);
+
+  /* awaken_blocked_queue also restores the RBH_Save closure
+     (have to call it even if there are no TSOs in the queue!) */
+  awaken_blocked_queue(bqe, closure);
+
+  /* Put back old info pointer (grabbed from the RBH's info table).
+     We do that *after* awakening the BQ to be sure node is an RBH when
+     calling awaken_blocked_queue (different in GUM!)
+  */
+  SET_INFO(closure, REVERT_INFOPTR(get_itbl(closure)));
+
+  /* put closure on mutables list */
+  //recordMutable((StgMutClosure *)closure);
+
+# if 0 /* rest of this fct */
+  /* ngoq ngo' */
+  /* FETCHME_GA(closure) = ga; */
+  if (IS_MUTABLE(INFO_PTR(bqe))) {
+    PROC old_proc = CurrentProc,        /* NB: For AwakenBlockingQueue, */
+         new_proc = where_is(closure);  /* CurrentProc must be where    */
+                                        /* closure lives.               */
+    CurrentProc = new_proc;
+
+#  if defined(GRAN_CHECK)
+    if (RTSflags.GranFlags.debug & 0x100)
+      fprintf(stderr,"===== AwBQ of node 0x%lx (%s) [PE %2u]\n",
+              closure, (isSpec ? "SPEC_RBH" : "GEN_RBH"), new_proc);
+#  endif
+
+    rbh_save = AwakenBlockingQueue(bqe);     /* AwakenBlockingQueue(bqe); */
+    CurrentProc = old_proc;
+  } else {
+    rbh_save = bqe;
+  }
+
+  /* Put data from special RBH save closures back into the closure */
+  if ( rbh_save == NULL ) {
+    fprintf(stderr,"convertFromRBH: No RBH_Save_? closure found at end of BQ!\n");
+    EXIT(EXIT_FAILURE);
+  } else {
+    closure[isSpec ? SPEC_HS : GEN_HS] = rbh_save[SPEC_HS];
+    closure[(isSpec ? SPEC_HS : GEN_HS) + 1] = rbh_save[SPEC_HS + 1];
+  }
+# endif  /* 0 */
+
+# if 0 && (defined(GCap) || defined(GCgn))
+  /* ngoq ngo' */
+  /* If we convert from an RBH in the old generation,
+      we have to make sure it goes on the mutables list */
+
+  if(closure <= StorageMgrInfo.OldLim) {
+    if (IS_MUTABLE(INFO_PTR(closure)) && MUT_LINK(closure) == MUT_NOT_LINKED) {
+      MUT_LINK(closure) = (StgWord) StorageMgrInfo.OldMutables;
+      StorageMgrInfo.OldMutables = closure;
+    }
+  }
+# endif /* 0 */
+}
+#endif /* PAR */
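+
+/*
+  Sketch of the GC interaction mentioned at the top of this file (editorial,
+  hypothetical -- `revertPackedClosures' and the array of packed closures are
+  assumed; the real bookkeeping lives in Pack.c): if a GC strikes while a
+  packet is in flight, every closure that was turned into an RBH has to be
+  reverted.
+*/
+#if 0 /* sketch only */
+static void
+revertPackedClosures(StgClosure *packed[], nat n)
+{
+  nat i;
+  for (i = 0; i < n; i++)
+    if (get_itbl(packed[i])->type == RBH)
+      convertFromRBH(packed[i]);  /* GranSim; GUM instead converts to a
+                                     FETCH_ME once an ACK has arrived */
+}
+#endif /* 0 (sketch) */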
+
+/* Remove closure from the mutables list */
+#if 0
+/* ngoq ngo' */
+void
+UnlinkFromMUT(StgPtr closure)
+{
+  StgPtr curr = StorageMgrInfo.OldMutables, prev = NULL;
+
+  while (curr != NULL && curr != closure) {
+    ASSERT(MUT_LINK(curr)!=MUT_NOT_LINKED);
+    prev=curr;
+    curr=MUT_LINK(curr);
+  }
+  if (curr==closure) {
+    if (prev==NULL)
+      StorageMgrInfo.OldMutables = MUT_LINK(curr);
+    else
+      MUT_LINK(prev) = MUT_LINK(curr);
+    MUT_LINK(curr) = MUT_NOT_LINKED;
+  }
+
+# if 0 && (defined(GCap) || defined(GCgn))
+  {
+    closq newclos;
+    extern closq ex_RBH_q;
+
+    newclos = (closq) stgMallocBytes(sizeof(struct clos), "UnlinkFromMUT");
+    CLOS_CLOSURE(newclos) = closure;
+    CLOS_PREV(newclos) = NULL;
+    CLOS_NEXT(newclos) = ex_RBH_q;
+    if (ex_RBH_q!=NULL)
+      CLOS_PREV(ex_RBH_q) = newclos;
+    ex_RBH_q = newclos;
+  }
+# endif
+}
+#endif /* 0 */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node Index, , Conversion Functions
+//@section Index
+
+//@index
+//* convertToFetchMe::  @cindex\s-+convertToFetchMe
+//* convertToRBH::  @cindex\s-+convertToRBH
+//@end index
diff --git a/ghc/rts/parallel/SysMan.c b/ghc/rts/parallel/SysMan.c
new file mode 100644
index 0000000..eaafc03
--- /dev/null
+++ b/ghc/rts/parallel/SysMan.c
@@ -0,0 +1,417 @@
+/* ----------------------------------------------------------------------------
+   Time-stamp: 
+   $Id: SysMan.c,v 1.2 2000/01/13 14:34:09 hwloidl Exp $
+
+   GUM System Manager Program
+   Handles startup, shutdown and global synchronisation of the parallel system.
+
+   The Parade/AQUA Projects, Glasgow University, 1994-1995.
+   GdH/APART Projects, Heriot-Watt University, Edinburgh, 1997-1999.
+   P. Trinder, November 30th. 1994.
+   Adapted for new RTS
+   P. Trinder, July 1997.
+   H-W. Loidl, November 1999.
+
+   ------------------------------------------------------------------------- */
+
+//@node GUM System Manager Program, , ,
+//@section GUM System Manager Program
+
+//@menu
+//* General docu::
+//* Includes::
+//* Macros etc::
+//* Variables::
+//* Main fct::
+//* Auxiliary fcts::
+//* Index::
+//@end menu
+
+//@node General docu, Includes, GUM System Manager Program, GUM System Manager Program
+//@subsection General docu
+
+/*
+
+The SysMan task currently controls initiation and termination of a
+parallel Haskell program running under GUM. In the future it may
+control global GC synchronisation and statistics gathering. Based on
+K. Hammond's SysMan.lc in Graph for PVM. SysMan is unusual in that it
+is not part of the executable produced by ghc: it is a free-standing
+program that spawns PVM tasks (logical PEs) to evaluate the
+program. After initialisation it runs in parallel with the PE tasks,
+awaiting messages.
+
+OK children, buckle down for some serious weirdness, it works like this ...
+
+
+o The argument vector (argv) for SysMan has one of the following two shapes:
+
+-------------------------------------------------------------------------------
+| SysMan path | debug flag | pvm-executable path | Num. PEs | Program Args ...|
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------
+| SysMan path | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+The "pvm-executable path" is an absolute path of where PVM stashes the
+code for each PE. The arguments passed on to each PE-executable
+spawned by PVM are:
+
+-------------------------------
+| Num. PEs | Program Args ... |
+-------------------------------
+
+The arguments passed to the Main-thread PE-executable are
+
+-------------------------------------------------------------------
+| main flag | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+o SysMan's algorithm is as follows.
+
+o use PVM to spawn (nPEs-1) PVM tasks
+o fork SysMan to create the main-thread PE. This permits the main-thread to
+read and write to stdin and stdout.
+o Barrier-synchronise waiting for all of the PE-tasks to start.
+o Broadcast the SysMan task-id, so that the main thread knows it.
+o Wait for the Main-thread PE to send its task-id.
+o Broadcast an array of the PE task-ids to all of the PE-tasks.
+o Enter a loop awaiting incoming messages, e.g. failure, Garbage-collection,
+termination.
+
+The forked Main-thread algorithm, in SysMan, is as follows.
+
+o disconnects from PVM.
+o sets a flag in argv to indicate that it is the main thread.
+o `exec's a copy of the pvm-executable (i.e. the program being run)
+
+
+The pvm-executable run by each PE-task is initialised as follows.
+
+o Registers with PVM, obtaining a task-id.
+o Joins the barrier synchronisation awaiting the other PEs.
+o Receives and records the task-id of SysMan, for future use.
+o If the PE is the main thread it sends its task-id to SysMan.
+o Receives and records the array of task-ids of the other PEs.
+o Begins execution.
+
+(A sketch of this PE-side handshake appears after the macro definitions
+below.)
+
+*/
+
+//@node Includes, Macros etc, General docu, GUM System Manager Program
+//@subsection Includes
+
+#include "Rts.h"
+#include "ParTypes.h"
+#include "LLC.h"
+#include "Parallel.h"
+
+//@node Macros etc, Variables, Includes, GUM System Manager Program
+//@subsection Macros etc
+
+#define NON_POSIX_SOURCE /* so says Solaris */
+
+#define checkerr(c)	do { \
+			  if ((c)<0) { \
+			    pvm_perror("Sysman"); \
+			    fprintf(stderr,"Sysman"); \
+			    stg_exit(EXIT_FAILURE); \
+			  } \
+			} while(0)
+
+/* SysMan is put on top of the GHC routine that does the RtsFlags handling.
+   So, we cannot use the standard macros. For the time being we use a macro
+   that is fixed at compile time.
+*/
+/* debugging enabled */
+#define IF_PAR_DEBUG(c,s)  { s; }
+/* debugging disabled */
+// #define IF_PAR_DEBUG(c,s)  /* nothing */
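+
+/*
+  PE-side sketch of the startup handshake described in the General docu above
+  (editorial, hypothetical -- the real code lives in the parallel RTS, not in
+  SysMan; `initPE' is an assumed name, and only PVM calls that appear in this
+  file are used):
+*/
+#if 0 /* sketch only */
+static void
+initPE(void)
+{
+  int nbytes, opcode, sysman;
+
+  mytid = pvm_mytid();                     /* register with PVM        */
+  pvm_joingroup(PEGROUP);
+  pvm_joingroup(PECTLGROUP);
+  pvm_barrier(PECTLGROUP, nPEs + 1);       /* wait for the other PEs   */
+  pvm_bufinfo(pvm_recv(ANY_TASK, PP_SYSMAN_TID),
+              &nbytes, &opcode, &sysman);  /* record SysMan's task-id  */
+  /* ... the main PE then sends its own task-id (PP_MAIN_TASK) ...     */
+  pvm_recv(sysman, PP_PETIDS);             /* array of all PE task-ids */
+}
+#endif /* 0 (sketch) */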
+
+//@node Variables, Main fct, Macros etc, GUM System Manager Program
+//@subsection Variables
+
+/*
+   The following definitions are included so that SysMan can be linked with
+   Low Level Communications module (LLComms). They are not used in SysMan.
+*/
+
+GlobalTaskId  mytid, SysManTask;
+rtsBool       IAmMainThread;
+rtsBool       GlobalStopPending = rtsFalse;
+	      /* Handle unexpected messages correctly */
+
+static GlobalTaskId gtids[MAX_PES];
+static GlobalTaskId sysman_id, sender_id, mainThread_id;
+static unsigned PEsTerminated = 0;
+static rtsBool Finishing = rtsFalse;
+static long PEbuffer[MAX_PES];
+nat nPEs = 0;
+
+//@node Main fct, Auxiliary fcts, Variables, GUM System Manager Program
+//@subsection Main fct
+
+//@cindex main
+main (int argc, char **argv) {
+  int rbufid;
+  int opcode, nbytes;
+  char **pargv;
+  int i, cc, spawn_flag = PvmTaskDefault;
+  char *petask, *pvmExecutable;
+  rtsPacket addr;
+
+  setbuf(stdout, NULL);  // disable buffering of stdout
+  setbuf(stderr, NULL);  // disable buffering of stderr
+
+  if (argc > 1) {
+    if (*argv[1] == '-') {
+      spawn_flag = PvmTaskDebug;
+      argv[1] = argv[0];
+      argv++; argc--;
+    }
+    sysman_id = pvm_mytid();  /* This must be the first PVM call */
+
+    checkerr(sysman_id);
+
+    /*
+       Get the full path and filename of the pvm executable (stashed in some
+       PVM directory), and the number of PEs from the command line.
+    */
+    pvmExecutable = argv[1];
+    nPEs = atoi(argv[2]);
+
+    if ((petask = getenv(PETASK)) == NULL)  // PETASK set by driver
+      petask = PETASK;
+
+    IF_PAR_DEBUG(verbose,
+		 fprintf(stderr,"== [%x] nPEs (%s) = %d\n",
+			 sysman_id, petask, nPEs));
+
+    /* Check that we can create the number of PE and IMU tasks requested */
+    if (nPEs > MAX_PES) {
+      fprintf(stderr,"SysMan: No more than %d PEs allowed (%d requested)\n",
+	      MAX_PES, nPEs);
+      stg_exit(EXIT_FAILURE);
+    }
+    /*
+      Now create the PE Tasks. We spawn (nPEs-1) pvm threads: the Main Thread
+      (which starts execution and performs IO) is created by forking SysMan
+    */
+    nPEs--;
+    if (nPEs > 0) {
+      /* Initialise the PE task arguments from Sysman's arguments */
+      pargv = argv + 2;
+
+      IF_PAR_DEBUG(verbose,
+		   fprintf(stderr, "== [%x] Spawning %d PEs(%s) ...\n",
+			   sysman_id, nPEs, petask);
+		   fprintf(stderr, "  args: ");
+		   for (i = 0; pargv[i]; ++i)
+		     fprintf(stderr, "%s, ", pargv[i]);
+		   fprintf(stderr, "\n"));
+
+      checkerr(pvm_spawn(petask, pargv, spawn_flag, "", nPEs, gtids));
+      /*
+       * Stash the task-ids of the PEs away in a buffer, once we know
+       * the Main Thread's task-id, we'll broadcast them all.
+       */
+      for (i = 0; i < nPEs; i++)
+	PEbuffer[i+1] = (long) gtids[i];
+
+      IF_PAR_DEBUG(verbose,
+		   fprintf(stderr,"== [%x] Spawned\n", sysman_id));
+    }
+
+    /*
+       Create the MainThread PE by forking SysMan. This arcane coding
+       is required to allow MainThread to read stdin and write to stdout.
+       PWT 18/1/96
+    */
+    nPEs++;  /* Record that the number of PEs is increasing */
+    if ((cc = fork())) {
+      checkerr(cc);  /* Parent continues as SysMan */
+      IF_PAR_DEBUG(verbose,
+		   fprintf(stderr,"== [%x] SysMan Task is [t%x]\n",
+			   sysman_id, sysman_id));
+
+      /*
+	SysMan joins PECTLGROUP, so that it can wait (at the
+	barrier synchronisation a few instructions later) for the
+	other PE-tasks to start.
+
+	The manager group (MGRGROUP) is vestigial at the moment. It
+	may eventually include a statistics manager, and a (global)
+	garbage collector manager.
+      checkerr(pvm_joingroup(PECTLGROUP));
+      IF_PAR_DEBUG(verbose,
+                   fprintf(stderr,"== [%x] Joined PECTLGROUP\n", sysman_id));
+
+      /* Wait for all the PEs to arrive */
+      checkerr(pvm_barrier(PECTLGROUP, nPEs + 1));
+      IF_PAR_DEBUG(verbose,
+                   fprintf(stderr,"== [%x] PECTLGROUP barrier passed\n",
+                           sysman_id));
+
+      /* Broadcast SysMan's ID, so the Main Thread PE knows it */
+      pvm_initsend(PvmDataDefault);
+      pvm_bcast(PEGROUP, PP_SYSMAN_TID);
+
+      /* Wait for the Main Thread to identify itself */
+      addr = waitForPEOp(PP_MAIN_TASK, ANY_GLOBAL_TASK);
+      pvm_bufinfo(addr, &nbytes, &opcode, &mainThread_id);
+      PEbuffer[0] = mainThread_id;
+
+      IF_PAR_DEBUG(verbose,
+                   fprintf(stderr,"== [%x] SysMan received Main Task = %x\n",
+                           sysman_id, mainThread_id));
+
+      /* Now that we have them all, broadcast the Global Task Ids of all PEs */
+      pvm_initsend(PvmDataDefault);
+      PutArgs(PEbuffer, nPEs);
+      pvm_bcast(PEGROUP, PP_PETIDS);
+
+      IF_PAR_DEBUG(verbose,
+                   fprintf(stderr,"== [%x] SysMan successfully initialized!\n",
+                           sysman_id));
+
+//@cindex message handling loop
+      /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+      /* Main message handling loop: process incoming messages          */
+      /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+      while (1) {
+        if ((rbufid = pvm_recv(ANY_TASK, ANY_OPCODE)) < 0)
+          pvm_perror("Sysman: Receiving Message");
+        else {
+          pvm_bufinfo(rbufid, &nbytes, &opcode, &sender_id);
+
+          /*
+          IF_PAR_DEBUG(trace,
+                       fprintf(stderr,"== [%x] SysMan: Message received: rbufid=%x, nbytes=%d, opcode=%x, sender_id=%x\n",
+                               sysman_id, rbufid, nbytes, opcode, sender_id));
+          */
+          switch (opcode) {
+          case PP_GC_INIT:
+            /* Global GC is not yet implemented for GUM */
+            fprintf(stderr,"Global GC from %x not yet implemented for GUM!\n",
+                    sender_id);
+            sync(PECTLGROUP, PP_FULL_SYSTEM);
+            broadcast(PEGROUP, PP_GC_INIT);
+            /* DoGlobalGC(); */
+            /* broadcast(PEGROUP, PP_INIT); */
+            break;
+
+          case PP_STATS_ON:
+            fprintf(stderr,"PP_STATS_ON (from %x) not yet implemented for GUM!\n",
+                    sender_id);
+            break;
+
+          case PP_STATS_OFF:
+            fprintf(stderr,"PP_STATS_OFF (from %x) not yet implemented for GUM!\n",
+                    sender_id);
+            break;
+
+          case PP_FINISH:
+            IF_PAR_DEBUG(verbose,
+                         fprintf(stderr,"== [%x] Finish from %x\n",
+                                 sysman_id, sender_id));
+            if (!Finishing) {
+              /* first FINISH: tell all the PEs to shut down, too */
+              Finishing = rtsTrue;
+              PEsTerminated = 1;
+              pvm_initsend(PvmDataDefault);
+              pvm_bcast(PEGROUP, PP_FINISH);
+            } else {
+              ++PEsTerminated;
+            }
+            if (PEsTerminated >= nPEs) {
+              IF_PAR_DEBUG(verbose,
+                           fprintf(stderr,"== [%x] Global Shutdown, Goodbye!! (SysMan has received FINISHes from all PEs)\n",
+                                   sysman_id));
+              broadcast(PEGROUP, PP_FINISH);
+              broadcast(MGRGROUP, PP_FINISH);
+              pvm_lvgroup(PECTLGROUP);
+              pvm_lvgroup(MGRGROUP);
+              pvm_exit();
+              exit(EXIT_SUCCESS);  /* Qapla'! */
+            }
+            break;
+
+          case PP_FAIL:
+            IF_PAR_DEBUG(verbose,
+                         fprintf(stderr,"== [%x] Fail from %x\n",
+                                 sysman_id, sender_id));
+            if (!Finishing) {
+              Finishing = rtsTrue;
+              broadcast(PEGROUP, PP_FAIL);
+            }
+            break;
+
+          default:
+            {
+              /*
+              char *opname = GetOpName(opcode);
+              fprintf(stderr,"Sysman: Unrecognised opcode %s (%x)\n",
+                      opname,opcode);
+              */
+              fprintf(stderr,"Qagh: Sysman: Unrecognised opcode (%x)\n",
+                      opcode);
+            }
+            break;
+          } /* switch */
+        } /* else */
+      } /* while 1 */
+    } /* parent: SysMan proper */
+    else {
+      /* child: becomes the Main Thread PE */
+      fprintf(stderr, "Main Thread PE has been forked; doing an execv(%s,...)\n",
+              pvmExecutable);
+      pvmendtask();        /* Disconnect from PVM to avoid confusion; */
+                           /* the executable reconnects               */
+      *argv[0] = '-';      /* Flag that this is the Main Thread PE    */
+      execv(pvmExecutable, argv);  /* Child task becomes the Main Thread PE */
+    }
+  } /* argc > 1 */
+  return EXIT_SUCCESS;  /* only reached when no arguments were given */
+} /* main */
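+
+/*
+  A sketch of the PE-side counterpart of the startup handshake above,
+  following the documentation at the top of this file. The real code
+  lives in the parallel RTS (Parallel.c / LLComms), not in SysMan; the
+  function name startUpPE and the use of sendOp for the PP_MAIN_TASK
+  message are assumptions made for illustration.
+*/
+static void
+startUpPE (unsigned numPEs, rtsBool isMain)
+{
+  int nbytes, opcode;
+  GlobalTaskId sysman;
+  rtsPacket p;
+
+  mytid = pvm_mytid();                            /* register with PVM      */
+  checkerr(pvm_joingroup(PEGROUP));               /* for PEGROUP broadcasts */
+  checkerr(pvm_joingroup(PECTLGROUP));            /* for the start barrier  */
+  checkerr(pvm_barrier(PECTLGROUP, numPEs + 1));  /* all PEs plus SysMan    */
+
+  /* receive and record SysMan's task-id */
+  p = waitForPEOp(PP_SYSMAN_TID, ANY_GLOBAL_TASK);
+  pvm_bufinfo(p, &nbytes, &opcode, &sysman);
+
+  /* the Main Thread identifies itself to SysMan */
+  if (isMain)
+    sendOp(PP_MAIN_TASK, sysman);
+
+  /* receive the array of PE task-ids, then begin execution */
+  p = waitForPEOp(PP_PETIDS, ANY_GLOBAL_TASK);
+}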
+
+//@node Auxiliary fcts, Index, Main fct, GUM System Manager Program
+//@subsection Auxiliary fcts
+
+/*
+  These are reproduced from RtsUtils to save linking with a whole ball
+  of wax.
+*/
+
+/* result-checking malloc wrapper */
+
+//@cindex stgMallocBytes
+
+void *
+stgMallocBytes (int n, char *msg)
+{
+  char *space;
+
+  if ((space = (char *) malloc((size_t) n)) == NULL) {
+    fflush(stdout);
+    fprintf(stderr, "%s\n", msg);  /* never pass msg as the format string */
+    // MallocFailHook((W_) n, msg);
+    stg_exit(EXIT_FAILURE);
+  }
+  return space;
+}
+
+/* Needed here because it's used in loads of places, e.g. LLComms */
+
+//@cindex stg_exit
+
+void
+stg_exit (I_ n)
+{
+  exit(n);
+}
+
+//@node Index, , Auxiliary fcts, GUM System Manager Program
+//@subsection Index
+
+//@index
+//* main::  @cindex\s-+main
+//* message handling loop::  @cindex\s-+message handling loop
+//* stgMallocBytes::  @cindex\s-+stgMallocBytes
+//* stg_exit::  @cindex\s-+stg_exit
+//@end index
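+
+/*
+  Usage note (illustrative only): the wrapper above is used as, e.g.,
+
+      char *buf = (char *) stgMallocBytes(BUFSIZ, "SysMan: scratch buffer");
+
+  i.e. it either returns the requested space or prints the message and
+  aborts, so callers never have to check for NULL themselves.
+*/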