From 91b07216be1cb09230b7d1b417899ddea8620ff3 Mon Sep 17 00:00:00 2001 From: simonmar Date: Tue, 17 Jan 2006 16:13:18 +0000 Subject: [PATCH] [project @ 2006-01-17 16:13:18 by simonmar] Improve the GC behaviour of IORefs (see Ticket #650). This is a small change to the way IORefs interact with the GC, which should improve GC performance for programs with plenty of IORefs. Previously we had a single closure type for mutable variables, MUT_VAR. Mutable variables were *always* on the mutable list in older generations, and always traversed on every GC. Now, we have two closure types: MUT_VAR_CLEAN and MUT_VAR_DIRTY. The latter is on the mutable list, but the former is not. (NB. this differs from MUT_ARR_PTRS_CLEAN and MUT_ARR_PTRS_DIRTY, both of which are on the mutable list). writeMutVar# now implements a write barrier, by calling dirty_MUT_VAR() in the runtime, that does the necessary modification of MUT_VAR_CLEAN into MUT_VAR_DIRY, and adding to the mutable list if necessary. This results in some pretty dramatic speedups for GHC itself. I've just measureed a 30% overall speedup compiling a 31-module program (anna) with the default heap settings :-D --- ghc/compiler/cmm/CLabel.hs | 2 ++ ghc/compiler/codeGen/CgPrimOp.hs | 13 +++++++-- ghc/includes/ClosureTypes.h | 37 ++++++++++++------------ ghc/includes/RtsExternal.h | 1 + ghc/includes/StgMiscClosures.h | 6 ++-- ghc/includes/Storage.h | 9 ++++++ ghc/rts/ClosureFlags.c | 5 ++-- ghc/rts/GC.c | 59 +++++++++++++++++++++++++++++--------- ghc/rts/GCCompact.c | 3 +- ghc/rts/LdvProfile.c | 3 +- ghc/rts/PrimOps.cmm | 5 ++-- ghc/rts/Printer.c | 17 ++++++++--- ghc/rts/ProfHeap.c | 6 ++-- ghc/rts/RetainerProfile.c | 12 +++++--- ghc/rts/Sanity.c | 3 +- ghc/rts/StgMiscClosures.cmm | 6 ++-- ghc/rts/Storage.c | 16 +++++++++++ 17 files changed, 148 insertions(+), 55 deletions(-) diff --git a/ghc/compiler/cmm/CLabel.hs b/ghc/compiler/cmm/CLabel.hs index ca818cb..6216d38 100644 --- a/ghc/compiler/cmm/CLabel.hs +++ b/ghc/compiler/cmm/CLabel.hs @@ -46,6 +46,7 @@ module CLabel ( mkPlainModuleInitLabel, mkSplitMarkerLabel, + mkDirty_MUT_VAR_Label, mkUpdInfoLabel, mkSeqInfoLabel, mkIndStaticInfoLabel, @@ -343,6 +344,7 @@ mkPlainModuleInitLabel hmods mod -- Some fixed runtime system labels mkSplitMarkerLabel = RtsLabel (RtsCode SLIT("__stg_split_marker")) +mkDirty_MUT_VAR_Label = RtsLabel (RtsCode SLIT("dirty_MUT_VAR")) mkUpdInfoLabel = RtsLabel (RtsInfo SLIT("stg_upd_frame")) mkSeqInfoLabel = RtsLabel (RtsInfo SLIT("stg_seq_frame")) mkIndStaticInfoLabel = RtsLabel (RtsInfo SLIT("stg_IND_STATIC")) diff --git a/ghc/compiler/codeGen/CgPrimOp.hs b/ghc/compiler/codeGen/CgPrimOp.hs index 7784efb..91aa391 100644 --- a/ghc/compiler/codeGen/CgPrimOp.hs +++ b/ghc/compiler/codeGen/CgPrimOp.hs @@ -10,13 +10,15 @@ module CgPrimOp ( cgPrimOp ) where +import ForeignCall ( CCallConv(CCallConv) ) import StgSyn ( StgLiveVars, StgArg ) import CgBindery ( getVolatileRegs, getArgAmodes ) import CgMonad import CgInfoTbls ( getConstrTag ) import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW ) import Cmm -import CLabel ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel ) +import CLabel ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel, + mkDirty_MUT_VAR_Label ) import CmmUtils import MachOp import SMRep @@ -113,7 +115,14 @@ emitPrimOp [res] ReadMutVarOp [mutv] live = stmtC (CmmAssign res (cmmLoadIndexW mutv fixedHdrSize)) emitPrimOp [] WriteMutVarOp [mutv,var] live - = stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var) + = do + stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var) + vols <- getVolatileRegs live + stmtC (CmmCall (CmmForeignCall (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + CCallConv) + [{-no results-}] + [(mutv,PtrHint)] + (Just vols)) -- #define sizzeofByteArrayzh(r,a) \ -- r = (((StgArrWords *)(a))->words * sizeof(W_)) diff --git a/ghc/includes/ClosureTypes.h b/ghc/includes/ClosureTypes.h index 18e9448..f884026 100644 --- a/ghc/includes/ClosureTypes.h +++ b/ghc/includes/ClosureTypes.h @@ -76,23 +76,24 @@ #define MUT_ARR_PTRS_DIRTY 52 #define MUT_ARR_PTRS_FROZEN0 53 #define MUT_ARR_PTRS_FROZEN 54 -#define MUT_VAR 55 -#define WEAK 56 -#define STABLE_NAME 57 -#define TSO 58 -#define BLOCKED_FETCH 59 -#define FETCH_ME 60 -#define FETCH_ME_BQ 61 -#define RBH 62 -#define EVACUATED 63 -#define REMOTE_REF 64 -#define TVAR_WAIT_QUEUE 65 -#define TVAR 66 -#define TREC_CHUNK 67 -#define TREC_HEADER 68 -#define ATOMICALLY_FRAME 69 -#define CATCH_RETRY_FRAME 70 -#define CATCH_STM_FRAME 71 -#define N_CLOSURE_TYPES 72 +#define MUT_VAR_CLEAN 55 +#define MUT_VAR_DIRTY 56 +#define WEAK 57 +#define STABLE_NAME 58 +#define TSO 59 +#define BLOCKED_FETCH 60 +#define FETCH_ME 61 +#define FETCH_ME_BQ 62 +#define RBH 63 +#define EVACUATED 64 +#define REMOTE_REF 65 +#define TVAR_WAIT_QUEUE 66 +#define TVAR 67 +#define TREC_CHUNK 68 +#define TREC_HEADER 69 +#define ATOMICALLY_FRAME 70 +#define CATCH_RETRY_FRAME 71 +#define CATCH_STM_FRAME 72 +#define N_CLOSURE_TYPES 73 #endif /* CLOSURETYPES_H */ diff --git a/ghc/includes/RtsExternal.h b/ghc/includes/RtsExternal.h index 020c6a2..c5f5043 100644 --- a/ghc/includes/RtsExternal.h +++ b/ghc/includes/RtsExternal.h @@ -91,5 +91,6 @@ extern void performMajorGC(void); extern void performGCWithRoots(void (*get_roots)(evac_fn)); extern HsInt64 getAllocations( void ); extern void revertCAFs( void ); +extern void dirty_MUT_VAR(StgClosure *); #endif /* RTSEXTERNAL_H */ diff --git a/ghc/includes/StgMiscClosures.h b/ghc/includes/StgMiscClosures.h index f55c059..432767d 100644 --- a/ghc/includes/StgMiscClosures.h +++ b/ghc/includes/StgMiscClosures.h @@ -126,7 +126,8 @@ RTS_INFO(stg_MUT_ARR_PTRS_CLEAN_info); RTS_INFO(stg_MUT_ARR_PTRS_DIRTY_info); RTS_INFO(stg_MUT_ARR_PTRS_FROZEN_info); RTS_INFO(stg_MUT_ARR_PTRS_FROZEN0_info); -RTS_INFO(stg_MUT_VAR_info); +RTS_INFO(stg_MUT_VAR_CLEAN_info); +RTS_INFO(stg_MUT_VAR_DIRTY_info); RTS_INFO(stg_END_TSO_QUEUE_info); RTS_INFO(stg_MUT_CONS_info); RTS_INFO(stg_catch_info); @@ -186,7 +187,8 @@ RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN_entry); RTS_ENTRY(stg_MUT_ARR_PTRS_DIRTY_entry); RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_entry); RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN0_entry); -RTS_ENTRY(stg_MUT_VAR_entry); +RTS_ENTRY(stg_MUT_VAR_CLEAN_entry); +RTS_ENTRY(stg_MUT_VAR_DIRTY_entry); RTS_ENTRY(stg_END_TSO_QUEUE_entry); RTS_ENTRY(stg_MUT_CONS_entry); RTS_ENTRY(stg_catch_entry); diff --git a/ghc/includes/Storage.h b/ghc/includes/Storage.h index e37c50d..035088e 100644 --- a/ghc/includes/Storage.h +++ b/ghc/includes/Storage.h @@ -263,6 +263,15 @@ recordMutableLock(StgClosure *p) extern rtsBool keepCAFs; /* ----------------------------------------------------------------------------- + This is the write barrier for MUT_VARs, a.k.a. IORefs. A + MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY + is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY + and is put on the mutable list. + -------------------------------------------------------------------------- */ + +void dirty_MUT_VAR(StgClosure *p); + +/* ----------------------------------------------------------------------------- DEBUGGING predicates for pointers LOOKS_LIKE_INFO_PTR(p) returns False if p is definitely not an info ptr diff --git a/ghc/rts/ClosureFlags.c b/ghc/rts/ClosureFlags.c index a3f2d5f..5545693 100644 --- a/ghc/rts/ClosureFlags.c +++ b/ghc/rts/ClosureFlags.c @@ -81,7 +81,8 @@ StgWord16 closure_flags[] = { /* MUT_ARR_PTRS_DIRTY = */ (_HNF| _NS| _MUT|_UPT ), /* MUT_ARR_PTRS_FROZEN0 = */ (_HNF| _NS| _MUT|_UPT ), /* MUT_ARR_PTRS_FROZEN = */ (_HNF| _NS| _UPT ), -/* MUT_VAR = */ (_HNF| _NS| _MUT|_UPT ), +/* MUT_VAR_CLEAN = */ (_HNF| _NS| _MUT|_UPT ), +/* MUT_VAR_DIRTY = */ (_HNF| _NS| _MUT|_UPT ), /* WEAK = */ (_HNF| _NS| _UPT ), /* STABLE_NAME = */ (_HNF| _NS| _UPT ), /* TSO = */ (_HNF| _NS| _MUT|_UPT ), @@ -100,7 +101,7 @@ StgWord16 closure_flags[] = { /* CATCH_STM_FRAME = */ ( _BTM ) }; -#if N_CLOSURE_TYPES != 72 +#if N_CLOSURE_TYPES != 73 #error Closure types changed: update ClosureFlags.c! #endif diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index 566ccef..bf5d612 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1941,7 +1941,8 @@ loop: switch (info->type) { - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case MVAR: return copy(q,sizeW_fromITBL(info),stp); @@ -2894,13 +2895,22 @@ scavenge(step *stp) p += sizeofW(StgInd); break; - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } p += sizeofW(StgMutVar); break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -3277,12 +3287,21 @@ linear_scan: evacuate(((StgInd *)p)->indirectee); break; - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -3607,12 +3626,22 @@ scavenge_one(StgPtr p) break; } - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + StgPtr q = p; + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -3892,7 +3921,9 @@ scavenge_mutable_list(generation *gen) #ifdef DEBUG switch (get_itbl((StgClosure *)p)->type) { - case MUT_VAR: + case MUT_VAR_CLEAN: + barf("MUT_VAR_CLEAN on mutable list"); + case MUT_VAR_DIRTY: mutlist_MUTVARS++; break; case MUT_ARR_PTRS_CLEAN: case MUT_ARR_PTRS_DIRTY: diff --git a/ghc/rts/GCCompact.c b/ghc/rts/GCCompact.c index 9d05f5d..b5bcc19 100644 --- a/ghc/rts/GCCompact.c +++ b/ghc/rts/GCCompact.c @@ -598,7 +598,8 @@ thread_obj (StgInfoTable *info, StgPtr p) case CONSTR: case STABLE_NAME: case IND_PERM: - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: case SE_BLACKHOLE: diff --git a/ghc/rts/LdvProfile.c b/ghc/rts/LdvProfile.c index cd3c2d1..dfdda28 100644 --- a/ghc/rts/LdvProfile.c +++ b/ghc/rts/LdvProfile.c @@ -138,7 +138,8 @@ processHeapClosureForDead( StgClosure *c ) return size; case WEAK: - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case BCO: case STABLE_NAME: size = sizeW_fromITBL(info); diff --git a/ghc/rts/PrimOps.cmm b/ghc/rts/PrimOps.cmm index 01b4138..ff95943 100644 --- a/ghc/rts/PrimOps.cmm +++ b/ghc/rts/PrimOps.cmm @@ -159,7 +159,7 @@ newMutVarzh_fast ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast); mv = Hp - SIZEOF_StgMutVar + WDS(1); - SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]); + SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]); StgMutVar_var(mv) = R1; RET_P(mv); @@ -207,7 +207,7 @@ atomicModifyMutVarzh_fast HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast); #if defined(SMP) - foreign "C" ACQUIRE_LOCK(sm_mutex "ptr"); + foreign "C" ACQUIRE_LOCK(sm_mutex "ptr") [R1,R2]; #endif x = StgMutVar_var(R1); @@ -228,6 +228,7 @@ atomicModifyMutVarzh_fast StgThunk_payload(y,0) = z; StgMutVar_var(R1) = y; + foreign "C" dirty_MUT_VAR(R1) [R1]; TICK_ALLOC_THUNK_1(); CCCS_ALLOC(THUNK_1_SIZE); diff --git a/ghc/rts/Printer.c b/ghc/rts/Printer.c index 356bb38..a9f087b 100644 --- a/ghc/rts/Printer.c +++ b/ghc/rts/Printer.c @@ -351,10 +351,17 @@ printClosure( StgClosure *obj ) break; } - case MUT_VAR: + case MUT_VAR_CLEAN: { StgMutVar* mv = (StgMutVar*)obj; - debugBelch("MUT_VAR(var=%p)\n", mv->var); + debugBelch("MUT_VAR_CLEAN(var=%p)\n", mv->var); + break; + } + + case MUT_VAR_DIRTY: + { + StgMutVar* mv = (StgMutVar*)obj; + debugBelch("MUT_VAR_DIRTY(var=%p)\n", mv->var); break; } @@ -692,9 +699,11 @@ static char *closure_type_names[] = { "SE_CAF_BLACKHOLE", "MVAR", "ARR_WORDS", - "MUT_ARR_PTRS", + "MUT_ARR_PTRS_CLEAN", + "MUT_ARR_PTRS_DIRTY", "MUT_ARR_PTRS_FROZEN", - "MUT_VAR", + "MUT_VAR_CLEAN", + "MUT_VAR_DIRTY", "MUT_CONS", "WEAK", "FOREIGN", diff --git a/ghc/rts/ProfHeap.c b/ghc/rts/ProfHeap.c index 85ae9fd..362bafe 100644 --- a/ghc/rts/ProfHeap.c +++ b/ghc/rts/ProfHeap.c @@ -156,7 +156,8 @@ static char *type_names[] = { , "MUT_ARR_PTRS_CLEAN" , "MUT_ARR_PTRS_DIRTY" , "MUT_ARR_PTRS_FROZEN" - , "MUT_VAR" + , "MUT_VAR_CLEAN" + , "MUT_VAR_DIRTY" , "WEAK" @@ -925,7 +926,8 @@ heapCensusChain( Census *census, bdescr *bd ) case MVAR: case WEAK: case STABLE_NAME: - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: prim = rtsTrue; size = sizeW_fromITBL(info); break; diff --git a/ghc/rts/RetainerProfile.c b/ghc/rts/RetainerProfile.c index 8217f26..2f93cbf 100644 --- a/ghc/rts/RetainerProfile.c +++ b/ghc/rts/RetainerProfile.c @@ -463,7 +463,8 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child ) return; // one child (fixed), no SRT - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: *first_child = ((StgMutVar *)c)->var; return; case THUNK_SELECTOR: @@ -891,7 +892,8 @@ pop( StgClosure **c, StgClosure **cp, retainer *r ) case SE_CAF_BLACKHOLE: case ARR_WORDS: // one child (fixed), no SRT - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case THUNK_SELECTOR: case IND_PERM: case IND_OLDGEN_PERM: @@ -991,7 +993,8 @@ isRetainer( StgClosure *c ) // mutable objects case MVAR: - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case MUT_ARR_PTRS_CLEAN: case MUT_ARR_PTRS_DIRTY: case MUT_ARR_PTRS_FROZEN: @@ -2104,7 +2107,8 @@ sanityCheckHeapClosure( StgClosure *c ) case FUN_1_1: case FUN_0_2: case WEAK: - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case CAF_BLACKHOLE: case BLACKHOLE: case SE_BLACKHOLE: diff --git a/ghc/rts/Sanity.c b/ghc/rts/Sanity.c index f6947c9..9c0ed2b 100644 --- a/ghc/rts/Sanity.c +++ b/ghc/rts/Sanity.c @@ -305,7 +305,8 @@ checkClosure( StgClosure* p ) case BLACKHOLE: case CAF_BLACKHOLE: case STABLE_NAME: - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case CONSTR_INTLIKE: case CONSTR_CHARLIKE: case CONSTR_STATIC: diff --git a/ghc/rts/StgMiscClosures.cmm b/ghc/rts/StgMiscClosures.cmm index 9e71f85..4a69eb2 100644 --- a/ghc/rts/StgMiscClosures.cmm +++ b/ghc/rts/StgMiscClosures.cmm @@ -598,8 +598,10 @@ INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN0, 0, 0, MUT_ARR_PTRS_FROZEN0, "MUT_ARR_PTRS_F Mutable Variables ------------------------------------------------------------------------- */ -INFO_TABLE(stg_MUT_VAR, 1, 0, MUT_VAR, "MUT_VAR", "MUT_VAR") -{ foreign "C" barf("MUT_VAR object entered!"); } +INFO_TABLE(stg_MUT_VAR_CLEAN, 1, 0, MUT_VAR_CLEAN, "MUT_VAR_CLEAN", "MUT_VAR_CLEAN") +{ foreign "C" barf("MUT_VAR_CLEAN object entered!"); } +INFO_TABLE(stg_MUT_VAR_DIRTY, 1, 0, MUT_VAR_DIRTY, "MUT_VAR_DIRTY", "MUT_VAR_DIRTY") +{ foreign "C" barf("MUT_VAR_DIRTY object entered!"); } /* ---------------------------------------------------------------------------- Dummy return closure diff --git a/ghc/rts/Storage.c b/ghc/rts/Storage.c index e44348f..28ccf79 100644 --- a/ghc/rts/Storage.c +++ b/ghc/rts/Storage.c @@ -759,6 +759,22 @@ allocatePinned( nat n ) } /* ----------------------------------------------------------------------------- + This is the write barrier for MUT_VARs, a.k.a. IORefs. A + MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY + is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY + and is put on the mutable list. + -------------------------------------------------------------------------- */ + +void +dirty_MUT_VAR(StgClosure *p) +{ + if (p->header.info == &stg_MUT_VAR_CLEAN_info) { + p->header.info = &stg_MUT_VAR_DIRTY_info; + recordMutable(p); + } +} + +/* ----------------------------------------------------------------------------- Allocation functions for GMP. These all use the allocate() interface - we can't have any garbage -- 1.7.10.4