From 5270423a6afe69f1dc57e5e5a474812182718d40 Mon Sep 17 00:00:00 2001
From: Simon Marlow
Date: Tue, 1 Dec 2009 16:03:21 +0000
Subject: [PATCH] Make allocatePinned use local storage, and other refactorings

This is a batch of refactoring to remove some of the GC's global
state, as we move towards CPU-local GC.

 - allocateLocal() now allocates large objects into the local nursery,
   rather than taking a global lock and allocating them in gen 0
   step 0.

 - allocatePinned() was still allocating from global storage and
   taking a lock each time; now it uses local storage.
   (mallocForeignPtrBytes should be faster with -threaded).

 - We had a gen 0 step 0, distinct from the nurseries, which are
   stored in a separate nurseries[] array.  This is slightly strange.
   I removed the g0s0 global that pointed to gen 0 step 0, and removed
   all uses of it.  I think now we don't use gen 0 step 0 at all,
   except possibly when there is only one generation.  Possibly more
   tidying up is needed here.

 - I removed the global allocate() function, and renamed
   allocateLocal() to allocate().

 - the alloc_blocks global is gone.  MAYBE_GC() and doYouWantToGC()
   now check the local nursery only.
---
 includes/Cmm.h                |    7 +-
 includes/mkDerivedConstants.c |    3 +
 includes/rts/storage/GC.h     |   36 ++---
 rts/Capability.c              |    1 +
 rts/Capability.h              |    3 +
 rts/Interpreter.c             |   20 +--
 rts/Linker.c                  |    3 +-
 rts/PrimOps.cmm               |   10 +-
 rts/ProfHeap.c                |    2 +-
 rts/RaiseAsync.c              |    4 +-
 rts/RtsAPI.c                  |   34 ++--
 rts/STM.c                     |   12 +-
 rts/Schedule.c                |    4 +-
 rts/Threads.c                 |    6 +-
 rts/Weak.c                    |    2 +-
 rts/sm/GC.c                   |   64 +++++---
 rts/sm/MarkWeak.c             |  224 ++++++++++++++++--------------
 rts/sm/Storage.c              |  304 +++++++++++++++--------------------
 rts/sm/Storage.h              |   11 +-
 19 files changed, 343 insertions(+), 407 deletions(-)

diff --git a/includes/Cmm.h b/includes/Cmm.h
index aba5c2e..59081e2 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -380,11 +380,12 @@
     HP_CHK_GEN(alloc,liveness,reentry);        \
     TICK_ALLOC_HEAP_NOCTR(alloc);

-// allocateLocal() allocates from the nursery, so we check to see
+// allocate() allocates from the nursery, so we check to see
 // whether the nursery is nearly empty in any function that uses
-// allocateLocal() - this includes many of the primops.
+// allocate() - this includes many of the primops.
#define MAYBE_GC(liveness,reentry) \ - if (bdescr_link(CurrentNursery) == NULL || CInt[alloc_blocks] >= CInt[alloc_blocks_lim]) { \ + if (bdescr_link(CurrentNursery) == NULL || \ + step_n_large_blocks(StgRegTable_rNursery(BaseReg)) >= CInt[alloc_blocks_lim]) { \ R9 = liveness; \ R10 = reentry; \ HpAlloc = 0; \ diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index b6d0106..ddd2e65 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -230,6 +230,7 @@ main(int argc, char *argv[]) field_offset(StgRegTable, rCurrentNursery); field_offset(StgRegTable, rHpAlloc); struct_field(StgRegTable, rRet); + struct_field(StgRegTable, rNursery); def_offset("stgEagerBlackholeInfo", FUN_OFFSET(stgEagerBlackholeInfo)); def_offset("stgGCEnter1", FUN_OFFSET(stgGCEnter1)); @@ -249,6 +250,8 @@ main(int argc, char *argv[]) struct_size(generation); struct_field(generation, mut_list); + struct_field(step, n_large_blocks); + struct_size(CostCentreStack); struct_field(CostCentreStack, ccsID); struct_field(CostCentreStack, mem_alloc); diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index aa05313..1cd57c9 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -75,10 +75,6 @@ typedef struct step_ { // ------------------------------------ // Fields below are used during GC only - // During GC, if we are collecting this step, blocks and n_blocks - // are copied into the following two fields. After GC, these blocks - // are freed. - #if defined(THREADED_RTS) char pad[128]; // make sure the following is // on a separate cache line. @@ -89,6 +85,9 @@ typedef struct step_ { int mark; // mark (not copy)? (old gen only) int compact; // compact (not sweep)? (old gen only) + // During GC, if we are collecting this step, blocks and n_blocks + // are copied into the following two fields. After GC, these blocks + // are freed. bdescr * old_blocks; // bdescr of first from-space block unsigned int n_old_blocks; // number of blocks in from-space unsigned int live_estimate; // for sweeping: estimate of live data @@ -125,7 +124,6 @@ typedef struct generation_ { extern generation * generations; extern generation * g0; -extern step * g0s0; extern generation * oldest_gen; extern step * all_steps; extern nat total_steps; @@ -133,21 +131,14 @@ extern nat total_steps; /* ----------------------------------------------------------------------------- Generic allocation - StgPtr allocateInGen(generation *g, nat n) - Allocates a chunk of contiguous store - n words long in generation g, - returning a pointer to the first word. - Always succeeds. - - StgPtr allocate(nat n) Equaivalent to allocateInGen(g0) - - StgPtr allocateLocal(Capability *cap, nat n) + StgPtr allocate(Capability *cap, nat n) Allocates memory from the nursery in the current Capability. This can be done without taking a global lock, unlike allocate(). - StgPtr allocatePinned(nat n) Allocates a chunk of contiguous store + StgPtr allocatePinned(Capability *cap, nat n) + Allocates a chunk of contiguous store n words long, which is at a fixed address (won't be moved by GC). Returns a pointer to the first word. @@ -163,27 +154,16 @@ extern nat total_steps; allocatePinned, for the benefit of the ticky-ticky profiler. - rtsBool doYouWantToGC(void) Returns True if the storage manager is - ready to perform a GC, False otherwise. - - lnat allocatedBytes(void) Returns the number of bytes allocated - via allocate() since the last GC. - Used in the reporting of statistics. 
- -------------------------------------------------------------------------- */ -StgPtr allocate ( lnat n ); -StgPtr allocateInGen ( generation *g, lnat n ); -StgPtr allocateLocal ( Capability *cap, lnat n ); -StgPtr allocatePinned ( lnat n ); -lnat allocatedBytes ( void ); +StgPtr allocate ( Capability *cap, lnat n ); +StgPtr allocatePinned ( Capability *cap, lnat n ); /* memory allocator for executable memory */ void * allocateExec(unsigned int len, void **exec_addr); void freeExec (void *p); // Used by GC checks in external .cmm code: -extern nat alloc_blocks; extern nat alloc_blocks_lim; /* ----------------------------------------------------------------------------- diff --git a/rts/Capability.c b/rts/Capability.c index 95050ba..0012c24 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -253,6 +253,7 @@ initCapability( Capability *cap, nat i ) cap->free_trec_headers = NO_TREC; cap->transaction_tokens = 0; cap->context_switch = 0; + cap->pinned_object_block = NULL; } /* --------------------------------------------------------------------------- diff --git a/rts/Capability.h b/rts/Capability.h index 3f01bf3..ff6e368 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -69,6 +69,9 @@ struct Capability_ { bdescr **mut_lists; bdescr **saved_mut_lists; // tmp use during GC + // block for allocating pinned objects into + bdescr *pinned_object_block; + // Context switch flag. We used to have one global flag, now one // per capability. Locks required : none (conflicts are harmless) int context_switch; diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 339d4d8..5197510 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -89,7 +89,7 @@ STATIC_INLINE StgPtr allocate_NONUPD (Capability *cap, int n_words) { - return allocateLocal(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words)); + return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words)); } int rts_stop_next_breakpoint = 0; @@ -604,7 +604,7 @@ do_apply: else /* arity > n */ { // build a new PAP and return it. StgPAP *new_pap; - new_pap = (StgPAP *)allocateLocal(cap, PAP_sizeW(pap->n_args + m)); + new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m)); SET_HDR(new_pap,&stg_PAP_info,CCCS); new_pap->arity = pap->arity - n; new_pap->n_args = pap->n_args + m; @@ -649,7 +649,7 @@ do_apply: // build a PAP and return it. 
StgPAP *pap; nat i; - pap = (StgPAP *)allocateLocal(cap, PAP_sizeW(m)); + pap = (StgPAP *)allocate(cap, PAP_sizeW(m)); SET_HDR(pap, &stg_PAP_info,CCCS); pap->arity = arity - n; pap->fun = obj; @@ -718,7 +718,7 @@ do_apply: run_BCO_return: // Heap check - if (doYouWantToGC()) { + if (doYouWantToGC(cap)) { Sp--; Sp[0] = (W_)&stg_enter_info; RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow); } @@ -729,7 +729,7 @@ run_BCO_return: run_BCO_return_unboxed: // Heap check - if (doYouWantToGC()) { + if (doYouWantToGC(cap)) { RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow); } // Stack checks aren't necessary at return points, the stack use @@ -747,7 +747,7 @@ run_BCO_fun: ); // Heap check - if (doYouWantToGC()) { + if (doYouWantToGC(cap)) { Sp -= 2; Sp[1] = (W_)obj; Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really @@ -863,7 +863,7 @@ run_BCO: // stg_apply_interp_info pointer and a pointer to // the BCO size_words = BCO_BITMAP_SIZE(obj) + 2; - new_aps = (StgAP_STACK *) allocateLocal(cap, AP_STACK_sizeW(size_words)); + new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words)); SET_HDR(new_aps,&stg_AP_STACK_info,CCS_SYSTEM); new_aps->size = size_words; new_aps->fun = &stg_dummy_ret_closure; @@ -1082,7 +1082,7 @@ run_BCO: case bci_ALLOC_AP: { StgAP* ap; int n_payload = BCO_NEXT; - ap = (StgAP*)allocateLocal(cap, AP_sizeW(n_payload)); + ap = (StgAP*)allocate(cap, AP_sizeW(n_payload)); Sp[-1] = (W_)ap; ap->n_args = n_payload; SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/) @@ -1093,7 +1093,7 @@ run_BCO: case bci_ALLOC_AP_NOUPD: { StgAP* ap; int n_payload = BCO_NEXT; - ap = (StgAP*)allocateLocal(cap, AP_sizeW(n_payload)); + ap = (StgAP*)allocate(cap, AP_sizeW(n_payload)); Sp[-1] = (W_)ap; ap->n_args = n_payload; SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/) @@ -1105,7 +1105,7 @@ run_BCO: StgPAP* pap; int arity = BCO_NEXT; int n_payload = BCO_NEXT; - pap = (StgPAP*)allocateLocal(cap, PAP_sizeW(n_payload)); + pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload)); Sp[-1] = (W_)pap; pap->n_args = n_payload; pap->arity = arity; diff --git a/rts/Linker.c b/rts/Linker.c index 2dcd21b..2412864 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -943,9 +943,8 @@ typedef struct _RtsSymbolVal { SymI_HasProto(stg_writeTVarzh) \ SymI_HasProto(stg_yieldzh) \ SymI_NeedsProto(stg_interp_constr_entry) \ - SymI_HasProto(alloc_blocks) \ SymI_HasProto(alloc_blocks_lim) \ - SymI_HasProto(allocateLocal) \ + SymI_HasProto(allocate) \ SymI_HasProto(allocateExec) \ SymI_HasProto(freeExec) \ SymI_HasProto(getAllocations) \ diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index ac6de81..5e762b1 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -58,7 +58,7 @@ stg_newByteArrayzh n = R1; payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); StgArrWords_words(p) = payload_words; @@ -85,7 +85,7 @@ stg_newPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -117,7 +117,7 @@ stg_newAlignedPinnedByteArrayzh /* Now we convert to a number of words: */ 
words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(words) []; + ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -139,7 +139,7 @@ stg_newArrayzh MAYBE_GC(R2_PTR,stg_newArrayzh); words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n; - ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2]; + ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); @@ -356,7 +356,7 @@ stg_mkWeakForeignEnvzh payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) []; + ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]); diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 69dd798..b9fc7b3 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -1086,7 +1086,7 @@ heapCensus( void ) // Traverse the heap, collecting the census info if (RtsFlags.GcFlags.generations == 1) { - heapCensusChain( census, g0s0->blocks ); + heapCensusChain( census, g0->steps[0].blocks ); } else { for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (s = 0; s < generations[g].n_steps; s++) { diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index a0f78ee..fad2803 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -792,7 +792,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // fun field. // words = frame - sp - 1; - ap = (StgAP_STACK *)allocateLocal(cap,AP_STACK_sizeW(words)); + ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words)); ap->size = words; ap->fun = (StgClosure *)sp[0]; @@ -856,7 +856,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // we've got an exception to raise, so let's pass it to the // handler in this frame. 
// - raise = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1); + raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1); TICK_ALLOC_SE_THK(1,0); SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs); raise->payload[0] = exception; diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c index 54d1e75..c4babca 100644 --- a/rts/RtsAPI.c +++ b/rts/RtsAPI.c @@ -28,7 +28,7 @@ HaskellObj rts_mkChar (Capability *cap, HsChar c) { - StgClosure *p = (StgClosure *)allocateLocal(cap, CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1)); SET_HDR(p, Czh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(StgChar)c; return p; @@ -37,7 +37,7 @@ rts_mkChar (Capability *cap, HsChar c) HaskellObj rts_mkInt (Capability *cap, HsInt i) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, Izh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgInt)i; return p; @@ -46,7 +46,7 @@ rts_mkInt (Capability *cap, HsInt i) HaskellObj rts_mkInt8 (Capability *cap, HsInt8 i) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, I8zh_con_info, CCS_SYSTEM); /* Make sure we mask out the bits above the lowest 8 */ p->payload[0] = (StgClosure *)(StgInt)i; @@ -56,7 +56,7 @@ rts_mkInt8 (Capability *cap, HsInt8 i) HaskellObj rts_mkInt16 (Capability *cap, HsInt16 i) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, I16zh_con_info, CCS_SYSTEM); /* Make sure we mask out the relevant bits */ p->payload[0] = (StgClosure *)(StgInt)i; @@ -66,7 +66,7 @@ rts_mkInt16 (Capability *cap, HsInt16 i) HaskellObj rts_mkInt32 (Capability *cap, HsInt32 i) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, I32zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgInt)i; return p; @@ -75,7 +75,7 @@ rts_mkInt32 (Capability *cap, HsInt32 i) HaskellObj rts_mkInt64 (Capability *cap, HsInt64 i) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2)); SET_HDR(p, I64zh_con_info, CCS_SYSTEM); ASSIGN_Int64((P_)&(p->payload[0]), i); return p; @@ -84,7 +84,7 @@ rts_mkInt64 (Capability *cap, HsInt64 i) HaskellObj rts_mkWord (Capability *cap, HsWord i) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, Wzh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)i; return p; @@ -94,7 +94,7 @@ HaskellObj rts_mkWord8 (Capability *cap, HsWord8 w) { /* see rts_mkInt* comments */ - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, W8zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(w & 0xff); return p; @@ -104,7 +104,7 @@ HaskellObj rts_mkWord16 (Capability *cap, HsWord16 w) { /* see rts_mkInt* comments */ - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, W16zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(w & 0xffff); return p; @@ -114,7 +114,7 @@ HaskellObj rts_mkWord32 (Capability *cap, HsWord32 w) { /* see rts_mkInt* comments */ - StgClosure *p = (StgClosure 
*)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, W32zh_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)(StgWord)(w & 0xffffffff); return p; @@ -123,7 +123,7 @@ rts_mkWord32 (Capability *cap, HsWord32 w) HaskellObj rts_mkWord64 (Capability *cap, HsWord64 w) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2)); /* see mk_Int8 comment */ SET_HDR(p, W64zh_con_info, CCS_SYSTEM); ASSIGN_Word64((P_)&(p->payload[0]), w); @@ -134,7 +134,7 @@ rts_mkWord64 (Capability *cap, HsWord64 w) HaskellObj rts_mkFloat (Capability *cap, HsFloat f) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1)); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1)); SET_HDR(p, Fzh_con_info, CCS_SYSTEM); ASSIGN_FLT((P_)p->payload, (StgFloat)f); return p; @@ -143,7 +143,7 @@ rts_mkFloat (Capability *cap, HsFloat f) HaskellObj rts_mkDouble (Capability *cap, HsDouble d) { - StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,sizeofW(StgDouble))); + StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,sizeofW(StgDouble))); SET_HDR(p, Dzh_con_info, CCS_SYSTEM); ASSIGN_DBL((P_)p->payload, (StgDouble)d); return p; @@ -152,7 +152,7 @@ rts_mkDouble (Capability *cap, HsDouble d) HaskellObj rts_mkStablePtr (Capability *cap, HsStablePtr s) { - StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1); + StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1); SET_HDR(p, StablePtr_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)s; return p; @@ -161,7 +161,7 @@ rts_mkStablePtr (Capability *cap, HsStablePtr s) HaskellObj rts_mkPtr (Capability *cap, HsPtr a) { - StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1); + StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1); SET_HDR(p, Ptr_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)a; return p; @@ -170,7 +170,7 @@ rts_mkPtr (Capability *cap, HsPtr a) HaskellObj rts_mkFunPtr (Capability *cap, HsFunPtr a) { - StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1); + StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1); SET_HDR(p, FunPtr_con_info, CCS_SYSTEM); p->payload[0] = (StgClosure *)a; return p; @@ -197,7 +197,7 @@ rts_apply (Capability *cap, HaskellObj f, HaskellObj arg) { StgThunk *ap; - ap = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk) + 2); + ap = (StgThunk *)allocate(cap,sizeofW(StgThunk) + 2); SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_SYSTEM); ap->payload[0] = f; ap->payload[1] = arg; diff --git a/rts/STM.c b/rts/STM.c index 7921a67..ed5a722 100644 --- a/rts/STM.c +++ b/rts/STM.c @@ -412,7 +412,7 @@ static void unpark_waiters_on(Capability *cap, StgTVar *s) { static StgInvariantCheckQueue *new_stg_invariant_check_queue(Capability *cap, StgAtomicInvariant *invariant) { StgInvariantCheckQueue *result; - result = (StgInvariantCheckQueue *)allocateLocal(cap, sizeofW(StgInvariantCheckQueue)); + result = (StgInvariantCheckQueue *)allocate(cap, sizeofW(StgInvariantCheckQueue)); SET_HDR (result, &stg_INVARIANT_CHECK_QUEUE_info, CCS_SYSTEM); result -> invariant = invariant; result -> my_execution = NO_TREC; @@ -422,7 +422,7 @@ static StgInvariantCheckQueue *new_stg_invariant_check_queue(Capability *cap, static StgTVarWatchQueue *new_stg_tvar_watch_queue(Capability *cap, StgClosure *closure) { StgTVarWatchQueue *result; - result = (StgTVarWatchQueue *)allocateLocal(cap, 
sizeofW(StgTVarWatchQueue)); + result = (StgTVarWatchQueue *)allocate(cap, sizeofW(StgTVarWatchQueue)); SET_HDR (result, &stg_TVAR_WATCH_QUEUE_info, CCS_SYSTEM); result -> closure = closure; return result; @@ -430,7 +430,7 @@ static StgTVarWatchQueue *new_stg_tvar_watch_queue(Capability *cap, static StgTRecChunk *new_stg_trec_chunk(Capability *cap) { StgTRecChunk *result; - result = (StgTRecChunk *)allocateLocal(cap, sizeofW(StgTRecChunk)); + result = (StgTRecChunk *)allocate(cap, sizeofW(StgTRecChunk)); SET_HDR (result, &stg_TREC_CHUNK_info, CCS_SYSTEM); result -> prev_chunk = END_STM_CHUNK_LIST; result -> next_entry_idx = 0; @@ -440,7 +440,7 @@ static StgTRecChunk *new_stg_trec_chunk(Capability *cap) { static StgTRecHeader *new_stg_trec_header(Capability *cap, StgTRecHeader *enclosing_trec) { StgTRecHeader *result; - result = (StgTRecHeader *) allocateLocal(cap, sizeofW(StgTRecHeader)); + result = (StgTRecHeader *) allocate(cap, sizeofW(StgTRecHeader)); SET_HDR (result, &stg_TREC_HEADER_info, CCS_SYSTEM); result -> enclosing_trec = enclosing_trec; @@ -1175,7 +1175,7 @@ void stmAddInvariantToCheck(Capability *cap, // 1. Allocate an StgAtomicInvariant, set last_execution to NO_TREC // to signal that this is a new invariant in the current atomic block - invariant = (StgAtomicInvariant *) allocateLocal(cap, sizeofW(StgAtomicInvariant)); + invariant = (StgAtomicInvariant *) allocate(cap, sizeofW(StgAtomicInvariant)); TRACE("%p : stmAddInvariantToCheck allocated invariant=%p", trec, invariant); SET_HDR (invariant, &stg_ATOMIC_INVARIANT_info, CCS_SYSTEM); invariant -> code = code; @@ -1657,7 +1657,7 @@ void stmWriteTVar(Capability *cap, StgTVar *stmNewTVar(Capability *cap, StgClosure *new_value) { StgTVar *result; - result = (StgTVar *)allocateLocal(cap, sizeofW(StgTVar)); + result = (StgTVar *)allocate(cap, sizeofW(StgTVar)); SET_HDR (result, &stg_TVAR_info, CCS_SYSTEM); result -> current_value = new_value; result -> first_watch_queue_entry = END_STM_WATCH_QUEUE; diff --git a/rts/Schedule.c b/rts/Schedule.c index bb36f9b..3ae1fe0 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -2262,7 +2262,7 @@ threadStackOverflow(Capability *cap, StgTSO *tso) "increasing stack size from %ld words to %d.", (long)tso->stack_size, new_stack_size); - dest = (StgTSO *)allocateLocal(cap,new_tso_size); + dest = (StgTSO *)allocate(cap,new_tso_size); TICK_ALLOC_TSO(new_stack_size,0); /* copy the TSO block and the old stack into the new area */ @@ -2533,7 +2533,7 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception) // Only create raise_closure if we need to. 
if (raise_closure == NULL) { raise_closure = - (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1); + (StgThunk *)allocate(cap,sizeofW(StgThunk)+1); SET_HDR(raise_closure, &stg_raise_info, CCCS); raise_closure->payload[0] = exception; } diff --git a/rts/Threads.c b/rts/Threads.c index 3b209ea..9867c1c 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -63,7 +63,7 @@ createThread(Capability *cap, nat size) } size = round_to_mblocks(size); - tso = (StgTSO *)allocateLocal(cap, size); + tso = (StgTSO *)allocate(cap, size); stack_size = size - TSO_STRUCT_SIZEW; TICK_ALLOC_TSO(stack_size, 0); @@ -102,8 +102,8 @@ createThread(Capability *cap, nat size) */ ACQUIRE_LOCK(&sched_mutex); tso->id = next_thread_id++; // while we have the mutex - tso->global_link = g0s0->threads; - g0s0->threads = tso; + tso->global_link = cap->r.rNursery->threads; + cap->r.rNursery->threads = tso; RELEASE_LOCK(&sched_mutex); // ToDo: report the stack size in the event? diff --git a/rts/Weak.c b/rts/Weak.c index f5c3a62..f5e918a 100644 --- a/rts/Weak.c +++ b/rts/Weak.c @@ -120,7 +120,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list) debugTrace(DEBUG_weak, "weak: batching %d finalizers", n); - arr = (StgMutArrPtrs *)allocateLocal(cap, sizeofW(StgMutArrPtrs) + n); + arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + n); TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0); SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_info, CCS_SYSTEM); arr->ptrs = n; diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 0593bd7..3f556ab 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -425,9 +425,9 @@ SET_GCT(gc_threads[0]); // g0s0->old_blocks is the old nursery // g0s0->blocks is to-space from the previous GC if (RtsFlags.GcFlags.generations == 1) { - if (g0s0->blocks != NULL) { - freeChain(g0s0->blocks); - g0s0->blocks = NULL; + if (g0->steps[0].blocks != NULL) { + freeChain(g0->steps[0].blocks); + g0->steps[0].blocks = NULL; } } @@ -646,18 +646,13 @@ SET_GCT(gc_threads[0]); /* LARGE OBJECTS. The current live large objects are chained on * scavenged_large, having been moved during garbage - * collection from large_objects. Any objects left on + * collection from large_objects. Any objects left on the * large_objects list are therefore dead, so we free them here. */ - for (bd = stp->large_objects; bd != NULL; bd = next) { - next = bd->link; - freeGroup(bd); - bd = next; - } - + freeChain(stp->large_objects); stp->large_objects = stp->scavenged_large_objects; stp->n_large_blocks = stp->n_scavenged_large_blocks; - + ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); } else // for older generations... { @@ -672,6 +667,7 @@ SET_GCT(gc_threads[0]); // add the new blocks we promoted during this GC stp->n_large_blocks += stp->n_scavenged_large_blocks; + ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); } } } @@ -685,18 +681,19 @@ SET_GCT(gc_threads[0]); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. if (RtsFlags.GcFlags.generations > 1) { - if (g0s0->blocks != NULL) { - freeChain(g0s0->blocks); - g0s0->blocks = NULL; + if (g0->steps[0].blocks != NULL) { + freeChain(g0->steps[0].blocks); + g0->steps[0].blocks = NULL; } - g0s0->n_blocks = 0; - g0s0->n_words = 0; + g0->steps[0].n_blocks = 0; + g0->steps[0].n_words = 0; } - alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; // Start a new pinned_object_block - pinned_object_block = NULL; + for (n = 0; n < n_capabilities; n++) { + capabilities[n].pinned_object_block = NULL; + } // Free the mark stack. 
if (mark_stack_top_bd != NULL) { @@ -932,14 +929,23 @@ initGcThreads (void) void freeGcThreads (void) { + nat s; if (gc_threads != NULL) { #if defined(THREADED_RTS) nat i; - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { + for (i = 0; i < n_capabilities; i++) { + for (s = 0; s < total_steps; s++) + { + freeWSDeque(gc_threads[i]->steps[s].todo_q); + } stgFree (gc_threads[i]); } stgFree (gc_threads); #else + for (s = 0; s < total_steps; s++) + { + freeWSDeque(gc_threads[0]->steps[s].todo_q); + } stgFree (gc_threads); #endif gc_threads = NULL; @@ -1230,8 +1236,21 @@ init_collected_gen (nat g, nat n_threads) } } + if (g == 0) { + for (i = 0; i < n_capabilities; i++) { + stp = &nurseries[i]; + stp->old_threads = stp->threads; + stp->threads = END_TSO_QUEUE; + } + } + for (s = 0; s < generations[g].n_steps; s++) { + // generation 0, step 0 doesn't need to-space, unless -G1 + if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { + continue; + } + stp = &generations[g].steps[s]; ASSERT(stp->gen_no == g); @@ -1240,11 +1259,6 @@ init_collected_gen (nat g, nat n_threads) stp->old_threads = stp->threads; stp->threads = END_TSO_QUEUE; - // generation 0, step 0 doesn't need to-space - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - // deprecate the existing blocks stp->old_blocks = stp->blocks; stp->n_old_blocks = stp->n_blocks; @@ -1642,7 +1656,7 @@ resize_nursery (void) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = g0s0->n_blocks; + blocks = generations[0].steps[0].n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 > diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index 4f0a7a4..2f5964f 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -21,6 +21,7 @@ #include "Trace.h" #include "Schedule.h" #include "Weak.h" +#include "Storage.h" /* ----------------------------------------------------------------------------- Weak Pointers @@ -82,6 +83,9 @@ StgTSO *resurrected_threads; // List of blocked threads found to have pending throwTos StgTSO *exception_threads; +static void resurrectUnreachableThreads (step *stp); +static rtsBool tidyThreadList (step *stp); + void initWeakForGC(void) { @@ -182,85 +186,23 @@ traverseWeakPtrList(void) return rtsTrue; case WeakThreads: - /* Now deal with the all_threads list, which behaves somewhat like + /* Now deal with the step->threads lists, which behave somewhat like * the weak ptr list. If we discover any threads that are about to * become garbage, we wake them up and administer an exception. */ - { - StgTSO *t, *tmp, *next, **prev; - nat g, s; - step *stp; + { + nat g, s, n; - // Traverse thread lists for generations we collected... - for (g = 0; g <= N; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - - prev = &stp->old_threads; - - for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) { - - tmp = (StgTSO *)isAlive((StgClosure *)t); - - if (tmp != NULL) { - t = tmp; - } - - ASSERT(get_itbl(t)->type == TSO); - if (t->what_next == ThreadRelocated) { - next = t->_link; - *prev = next; - continue; - } - - next = t->global_link; - - // This is a good place to check for blocked - // exceptions. 
It might be the case that a thread is - // blocked on delivering an exception to a thread that - // is also blocked - we try to ensure that this - // doesn't happen in throwTo(), but it's too hard (or - // impossible) to close all the race holes, so we - // accept that some might get through and deal with - // them here. A GC will always happen at some point, - // even if the system is otherwise deadlocked. - // - // If an unreachable thread has blocked - // exceptions, we really want to perform the - // blocked exceptions rather than throwing - // BlockedIndefinitely exceptions. This is the - // only place we can discover such threads. - // The target thread might even be - // ThreadFinished or ThreadKilled. Bugs here - // will only be seen when running on a - // multiprocessor. - if (t->blocked_exceptions != END_TSO_QUEUE) { - if (tmp == NULL) { - evacuate((StgClosure **)&t); - flag = rtsTrue; - } - t->global_link = exception_threads; - exception_threads = t; - *prev = next; - continue; - } - - if (tmp == NULL) { - // not alive (yet): leave this thread on the - // old_all_threads list. - prev = &(t->global_link); - } - else { - // alive - *prev = next; - - // move this thread onto the correct threads list. - step *new_step; - new_step = Bdescr((P_)t)->step; - t->global_link = new_step->threads; - new_step->threads = t; - } - } + // Traverse thread lists for generations we collected... + for (n = 0; n < n_capabilities; n++) { + if (tidyThreadList(&nurseries[n])) { + flag = rtsTrue; + } + } + for (g = 0; g <= N; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + if (tidyThreadList(&generations[g].steps[s])) { + flag = rtsTrue; } } } @@ -272,36 +214,18 @@ traverseWeakPtrList(void) /* And resurrect any threads which were about to become garbage. */ { - nat g, s; - step *stp; - StgTSO *t, *tmp, *next; + nat g, s, n; + for (n = 0; n < n_capabilities; n++) { + resurrectUnreachableThreads(&nurseries[n]); + } for (g = 0; g <= N; g++) { for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - - for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) { - next = t->global_link; - - // ThreadFinished and ThreadComplete: we have to keep - // these on the all_threads list until they - // become garbage, because they might get - // pending exceptions. - switch (t->what_next) { - case ThreadKilled: - case ThreadComplete: - continue; - default: - tmp = t; - evacuate((StgClosure **)&tmp); - tmp->global_link = resurrected_threads; - resurrected_threads = tmp; - } - } + resurrectUnreachableThreads(&generations[g].steps[s]); } } } - + /* Finally, we can update the blackhole_queue. This queue * simply strings together TSOs blocked on black holes, it is * not intended to keep anything alive. Hence, we do not follow @@ -316,15 +240,113 @@ traverseWeakPtrList(void) ASSERT(*pt != NULL); } } - + weak_stage = WeakDone; // *now* we're done, return rtsTrue; // but one more round of scavenging, please - + } + default: barf("traverse_weak_ptr_list"); return rtsTrue; } +} + + static void resurrectUnreachableThreads (step *stp) +{ + StgTSO *t, *tmp, *next; + + for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) { + next = t->global_link; + + // ThreadFinished and ThreadComplete: we have to keep + // these on the all_threads list until they + // become garbage, because they might get + // pending exceptions. 
+ switch (t->what_next) { + case ThreadKilled: + case ThreadComplete: + continue; + default: + tmp = t; + evacuate((StgClosure **)&tmp); + tmp->global_link = resurrected_threads; + resurrected_threads = tmp; + } + } +} + +static rtsBool tidyThreadList (step *stp) +{ + StgTSO *t, *tmp, *next, **prev; + rtsBool flag = rtsFalse; + prev = &stp->old_threads; + + for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) { + + tmp = (StgTSO *)isAlive((StgClosure *)t); + + if (tmp != NULL) { + t = tmp; + } + + ASSERT(get_itbl(t)->type == TSO); + if (t->what_next == ThreadRelocated) { + next = t->_link; + *prev = next; + continue; + } + + next = t->global_link; + + // This is a good place to check for blocked + // exceptions. It might be the case that a thread is + // blocked on delivering an exception to a thread that + // is also blocked - we try to ensure that this + // doesn't happen in throwTo(), but it's too hard (or + // impossible) to close all the race holes, so we + // accept that some might get through and deal with + // them here. A GC will always happen at some point, + // even if the system is otherwise deadlocked. + // + // If an unreachable thread has blocked + // exceptions, we really want to perform the + // blocked exceptions rather than throwing + // BlockedIndefinitely exceptions. This is the + // only place we can discover such threads. + // The target thread might even be + // ThreadFinished or ThreadKilled. Bugs here + // will only be seen when running on a + // multiprocessor. + if (t->blocked_exceptions != END_TSO_QUEUE) { + if (tmp == NULL) { + evacuate((StgClosure **)&t); + flag = rtsTrue; + } + t->global_link = exception_threads; + exception_threads = t; + *prev = next; + continue; + } + + if (tmp == NULL) { + // not alive (yet): leave this thread on the + // old_all_threads list. + prev = &(t->global_link); + } + else { + // alive + *prev = next; + + // move this thread onto the correct threads list. + step *new_step; + new_step = Bdescr((P_)t)->step; + t->global_link = new_step->threads; + new_step->threads = t; + } + } + + return flag; } /* ----------------------------------------------------------------------------- diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 73ef53f..5d371b9 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -40,16 +40,14 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *pinned_object_block; /* allocate pinned objects into this block */ -nat alloc_blocks; /* number of allocate()d blocks since GC */ -nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ +nat alloc_blocks_lim; /* GC if n_large_blocks in any nursery + * reaches this. */ static bdescr *exec_block; generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ -step *g0s0 = NULL; /* generation 0, step 0, for convenience */ nat total_steps = 0; step *all_steps = NULL; /* single array of steps */ @@ -143,14 +141,6 @@ initStorage( void ) * sizeof(struct generation_), "initStorage: gens"); - /* allocate all the steps into an array. It is important that we do - it this way, because we need the invariant that two step pointers - can be directly compared to see which is the oldest. - Remember that the last generation has only one step. 
*/ - total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps; - all_steps = stgMallocBytes(total_steps * sizeof(struct step_), - "initStorage: steps"); - /* Initialise all generations */ for(g = 0; g < RtsFlags.GcFlags.generations; g++) { gen = &generations[g]; @@ -166,6 +156,14 @@ initStorage( void ) g0 = &generations[0]; oldest_gen = &generations[RtsFlags.GcFlags.generations-1]; + /* allocate all the steps into an array. It is important that we do + it this way, because we need the invariant that two step pointers + can be directly compared to see which is the oldest. + Remember that the last generation has only one step. */ + total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps; + all_steps = stgMallocBytes(total_steps * sizeof(struct step_), + "initStorage: steps"); + /* Allocate step structures in each generation */ if (RtsFlags.GcFlags.generations > 1) { /* Only for multiple-generations */ @@ -187,11 +185,7 @@ initStorage( void ) g0->steps = all_steps; } -#ifdef THREADED_RTS n_nurseries = n_capabilities; -#else - n_nurseries = 1; -#endif nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), "initStorage: nurseries"); @@ -231,7 +225,6 @@ initStorage( void ) } generations[0].max_blocks = 0; - g0s0 = &generations[0].steps[0]; /* The allocation area. Policy: keep the allocation area * small to begin with, even if we have a large suggested heap @@ -246,7 +239,6 @@ initStorage( void ) revertible_caf_list = NULL; /* initialise the allocate() interface */ - alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; exec_block = NULL; @@ -274,7 +266,7 @@ exitStorage (void) void freeStorage (void) { - stgFree(g0s0); // frees all the steps + stgFree(all_steps); // frees all the steps stgFree(generations); freeAllMBlocks(); #if defined(THREADED_RTS) @@ -423,7 +415,6 @@ allocNursery (step *stp, bdescr *tail, nat blocks) static void assignNurseriesToCapabilities (void) { -#ifdef THREADED_RTS nat i; for (i = 0; i < n_nurseries; i++) { @@ -431,11 +422,6 @@ assignNurseriesToCapabilities (void) capabilities[i].r.rCurrentNursery = nurseries[i].blocks; capabilities[i].r.rCurrentAlloc = NULL; } -#else /* THREADED_RTS */ - MainCapability.r.rNursery = &nurseries[0]; - MainCapability.r.rCurrentNursery = nurseries[0].blocks; - MainCapability.r.rCurrentAlloc = NULL; -#endif } static void @@ -469,6 +455,10 @@ resetNurseries( void ) ASSERT(bd->step == stp); IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); } + // these large objects are dead, since we have just GC'd + freeChain(stp->large_objects); + stp->large_objects = NULL; + stp->n_large_blocks = 0; } assignNurseriesToCapabilities(); } @@ -481,6 +471,7 @@ countNurseryBlocks (void) for (i = 0; i < n_nurseries; i++) { blocks += nurseries[i].n_blocks; + blocks += nurseries[i].n_large_blocks; } return blocks; } @@ -565,129 +556,46 @@ move_TSO (StgTSO *src, StgTSO *dest) } /* ----------------------------------------------------------------------------- - The allocate() interface - - allocateInGen() function allocates memory directly into a specific - generation. It always succeeds, and returns a chunk of memory n - words long. n can be larger than the size of a block if necessary, - in which case a contiguous block group will be allocated. - - allocate(n) is equivalent to allocateInGen(g0). + split N blocks off the front of the given bdescr, returning the + new block group. We add the remainder to the large_blocks list + in the same step as the original block. 
-------------------------------------------------------------------------- */ -StgPtr -allocateInGen (generation *g, lnat n) -{ - step *stp; - bdescr *bd; - StgPtr ret; - - ACQUIRE_SM_LOCK; - - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - stp = &g->steps[0]; - - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) - { - lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; - - // Attempting to allocate an object larger than maxHeapSize - // should definitely be disallowed. (bug #1791) - if (RtsFlags.GcFlags.maxHeapSize > 0 && - req_blocks >= RtsFlags.GcFlags.maxHeapSize) { - heapOverflow(); - // heapOverflow() doesn't exit (see #2592), but we aren't - // in a position to do a clean shutdown here: we - // either have to allocate the memory or exit now. - // Allocating the memory would be bad, because the user - // has requested that we not exceed maxHeapSize, so we - // just exit. - stg_exit(EXIT_HEAPOVERFLOW); - } - - bd = allocGroup(req_blocks); - dbl_link_onto(bd, &stp->large_objects); - stp->n_large_blocks += bd->blocks; // might be larger than req_blocks - alloc_blocks += bd->blocks; - initBdescr(bd, stp); - bd->flags = BF_LARGE; - bd->free = bd->start + n; - ret = bd->start; - } - else - { - // small allocation (blocks; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - bd = allocBlock(); - initBdescr(bd, stp); - bd->flags = 0; - bd->link = stp->blocks; - stp->blocks = bd; - stp->n_blocks++; - alloc_blocks++; - } - ret = bd->free; - bd->free += n; - } - - RELEASE_SM_LOCK; - - return ret; -} - -StgPtr -allocate (lnat n) -{ - return allocateInGen(g0,n); -} - -lnat -allocatedBytes( void ) -{ - lnat allocated; - - allocated = alloc_blocks * BLOCK_SIZE_W; - if (pinned_object_block != NULL) { - allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - - pinned_object_block->free; - } - - return allocated; -} - -// split N blocks off the front of the given bdescr, returning the -// new block group. We treat the remainder as if it -// had been freshly allocated in generation 0. bdescr * splitLargeBlock (bdescr *bd, nat blocks) { bdescr *new_bd; + ACQUIRE_SM_LOCK; + + ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks); + // subtract the original number of blocks from the counter first bd->step->n_large_blocks -= bd->blocks; new_bd = splitBlockGroup (bd, blocks); - - dbl_link_onto(new_bd, &g0s0->large_objects); - g0s0->n_large_blocks += new_bd->blocks; - initBdescr(new_bd, g0s0); - new_bd->flags = BF_LARGE; + initBdescr(new_bd, bd->step); + new_bd->flags = BF_LARGE | (bd->flags & BF_EVACUATED); + // if new_bd is in an old generation, we have to set BF_EVACUATED new_bd->free = bd->free; + dbl_link_onto(new_bd, &bd->step->large_objects); + ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W); // add the new number of blocks to the counter. Due to the gaps - // for block descriptor, new_bd->blocks + bd->blocks might not be + // for block descriptors, new_bd->blocks + bd->blocks might not be // equal to the original bd->blocks, which is why we do it this way. - bd->step->n_large_blocks += bd->blocks; + bd->step->n_large_blocks += bd->blocks + new_bd->blocks; + + ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks); + + RELEASE_SM_LOCK; return new_bd; } /* ----------------------------------------------------------------------------- - allocateLocal() + allocate() This allocates memory in the current thread - it is intended for use primarily from STG-land where we have a Capability. 
It is @@ -700,13 +608,38 @@ splitLargeBlock (bdescr *bd, nat blocks) -------------------------------------------------------------------------- */ StgPtr -allocateLocal (Capability *cap, lnat n) +allocate (Capability *cap, lnat n) { bdescr *bd; StgPtr p; + step *stp; if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - return allocateInGen(g0,n); + lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; + + // Attempting to allocate an object larger than maxHeapSize + // should definitely be disallowed. (bug #1791) + if (RtsFlags.GcFlags.maxHeapSize > 0 && + req_blocks >= RtsFlags.GcFlags.maxHeapSize) { + heapOverflow(); + // heapOverflow() doesn't exit (see #2592), but we aren't + // in a position to do a clean shutdown here: we + // either have to allocate the memory or exit now. + // Allocating the memory would be bad, because the user + // has requested that we not exceed maxHeapSize, so we + // just exit. + stg_exit(EXIT_HEAPOVERFLOW); + } + + stp = &nurseries[cap->no]; + + bd = allocGroup(req_blocks); + dbl_link_onto(bd, &stp->large_objects); + stp->n_large_blocks += bd->blocks; // might be larger than req_blocks + initBdescr(bd, stp); + bd->flags = BF_LARGE; + bd->free = bd->start + n; + return bd->start; } /* small allocation (r.rNursery); bd->flags = 0; - // NO: alloc_blocks++; - // calcAllocated() uses the size of the nursery, and we've - // already bumpted nursery->n_blocks above. We'll GC - // pretty quickly now anyway, because MAYBE_GC() will + // If we had to allocate a new block, then we'll GC + // pretty quickly now, because MAYBE_GC() will // notice that CurrentNursery->link is NULL. } else { // we have a block in the nursery: take it and put @@ -778,39 +709,41 @@ allocateLocal (Capability *cap, lnat n) ------------------------------------------------------------------------- */ StgPtr -allocatePinned( lnat n ) +allocatePinned (Capability *cap, lnat n) { StgPtr p; - bdescr *bd = pinned_object_block; + bdescr *bd; + step *stp; // If the request is for a large object, then allocate() // will give us a pinned object anyway. if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - p = allocate(n); + p = allocate(cap, n); Bdescr(p)->flags |= BF_PINNED; return p; } - ACQUIRE_SM_LOCK; - TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); + bd = cap->pinned_object_block; + // If we don't have a block of pinned objects yet, or the current // one isn't large enough to hold the new object, allocate a new one. 
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { - pinned_object_block = bd = allocBlock(); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks++; - initBdescr(bd, g0s0); + ACQUIRE_SM_LOCK + cap->pinned_object_block = bd = allocBlock(); + RELEASE_SM_LOCK + stp = &nurseries[cap->no]; + dbl_link_onto(bd, &stp->large_objects); + stp->n_large_blocks++; + initBdescr(bd, stp); bd->flags = BF_PINNED | BF_LARGE; bd->free = bd->start; - alloc_blocks++; } p = bd->free; bd->free += n; - RELEASE_SM_LOCK; return p; } @@ -900,14 +833,11 @@ calcAllocated( void ) { nat allocated; bdescr *bd; + nat i; - allocated = allocatedBytes(); - allocated += countNurseryBlocks() * BLOCK_SIZE_W; + allocated = countNurseryBlocks() * BLOCK_SIZE_W; - { -#ifdef THREADED_RTS - nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { Capability *cap; for ( bd = capabilities[i].r.rCurrentNursery->link; bd != NULL; bd = bd->link ) { @@ -919,18 +849,10 @@ calcAllocated( void ) allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) - cap->r.rCurrentNursery->free; } - } -#else - bdescr *current_nursery = MainCapability.r.rCurrentNursery; - - for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) { - allocated -= (current_nursery->start + BLOCK_SIZE_W) - - current_nursery->free; - } -#endif + if (cap->pinned_object_block != NULL) { + allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) - + cap->pinned_object_block->free; + } } total_allocated += allocated; @@ -947,16 +869,12 @@ calcLiveBlocks(void) lnat live = 0; step *stp; - if (RtsFlags.GcFlags.generations == 1) { - return g0s0->n_large_blocks + g0s0->n_blocks; - } - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (s = 0; s < generations[g].n_steps; s++) { /* approximate amount of live data (doesn't take into account slop * at end of each block). 
*/ - if (g == 0 && s == 0) { + if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { continue; } stp = &generations[g].steps[s]; @@ -988,14 +906,10 @@ calcLiveWords(void) lnat live; step *stp; - if (RtsFlags.GcFlags.generations == 1) { - return g0s0->n_words + countOccupied(g0s0->large_objects); - } - live = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) continue; + if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) continue; stp = &generations[g].steps[s]; live += stp->n_words + countOccupied(stp->large_objects); } @@ -1384,32 +1298,28 @@ checkSanity( void ) { nat g, s; - if (RtsFlags.GcFlags.generations == 1) { - checkHeap(g0s0->blocks); - checkLargeObjects(g0s0->large_objects); - } else { - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) { continue; } - ASSERT(countBlocks(generations[g].steps[s].blocks) - == generations[g].steps[s].n_blocks); - ASSERT(countBlocks(generations[g].steps[s].large_objects) - == generations[g].steps[s].n_large_blocks); - checkHeap(generations[g].steps[s].blocks); - checkLargeObjects(generations[g].steps[s].large_objects); - } - } - - for (s = 0; s < n_nurseries; s++) { - ASSERT(countBlocks(nurseries[s].blocks) - == nurseries[s].n_blocks); - ASSERT(countBlocks(nurseries[s].large_objects) - == nurseries[s].n_large_blocks); - } - - checkFreeListSanity(); + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { + continue; + } + ASSERT(countBlocks(generations[g].steps[s].blocks) + == generations[g].steps[s].n_blocks); + ASSERT(countBlocks(generations[g].steps[s].large_objects) + == generations[g].steps[s].n_large_blocks); + checkHeap(generations[g].steps[s].blocks); + checkLargeObjects(generations[g].steps[s].large_objects); + } + } + + for (s = 0; s < n_nurseries; s++) { + ASSERT(countBlocks(nurseries[s].blocks) + == nurseries[s].n_blocks); + ASSERT(countBlocks(nurseries[s].large_objects) + == nurseries[s].n_large_blocks); } + + checkFreeListSanity(); #if defined(THREADED_RTS) // check the stacks too in threaded mode, because we don't do a diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 5ddcbdc..30bdf54 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -9,6 +9,8 @@ #ifndef SM_STORAGE_H #define SM_STORAGE_H +#include "Capability.h" + BEGIN_RTS_PRIVATE /* ----------------------------------------------------------------------------- @@ -23,12 +25,11 @@ void freeStorage(void); Storage manager state -------------------------------------------------------------------------- */ -extern bdescr * pinned_object_block; - INLINE_HEADER rtsBool -doYouWantToGC( void ) +doYouWantToGC( Capability *cap ) { - return (alloc_blocks >= alloc_blocks_lim); + return (cap->r.rCurrentNursery->link == NULL || + cap->r.rNursery->n_large_blocks >= alloc_blocks_lim); } /* for splitting blocks groups in two */ @@ -120,6 +121,8 @@ void dirty_MVAR(StgRegTable *reg, StgClosure *p); Nursery manipulation -------------------------------------------------------------------------- */ +extern step *nurseries; + void resetNurseries ( void ); void resizeNurseries ( nat blocks ); void resizeNurseriesFixed ( nat blocks ); -- 1.7.10.4
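
The common theme of this patch (bump allocation from capability-local blocks, taking the storage-manager lock only on the slow path, and a GC trigger that looks only at the local allocation area) can be illustrated outside the RTS. Below is a minimal, self-contained C sketch of that idea, not the RTS implementation: the names Cap, Block, alloc_pinned, want_gc and the BLOCK_WORDS size are invented for this example, and real block descriptors, steps, nurseries and the SM lock are elided.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLOCK_WORDS 1024                  /* words per allocation block (made up) */

typedef struct Block_ {
    size_t free;                          /* index of next free word */
    struct Block_ *link;                  /* chain of blocks owned by one cap */
    size_t words[BLOCK_WORDS];            /* payload */
} Block;

typedef struct Cap_ {
    Block *pinned_object_block;           /* current pinned-object block */
    size_t n_blocks;                      /* blocks taken since the last "GC" */
} Cap;

/* Stand-in for the global block allocator; in the RTS this is the only
 * call that would need the storage-manager lock. */
static Block *alloc_block(void)
{
    Block *b = calloc(1, sizeof(Block));
    if (b == NULL) { perror("calloc"); exit(1); }
    return b;
}

/* Bump-allocate n words of pinned storage from the capability's current
 * block, starting a fresh block only when the current one is full. */
static size_t *alloc_pinned(Cap *cap, size_t n)
{
    Block *b = cap->pinned_object_block;
    if (b == NULL || b->free + n > BLOCK_WORDS) {
        Block *fresh = alloc_block();     /* slow path: the only locked step */
        fresh->link = b;
        cap->pinned_object_block = fresh;
        cap->n_blocks++;
        b = fresh;
    }
    size_t *p = &b->words[b->free];
    b->free += n;
    return p;
}

/* Local GC trigger: look only at this capability's own allocation,
 * analogous to MAYBE_GC()/doYouWantToGC() checking the local nursery. */
static int want_gc(const Cap *cap, size_t block_limit)
{
    return cap->n_blocks >= block_limit;
}

int main(void)
{
    Cap cap = { NULL, 0 };
    for (int i = 0; i < 5000; i++) {
        size_t *p = alloc_pinned(&cap, 8);      /* an 8-word pinned object */
        memset(p, 0, 8 * sizeof(size_t));
        if (want_gc(&cap, 32)) {
            printf("after %d objects: %zu local blocks, time to collect\n",
                   i + 1, cap.n_blocks);
            break;
        }
    }
    return 0;
}

Run standalone, this prints one line once the 32-block limit is reached. The design point carried over from the patch is that the fast path touches only state owned by the calling capability; global synchronisation is confined to fetching a fresh block, which is why per-capability pinned allocation (and hence mallocForeignPtrBytes) should get cheaper under -threaded.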