From c5cafbcca54c4b1117bc43b31d86afa583fb7f62 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Tue, 18 Aug 2009 11:29:42 +0000 Subject: [PATCH] Fix #3429: a tricky race condition There were two bugs, and had it not been for the first one we would not have noticed the second one, so this is quite fortunate. The first bug is in stg_unblockAsyncExceptionszh_ret, when we found a pending exception to raise, but don't end up raising it, there was a missing adjustment to the stack pointer. The second bug was that this case was actually happening at all: it ought to be incredibly rare, because the pending exception thread would have to be killed between us finding it and attempting to raise the exception. This made me suspicious. It turned out that there was a race condition on the tso->flags field; multiple threads were updating this bitmask field non-atomically (one of the bits is the dirty-bit for the generational GC). The fix is to move the dirty bit into its own field of the TSO, making the TSO one word larger (sadly). --- includes/mkDerivedConstants.c | 1 + includes/rts/Constants.h | 12 ------------ includes/rts/storage/TSO.h | 20 +++++++++++++++++++- rts/Exception.cmm | 10 ++++++++++ rts/PrimOps.cmm | 8 ++++---- rts/Sanity.c | 2 +- rts/Threads.c | 5 +++-- rts/sm/Scav.c | 6 +++--- rts/sm/Storage.c | 6 +++--- 9 files changed, 44 insertions(+), 26 deletions(-) diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index b38f3ab..3b80aac 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -293,6 +293,7 @@ main(int argc, char *argv[]) closure_field(StgTSO, saved_errno); closure_field(StgTSO, trec); closure_field(StgTSO, flags); + closure_field(StgTSO, dirty); closure_field_("StgTSO_CCCS", StgTSO, prof.CCCS); tso_field(StgTSO, sp); tso_field_offset(StgTSO, stack); diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index bab45a3..b283bef 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -230,19 +230,7 @@ /* * Flags for the tso->flags field. - * - * The TSO_DIRTY flag indicates that this TSO's stack should be - * scanned during garbage collection. The link field of a TSO is - * always scanned, so we don't have to dirty a TSO just for linking - * it on a different list. - * - * TSO_DIRTY is set by - * - schedule(), just before running a thread, - * - raiseAsync(), because it modifies a thread's stack - * - resumeThread(), just before running the thread again - * and unset by the garbage collector (only). */ -#define TSO_DIRTY 1 /* * TSO_LOCKED is set when a TSO is locked to a particular Capability. diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h index 7cb2459..b00f5d4 100644 --- a/includes/rts/storage/TSO.h +++ b/includes/rts/storage/TSO.h @@ -25,7 +25,6 @@ typedef struct { */ typedef StgWord32 StgThreadID; -#define tsoDirty(tso) ((tso)->flags & TSO_DIRTY) #define tsoLocked(tso) ((tso)->flags & TSO_LOCKED) /* @@ -90,6 +89,25 @@ typedef struct StgTSO_ { struct StgTSO_* global_link; /* Links all threads together */ + StgWord dirty; /* non-zero => dirty */ + /* + * The tso->dirty flag indicates that this TSO's stack should be + * scanned during garbage collection. It also indicates that this + * TSO is on the mutable list. + * + * NB. The dirty flag gets a word to itself, so that it can be set + * safely by multiple threads simultaneously (the flags field is + * not safe for this purpose; see #3429). It is harmless for the + * TSO to be on the mutable list multiple times. + * + * tso->dirty is set by dirty_TSO(), and unset by the garbage + * collector (only). + * + * The link field has a separate dirty bit of its own, namely the + * bit TSO_LINK_DIRTY in the tso->flags field, set by + * setTSOLink(). + */ + StgWord16 what_next; /* Values defined in Constants.h */ StgWord16 why_blocked; /* Values defined in Constants.h */ StgWord32 flags; diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 16b5d92..cd3f2bc 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -49,6 +49,8 @@ import ghczmprim_GHCziBool_True_closure; -------------------------------------------------------------------------- */ +STRING(stg_unblockAsync_err_str, "unblockAsyncExceptions#_ret") + INFO_TABLE_RET( stg_unblockAsyncExceptionszh_ret, RET_SMALL ) { CInt r; @@ -81,6 +83,14 @@ INFO_TABLE_RET( stg_unblockAsyncExceptionszh_ret, RET_SMALL ) jump %ENTRY_CODE(Sp(0)); } } + else { + /* + the thread might have been removed from the + blocked_exception list by someone else in the meantime. + Just restore the stack pointer and continue. + */ + Sp_adj(2); + } } Sp_adj(1); diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index bc2d07a..baadca4 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1173,7 +1173,7 @@ stg_takeMVarzh tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1249,7 +1249,7 @@ stg_tryTakeMVarzh /* actually perform the putMVar for the thread that we just woke up */ tso = StgMVar_head(mvar); PerformPut(tso,StgMVar_value(mvar)); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1329,7 +1329,7 @@ stg_putMVarzh /* actually perform the takeMVar */ tso = StgMVar_head(mvar); PerformTake(tso, val); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } @@ -1398,7 +1398,7 @@ stg_tryPutMVarzh /* actually perform the takeMVar */ tso = StgMVar_head(mvar); PerformTake(tso, R2); - if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) { + if (TO_W_(StgTSO_dirty(tso)) == 0) { foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; } diff --git a/rts/Sanity.c b/rts/Sanity.c index 49bbff7..4430c4b 100644 --- a/rts/Sanity.c +++ b/rts/Sanity.c @@ -581,7 +581,7 @@ checkGlobalTSOList (rtsBool checkTSOs) // If this TSO is dirty and in an old generation, it better // be on the mutable list. if (tso->what_next == ThreadRelocated) continue; - if (tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) { + if (tso->dirty || (tso->flags & TSO_LINK_DIRTY)) { ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED)); tso->flags &= ~TSO_MARKED; } diff --git a/rts/Threads.c b/rts/Threads.c index 28820c8..8318e28 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -75,7 +75,8 @@ createThread(Capability *cap, nat size) tso->why_blocked = NotBlocked; tso->blocked_exceptions = END_TSO_QUEUE; - tso->flags = TSO_DIRTY; + tso->flags = 0; + tso->dirty = 1; tso->saved_errno = 0; tso->bound = NULL; @@ -377,7 +378,7 @@ printThreadStatus(StgTSO *t) default: printThreadBlockage(t); } - if (t->flags & TSO_DIRTY) { + if (t->dirty) { debugBelch(" (TSO_DIRTY)"); } else if (t->flags & TSO_LINK_DIRTY) { debugBelch(" (TSO_LINK_DIRTY)"); diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index 52de561..672636b 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -89,10 +89,10 @@ scavengeTSO (StgTSO *tso) scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); if (gct->failed_to_evac) { - tso->flags |= TSO_DIRTY; + tso->dirty = 1; scavenge_TSO_link(tso); } else { - tso->flags &= ~TSO_DIRTY; + tso->dirty = 0; scavenge_TSO_link(tso); if (gct->failed_to_evac) { tso->flags |= TSO_LINK_DIRTY; @@ -1454,7 +1454,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen) continue; case TSO: { StgTSO *tso = (StgTSO *)p; - if ((tso->flags & TSO_DIRTY) == 0) { + if (tso->dirty == 0) { // Must be on the mutable list because its link // field is dirty. ASSERT(tso->flags & TSO_LINK_DIRTY); diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 97615e9..59a41b0 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -850,7 +850,7 @@ void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) { bdescr *bd; - if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) { + if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { tso->flags |= TSO_LINK_DIRTY; bd = Bdescr((StgPtr)tso); if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no); @@ -862,11 +862,11 @@ void dirty_TSO (Capability *cap, StgTSO *tso) { bdescr *bd; - if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) { + if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { bd = Bdescr((StgPtr)tso); if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no); } - tso->flags |= TSO_DIRTY; + tso->dirty = 1; } /* -- 1.7.10.4