From: Simon Marlow
Date: Tue, 31 May 2011 08:39:09 +0000 (+0100)
Subject: Fix a cause of very occasional <<loop>> with parallel programs and
X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=d77cec05ebc9abe62b8af990b519fc9f36684239

Fix a cause of very occasional <<loop>> with parallel programs and
-feager-blackholing (#5226). See comments for details.
---

diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index aeae1d4..02b4f58 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -230,8 +230,38 @@ threadPaused(Capability *cap, StgTSO *tso)
 #ifdef THREADED_RTS
         retry:
 #endif
-            if (bh_info == &stg_BLACKHOLE_info ||
-                bh_info == &stg_WHITEHOLE_info)
+            // If the info table is a WHITEHOLE or a BLACKHOLE, then
+            // another thread has claimed it (via the SET_INFO()
+            // below), or is in the process of doing so. In that case
+            // we want to suspend the work that the current thread has
+            // done on this thunk and wait until the other thread has
+            // finished.
+            //
+            // If eager blackholing is taking place, it could be the
+            // case that the blackhole points to the current
+            // TSO. e.g.:
+            //
+            //    this thread                   other thread
+            //    --------------------------------------------------------
+            //                                  c->indirectee = other_tso;
+            //                                  c->header.info = EAGER_BH
+            //                                  threadPaused()
+            //    c->indirectee = this_tso;
+            //    c->header.info = EAGER_BH
+            //                                  c->header.info = BLACKHOLE
+            //    threadPaused()
+            //    *** c->header.info is now BLACKHOLE,
+            //        c->indirectee points to this TSO
+            //
+            // So in this case do *not* suspend the work of the
+            // current thread, because the current thread will become
+            // deadlocked on itself. See #5226 for an instance of
+            // this bug.
+            //
+            if ((bh_info == &stg_WHITEHOLE_info ||
+                 bh_info == &stg_BLACKHOLE_info)
+                &&
+                ((StgInd*)bh)->indirectee != (StgClosure*)tso)
             {
                 debugTrace(DEBUG_squeeze,
                            "suspending duplicate work: %ld words of stack",