From: sof Date: Tue, 9 Jul 2002 20:44:24 +0000 (+0000) Subject: [project @ 2002-07-09 20:44:24 by sof] X-Git-Tag: Approx_11550_changesets_converted~1874 X-Git-Url: http://git.megacz.com/?a=commitdiff_plain;h=d4fc96655eba2dd67726f8043d073ec41ea03662;p=ghc-hetmet.git [project @ 2002-07-09 20:44:24 by sof] awaitEvent: better handling of EBADFs, i.e., don't unconditionally barf() and exit if select() reports an EBADF. See source code comments for details, but in short, we attempt to unblock all threads to handle the error condition before bailing out. If only select() would indicate which file descriptor was the bad one. (There's no good reason why select() errors other than EBADF couldn't also be handled this way, but let's focus on it for now..) --- diff --git a/ghc/rts/Select.c b/ghc/rts/Select.c index 0408772..922b151 100644 --- a/ghc/rts/Select.c +++ b/ghc/rts/Select.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: Select.c,v 1.19 2001/11/13 13:38:02 simonmar Exp $ + * $Id: Select.c,v 1.20 2002/07/09 20:44:24 sof Exp $ * * (c) The GHC Team 1995-1999 * @@ -86,12 +86,14 @@ awaitEvent(rtsBool wait) int maxfd = -1; #endif rtsBool select_succeeded = rtsTrue; + rtsBool unblock_all = rtsFalse; + static rtsBool prev_unblocked_all = rtsFalse; struct timeval tv; lnat min, ticks; tv.tv_sec = 0; tv.tv_usec = 0; - + IF_DEBUG(scheduler, belch("scheduler: checking for threads blocked on I/O"); if (wait) { @@ -174,11 +176,28 @@ awaitEvent(rtsBool wait) while ((numFound = select(maxfd+1, &rfd, &wfd, NULL, &tv)) < 0) { if (errno != EINTR) { - - printf("%d\n", errno); - fflush(stdout); - perror("select"); + /* Handle bad file descriptors by unblocking all the + waiting threads. Why? Because a thread might have been + a bit naughty and closed a file descriptor while another + was blocked waiting. 
This is less-than-good programming + practice, but having the RTS as a result fall over isn't + acceptable, so we simply unblock all the waiting threads + should we see a bad file descriptor & give the threads + a chance to clean up their act. + + To avoid getting stuck in a loop, repeated EBADF failures + are 'handled' through barfing. + */ + if ( errno == EBADF && !prev_unblocked_all) { + unblock_all = rtsTrue; + prev_unblocked_all = rtsTrue; + break; + } else { + fprintf(stderr,"%d\n", errno); + fflush(stderr); + perror("select"); barf("select failed"); + } } #else /* on mingwin */ while (1) { @@ -186,6 +205,8 @@ awaitEvent(rtsBool wait) #endif /* mingw32_TARGET_OS */ ACQUIRE_LOCK(&sched_mutex); + prev_unblocked_all = rtsFalse; + #ifndef mingw32_TARGET_OS /* We got a signal; could be one of ours. If so, we need * to start up the signal handler straight away, otherwise @@ -227,15 +248,15 @@ awaitEvent(rtsBool wait) */ prev = NULL; - if (select_succeeded) { + if (select_succeeded || unblock_all) { for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) { next = tso->link; switch (tso->why_blocked) { case BlockedOnRead: - ready = FD_ISSET(tso->block_info.fd, &rfd); + ready = unblock_all || FD_ISSET(tso->block_info.fd, &rfd); break; case BlockedOnWrite: - ready = FD_ISSET(tso->block_info.fd, &wfd); + ready = unblock_all || FD_ISSET(tso->block_info.fd, &wfd); break; default: barf("awaitEvent");