Partial fix for #926
author Simon Marlow <simonmar@microsoft.com>
Thu, 1 Feb 2007 11:40:47 +0000 (11:40 +0000)
committer Simon Marlow <simonmar@microsoft.com>
Thu, 1 Feb 2007 11:40:47 +0000 (11:40 +0000)
It seems that when a program exits with open DLLs on Windows, the
system attempts to shut down the DLLs, but it also terminates (some
of?) the running threads.  The RTS isn't prepared for threads to die
unexpectedly, so it sits around waiting for its workers to finish.
This bites in two places: ShutdownIOManager() in the unthreaded
RTS, and shutdownCapability() in the threaded RTS.  So far I've
modified the latter to notice when worker threads have died
unexpectedly and continue shutting down.  It seems a bit trickier to
fix the unthreaded RTS, so for now the workaround for #926 is to use
the threaded RTS.

includes/OSThreads.h
rts/Capability.c
rts/posix/OSThreads.c
rts/win32/OSThreads.c

index 32f147a..f9af6c4 100644 (file)
@@ -73,6 +73,8 @@ typedef pthread_key_t   ThreadLocalKey;
 
 typedef HANDLE Condition;
 typedef DWORD OSThreadId;
+// don't be tempted to use HANDLE as the OSThreadId: there can be 
+// many HANDLES to a given thread, so comparison would not work.
 typedef DWORD ThreadLocalKey;
 
 #define OSThreadProcAttr __stdcall
@@ -146,6 +148,7 @@ typedef void OSThreadProcAttr OSThreadProc(void *);
 
 extern int  createOSThread        ( OSThreadId* tid, 
                                    OSThreadProc *startProc, void *param);
+extern rtsBool osThreadIsAlive    ( OSThreadId id );
 
 //
 // Condition Variables
index 510656f..564a20f 100644 (file)
@@ -664,6 +664,30 @@ shutdownCapability (Capability *cap, Task *task)
            continue;
        }
        cap->running_task = task;
+
+        if (cap->spare_workers) {
+            // Look for workers that have died without removing
+            // themselves from the list; this could happen if the OS
+            // summarily killed the thread, for example.  This
+            // actually happens on Windows when the system is
+            // terminating the program, and the RTS is running in a
+            // DLL.
+            //
+            // 'slot' is the pointer that refers to the current task,
+            // so unlinking a dead worker leaves live ones in place.
+            Task **slot = &cap->spare_workers;
+            while (*slot != NULL) {
+                Task *t = *slot;
+                if (!osThreadIsAlive(t->id)) {
+                    debugTrace(DEBUG_sched, 
+                               "worker thread %p has died unexpectedly", t->id);
+                    *slot = t->next;    // unlink t; 'slot' stays put
+                } else {
+                    slot = &t->next;    // t survives; step past it
+                }
+            }
+        }
+
        if (!emptyRunQueue(cap) || cap->spare_workers) {
            debugTrace(DEBUG_sched, 
                       "runnable threads or workers still alive, yielding");
index b30d085..79d395f 100644 (file)
@@ -93,6 +93,14 @@ osThreadId()
   return pthread_self();
 }
 
+/*
+ * Report whether the OS thread identified by 'id' is still running.
+ *
+ * There is no good way to find this out on POSIX (as far as we know),
+ * so this version always answers rtsTrue, which is the safe answer.
+ */
+rtsBool
+osThreadIsAlive(OSThreadId id)
+{
+    (void)id;   // deliberately unused in this implementation
+    return rtsTrue;
+}
+
 void
 initMutex(Mutex* pMut)
 {
index 32800e7..6f3629b 100644 (file)
@@ -7,6 +7,8 @@
  *
  * --------------------------------------------------------------------------*/
 
+#define _WIN32_WINNT 0x0500
+
 #include "Rts.h"
 #if defined(THREADED_RTS)
 #include "OSThreads.h"
@@ -112,6 +114,22 @@ osThreadId()
   return GetCurrentThreadId();
 }
 
+/*
+ * Report whether the OS thread with identifier 'id' is still running.
+ *
+ * Opens a temporary query handle on the thread and reads its exit code:
+ * returns rtsTrue if that code is STILL_ACTIVE, rtsFalse otherwise.  If
+ * either OpenThread() or GetExitCodeThread() fails, the error is
+ * reported with sysErrorBelch() and stg_exit(EXIT_FAILURE) is called.
+ *
+ * Note that a thread which actually exited with the code STILL_ACTIVE
+ * cannot be told apart from a running one by this test.
+ */
+rtsBool
+osThreadIsAlive(OSThreadId id)
+{
+    DWORD exit_code;
+    HANDLE hdl;
+    if (!(hdl = OpenThread(THREAD_QUERY_INFORMATION,FALSE,id))) {
+        sysErrorBelch("osThreadIsAlive: OpenThread");
+        stg_exit(EXIT_FAILURE);
+    }
+    if (!GetExitCodeThread(hdl, &exit_code)) {
+        sysErrorBelch("osThreadIsAlive: GetExitCodeThread");
+        stg_exit(EXIT_FAILURE);
+    }
+    // OpenThread() handed us our own handle; release it so that
+    // repeated liveness checks do not leak one handle per call.
+    CloseHandle(hdl);
+    return (exit_code == STILL_ACTIVE);
+}
+
 #ifdef USE_CRITICAL_SECTIONS
 void
 initMutex (Mutex* pMut)
@@ -161,7 +179,8 @@ getThreadLocalVar (ThreadLocalKey *key)
     // r is allowed to be NULL - it can mean that either there was an
     // error or the stored value is in fact NULL.
     if (GetLastError() != NO_ERROR) {
-       barf("getThreadLocalVar: key not found");
+       sysErrorBelch("getThreadLocalVar");
+        stg_exit(EXIT_FAILURE);
     }
 #endif
     return r;
@@ -173,7 +192,8 @@ setThreadLocalVar (ThreadLocalKey *key, void *value)
     BOOL b;
     b = TlsSetValue(*key, value);
     if (!b) {
-       barf("setThreadLocalVar: %d", GetLastError());
+       sysErrorBelch("setThreadLocalVar");
+        stg_exit(EXIT_FAILURE);
     }
 }