Use a separate mutex to protect all_tasks, avoiding a lock-order-reversal
authorSimon Marlow <marlowsd@gmail.com>
Fri, 16 Jul 2010 15:08:32 +0000 (15:08 +0000)
committerSimon Marlow <marlowsd@gmail.com>
Fri, 16 Jul 2010 15:08:32 +0000 (15:08 +0000)
In GHC 6.12.x I found a rare deadlock caused by this
lock-order-reversal:

AQ cap->lock
  startWorkerTask
    newTask
      AQ sched_mutex

scheduleCheckBlackHoles
  AQ sched_mutex
   unblockOne_
    wakeupThreadOnCapabilty
      AQ cap->lock

so sched_mutex and cap->lock are taken in a different order in two
places.

This doesn't happen in the HEAD because we don't have
scheduleCheckBlackHoles, but I thought it would be prudent to make
this less likely to happen in the future by using a different mutex in
newTask.  We can clearly see that the all_tasks mutex cannot be
involved in a deadlock, becasue we never call anything else while
holding it.

rts/Task.c

index a9461c9..8a289be 100644 (file)
@@ -24,7 +24,7 @@
 #endif
 
 // Task lists and global counters.
-// Locks required: sched_mutex.
+// Locks required: all_tasks_mutex.
 Task *all_tasks = NULL;
 static nat taskCount;
 static int tasksInitialized = 0;
@@ -33,6 +33,10 @@ static void   freeTask  (Task *task);
 static Task * allocTask (void);
 static Task * newTask   (rtsBool);
 
+#if defined(THREADED_RTS)
+static Mutex all_tasks_mutex;
+#endif
+
 /* -----------------------------------------------------------------------------
  * Remembering the current thread's Task
  * -------------------------------------------------------------------------- */
@@ -61,6 +65,7 @@ initTaskManager (void)
        tasksInitialized = 1;
 #if defined(THREADED_RTS) && !defined(MYTASK_USE_TLV)
        newThreadLocalKey(&currentTaskKey);
+        initMutex(&all_tasks_mutex);
 #endif
     }
 }
@@ -71,7 +76,7 @@ freeTaskManager (void)
     Task *task, *next;
     nat tasksRunning = 0;
 
-    ASSERT_LOCK_HELD(&sched_mutex);
+    ACQUIRE_LOCK(&all_tasks_mutex);
 
     for (task = all_tasks; task != NULL; task = next) {
         next = task->all_link;
@@ -86,7 +91,11 @@ freeTaskManager (void)
                tasksRunning);
 
     all_tasks = NULL;
+
+    RELEASE_LOCK(&all_tasks_mutex);
+
 #if defined(THREADED_RTS) && !defined(MYTASK_USE_TLV)
+    closeMutex(&all_tasks_mutex); 
     freeThreadLocalKey(&currentTaskKey);
 #endif
 
@@ -177,14 +186,14 @@ newTask (rtsBool worker)
 
     task->next = NULL;
 
-    ACQUIRE_LOCK(&sched_mutex);
+    ACQUIRE_LOCK(&all_tasks_mutex);
 
     task->all_link = all_tasks;
     all_tasks = task;
 
     taskCount++;
 
-    RELEASE_LOCK(&sched_mutex);
+    RELEASE_LOCK(&all_tasks_mutex);
 
     return task;
 }
@@ -284,7 +293,7 @@ discardTasksExcept (Task *keep)
     Task *task, *next;
 
     // Wipe the task list, except the current Task.
-    ACQUIRE_LOCK(&sched_mutex);
+    ACQUIRE_LOCK(&all_tasks_mutex);
     for (task = all_tasks; task != NULL; task=next) {
         next = task->all_link;
         if (task != keep) {
@@ -294,7 +303,7 @@ discardTasksExcept (Task *keep)
     }
     all_tasks = keep;
     keep->all_link = NULL;
-    RELEASE_LOCK(&sched_mutex);
+    RELEASE_LOCK(&all_tasks_mutex);
 }
 
 void