[project @ 1999-01-20 16:07:40 by simonm]

[ghc-hetmet.git] / ghc / rts / GC.c
diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c

index 695e132..fb2eaa5 100644 (file)
--- a/ghc/rts/GC.c
+++ b/ghc/rts/GC.c
@@ -1,5 +1,5 @@
  /* -----------------------------------------------------------------------------
- * $Id: GC.c,v 1.7 1999/01/14 10:49:01 simonm Exp $
+ * $Id: GC.c,v 1.17 1999/01/20 16:07:40 simonm Exp $
   *
   * Two-space garbage collector
   *
@@ -84,6 +84,10 @@ static rtsBool weak_done;    /* all done for this pass */
   */
  static rtsBool failed_to_evac;
  
+/* Old to-space (used for two-space collector only)
+ */
+bdescr *old_to_space;
+
  /* -----------------------------------------------------------------------------
     Static function declarations
     -------------------------------------------------------------------------- */
@@ -165,6 +169,7 @@ void GarbageCollect(void (*get_roots)(void))
  
    /* Figure out which generation to collect
     */
+  N = 0;
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      if (generations[g].steps[0].n_blocks >= generations[g].max_blocks) {
        N = g;
@@ -188,6 +193,13 @@ void GarbageCollect(void (*get_roots)(void))
      zeroMutableList(generations[RtsFlags.GcFlags.generations-1].mut_list);
    }
  
+  /* Save the old to-space if we're doing a two-space collection
+   */
+  if (RtsFlags.GcFlags.generations == 1) {
+    old_to_space = g0s0->to_space;
+    g0s0->to_space = NULL;
+  }
+
    /* Initialise to-space in all the generations/steps that we're
     * collecting.
     */
@@ -195,8 +207,12 @@ void GarbageCollect(void (*get_roots)(void))
      generations[g].mut_list = END_MUT_LIST;
  
      for (s = 0; s < generations[g].n_steps; s++) {
+
        /* generation 0, step 0 doesn't need to-space */
-      if (g == 0 && s == 0) { continue; }
+      if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { 
+       continue; 
+      }
+
        /* Get a free block for to-space.  Extra blocks will be chained on
         * as necessary.
         */
@@ -250,21 +266,11 @@ void GarbageCollect(void (*get_roots)(void))
        step->to_blocks = 0;
        step->new_large_objects = NULL;
        step->scavenged_large_objects = NULL;
-#ifdef DEBUG
-      /* retain these so we can sanity-check later on */
-      step->old_scan    = step->scan;
-      step->old_scan_bd = step->scan_bd;
-#endif
      }
    }
  
    /* -----------------------------------------------------------------------
-   * follow all the roots that the application knows about.
-   */
-  evac_gen = 0;
-  get_roots();
-
-  /* follow all the roots that we know about:
+   * follow all the roots that we know about:
     *   - mutable lists from each generation > N
     * we want to *scavenge* these roots, not evacuate them: they're not
     * going to move in this GC.
@@ -277,23 +283,26 @@ void GarbageCollect(void (*get_roots)(void))
     */
    { 
      StgMutClosure *tmp, **pp;
-    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
-      /* the act of scavenging the mutable list for this generation
-       * might place more objects on the mutable list itself.  So we
-       * place the current mutable list in a temporary, scavenge it,
-       * and then append it to the new list.
-       */
-      tmp = generations[g].mut_list;
+    for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
+      generations[g].saved_mut_list = generations[g].mut_list;
        generations[g].mut_list = END_MUT_LIST;
-      tmp = scavenge_mutable_list(tmp, g);
+    }
  
+    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+      tmp = scavenge_mutable_list(generations[g].saved_mut_list, g);
        pp = &generations[g].mut_list;
        while (*pp != END_MUT_LIST) {
            pp = &(*pp)->mut_link;
        }
        *pp = tmp;
      }
-  }  
+  }
+
+  /* follow all the roots that the application knows about.
+   */
+  evac_gen = 0;
+  get_roots();
+
    /* And don't forget to mark the TSO if we got here direct from
     * Haskell! */
    if (CurrentTSO) {
@@ -383,6 +392,87 @@ void GarbageCollect(void (*get_roots)(void))
      }
    }
  
+  /* Set the maximum blocks for the oldest generation, based on twice
+   * the amount of live data now, adjusted to fit the maximum heap
+   * size if necessary.  
+   *
+   * This is an approximation, since in the worst case we'll need
+   * twice the amount of live data plus whatever space the other
+   * generations need.
+   */
+  if (RtsFlags.GcFlags.generations > 1) {
+    if (major_gc) {
+      oldest_gen->max_blocks = 
+       stg_max(oldest_gen->steps[0].to_blocks * RtsFlags.GcFlags.oldGenFactor,
+               RtsFlags.GcFlags.minOldGenSize);
+      if (oldest_gen->max_blocks > RtsFlags.GcFlags.maxHeapSize / 2) {
+       oldest_gen->max_blocks = RtsFlags.GcFlags.maxHeapSize / 2;
+       if (((int)oldest_gen->max_blocks - 
+            (int)oldest_gen->steps[0].to_blocks) < 
+           (RtsFlags.GcFlags.pcFreeHeap *
+            RtsFlags.GcFlags.maxHeapSize / 200)) {
+         heapOverflow();
+       }
+      }
+    }
+  } else {
+    /* For a two-space collector, we need to resize the nursery. */
+
+    /* set up a new nursery.  Allocate a nursery size based on a
+     * function of the amount of live data (currently a factor of 2,
+     * should be configurable (ToDo)).  Use the blocks from the old
+     * nursery if possible, freeing up any left over blocks.
+     *
+     * If we get near the maximum heap size, then adjust our nursery
+     * size accordingly.  If the nursery is the same size as the live
+     * data (L), then we need 3L bytes.  We can reduce the size of the
+     * nursery to bring the required memory down near 2L bytes.
+     * 
+     * A normal 2-space collector would need 4L bytes to give the same
+     * performance we get from 3L bytes, reducing to the same
+     * performance at 2L bytes.  
+     */
+    nat blocks = g0s0->to_blocks;
+
+    if ( blocks * 4 > RtsFlags.GcFlags.maxHeapSize ) {
+      int adjusted_blocks;  /* signed on purpose */
+      int pc_free; 
+      
+      adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
+      IF_DEBUG(gc, fprintf(stderr, "Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks));
+      pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
+      if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ {
+       heapOverflow();
+      }
+      blocks = adjusted_blocks;
+      
+    } else {
+      blocks *= 2;
+      if (blocks < RtsFlags.GcFlags.minAllocAreaSize) {
+       blocks = RtsFlags.GcFlags.minAllocAreaSize;
+      }
+    }
+    
+    if (nursery_blocks < blocks) {
+      IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n", 
+                          blocks));
+      g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
+    } else {
+      bdescr *next_bd;
+      
+      IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n", 
+                          blocks));
+      for (bd = g0s0->blocks; nursery_blocks > blocks; nursery_blocks--) {
+       next_bd = bd->link;
+       freeGroup(bd);
+       bd = next_bd;
+      }
+      g0s0->blocks = bd;
+    }
+
+    g0s0->n_blocks = nursery_blocks = blocks;
+  }
+
    /* run through all the generations/steps and tidy up 
     */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
@@ -395,7 +485,7 @@ void GarbageCollect(void (*get_roots)(void))
        bdescr *next;
        step = &generations[g].steps[s];
  
-      if (!(g == 0 && s == 0)) {
+      if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
         /* Tidy the end of the to-space chains */
         step->hp_bd->free = step->hp;
         step->hp_bd->link = NULL;
@@ -436,24 +526,22 @@ void GarbageCollect(void (*get_roots)(void))
         }
         step->large_objects = step->scavenged_large_objects;
  
-       /* Set the maximum blocks for this generation,
-        * using an arbitrary factor of the no. of blocks in step 0.
+       /* Set the maximum blocks for this generation, interpolating
+        * between the maximum size of the oldest and youngest
+        * generations.
+        *
+        * max_blocks = alloc_area_size +  
+        *                 (oldgen_max_blocks - alloc_area_size) * G
+        *                 -----------------------------------------
+        *                              oldest_gen
          */
         if (g != 0) {
-         generation *gen = &generations[g];
-         gen->max_blocks = 
-           stg_max(gen->steps[s].n_blocks * 2,
-                   RtsFlags.GcFlags.minAllocAreaSize * 4);
-         if (gen->max_blocks > RtsFlags.GcFlags.maxHeapSize / 2) {
-           gen->max_blocks = RtsFlags.GcFlags.maxHeapSize / 2;
-           if (((int)gen->max_blocks - (int)gen->steps[0].n_blocks) < 
-               (RtsFlags.GcFlags.pcFreeHeap *
-                RtsFlags.GcFlags.maxHeapSize / 200)) {
-             heapOverflow();
-           }
-         }
+         generations[g].max_blocks = 
+           RtsFlags.GcFlags.minAllocAreaSize +
+            (((oldest_gen->max_blocks - RtsFlags.GcFlags.minAllocAreaSize) * g)
+              / (RtsFlags.GcFlags.generations-1));
         }
-       
+
        /* for older generations... */
        } else {
         
@@ -473,6 +561,37 @@ void GarbageCollect(void (*get_roots)(void))
      }
    }
    
+  /* Two-space collector:
+   * Free the old to-space, and estimate the amount of live data.
+   */
+  if (RtsFlags.GcFlags.generations == 1) {
+    if (old_to_space != NULL) {
+      freeChain(old_to_space);
+    }
+    for (bd = g0s0->to_space; bd != NULL; bd = bd->link) {
+      bd->evacuated = 0;       /* now from-space */
+    }
+    live = g0s0->to_blocks * BLOCK_SIZE_W + 
+      ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
+
+  /* Generational collector:
+   * estimate the amount of live data.
+   */
+  } else {
+    live = 0;
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+      for (s = 0; s < generations[g].n_steps; s++) {
+       /* approximate amount of live data (doesn't take into account slop
+        * at end of each block).  ToDo: this more accurately.
+        */
+       if (g == 0 && s == 0) { continue; }
+       step = &generations[g].steps[s];
+       live += step->n_blocks * BLOCK_SIZE_W + 
+         ((lnat)step->hp_bd->free -(lnat)step->hp_bd->start) / sizeof(W_);
+      }
+    }
+  }
+
    /* revert dead CAFs and update enteredCAFs list */
    revertDeadCAFs();
    
@@ -495,19 +614,6 @@ void GarbageCollect(void (*get_roots)(void))
    }
    current_nursery = g0s0->blocks;
  
-  live = 0;
-  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-    for (s = 0; s < generations[g].n_steps; s++) {
-      /* approximate amount of live data (doesn't take into account slop
-       * at end of each block).  ToDo: this more accurately.
-       */
-      if (g == 0 && s == 0) { continue; }
-      step = &generations[g].steps[s];
-      live += step->n_blocks * BLOCK_SIZE_W + 
-       ((lnat)step->hp_bd->free -(lnat)step->hp_bd->start) / sizeof(W_);
-    }
-  }
-
    /* Free the small objects allocated via allocate(), since this will
     * all have been copied into G0S1 now.  
     */
@@ -523,21 +629,26 @@ void GarbageCollect(void (*get_roots)(void))
    
    /* check sanity after GC */
  #ifdef DEBUG
-  for (g = 0; g <= N; g++) {
-    for (s = 0; s < generations[g].n_steps; s++) {
-      if (g == 0 && s == 0) { continue; }
-      IF_DEBUG(sanity, checkHeap(generations[g].steps[s].blocks, NULL));
-      IF_DEBUG(sanity, checkChain(generations[g].steps[s].large_objects));
+  if (RtsFlags.GcFlags.generations == 1) {
+    IF_DEBUG(sanity, checkHeap(g0s0->to_space, NULL));
+    IF_DEBUG(sanity, checkChain(g0s0->large_objects));
+  } else {
+
+    for (g = 0; g <= N; g++) {
+      for (s = 0; s < generations[g].n_steps; s++) {
+       if (g == 0 && s == 0) { continue; }
+       IF_DEBUG(sanity, checkHeap(generations[g].steps[s].blocks, NULL));
+      }
      }
-  }
-  for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
-    for (s = 0; s < generations[g].n_steps; s++) {
-      IF_DEBUG(sanity, checkHeap(generations[g].steps[s].old_scan_bd,
-                                generations[g].steps[s].old_scan));
-      IF_DEBUG(sanity, checkChain(generations[g].steps[s].large_objects));
+    for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
+      for (s = 0; s < generations[g].n_steps; s++) {
+       IF_DEBUG(sanity, checkHeap(generations[g].steps[s].blocks,
+                                  generations[g].steps[s].blocks->start));
+       IF_DEBUG(sanity, checkChain(generations[g].steps[s].large_objects));
+      }
      }
+    IF_DEBUG(sanity, checkFreeListSanity());
    }
-  IF_DEBUG(sanity, checkFreeListSanity());
  #endif
  
    IF_DEBUG(gc, stat_describe_gens());
@@ -692,7 +803,7 @@ static inline void addBlock(step *step)
  }
  
  static __inline__ StgClosure *
-copy(StgClosure *src, W_ size, bdescr *bd)
+copy(StgClosure *src, nat size, bdescr *bd)
  {
    step *step;
    P_ to, from, dest;
@@ -722,6 +833,35 @@ copy(StgClosure *src, W_ size, bdescr *bd)
    return (StgClosure *)dest;
  }
  
+/* Special version of copy() for when we only want to copy the info
+ * pointer of an object, but reserve some padding after it.  This is
+ * used to optimise evacuation of BLACKHOLEs.
+ */
+
+static __inline__ StgClosure *
+copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, bdescr *bd)
+{
+  step *step;
+  P_ dest, to, from;
+
+  step = bd->step->to;
+  if (step->gen->no < evac_gen) {
+    step = &generations[evac_gen].steps[0];
+  }
+
+  if (step->hp + size_to_reserve >= step->hpLim) {
+    addBlock(step);
+  }
+
+  dest = step->hp;
+  step->hp += size_to_reserve;
+  for(to = dest, from = (P_)src; size_to_copy>0; --size_to_copy) {
+    *to++ = *from++;
+  }
+  
+  return (StgClosure *)dest;
+}
+
  static __inline__ void 
  upd_evacuee(StgClosure *p, StgClosure *dest)
  {
@@ -926,10 +1066,16 @@ loop:
  
    case CAF_BLACKHOLE:
    case BLACKHOLE:
-    to = copy(q,BLACKHOLE_sizeW(),bd);
+    to = copyPart(q,BLACKHOLE_sizeW(),sizeofW(StgHeader),bd);
      upd_evacuee(q,to);
      return to;
  
+  case BLACKHOLE_BQ:
+    to = copy(q,BLACKHOLE_sizeW(),bd); 
+    upd_evacuee(q,to);
+    evacuate_mutable((StgMutClosure *)to);
+    return to;
+
    case THUNK_SELECTOR:
      {
        const StgInfoTable* selectee_info;
@@ -994,6 +1140,7 @@ loop:
        case CAF_UNENTERED:
        case CAF_BLACKHOLE:
        case BLACKHOLE:
+      case BLACKHOLE_BQ:
         /* not evaluated yet */
         break;
  
@@ -1126,8 +1273,7 @@ loop:
        /* Large TSOs don't get moved, so no relocation is required.
         */
        if (size >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
-       evacuate_large((P_)q, rtsFalse);
-       tso->mut_link = NULL;   /* see below */
+       evacuate_large((P_)q, rtsTrue);
         return q;
  
        /* To evacuate a small TSO, we need to relocate the update frame
@@ -1146,14 +1292,7 @@ loop:
         relocate_TSO(tso, new_tso);
         upd_evacuee(q,(StgClosure *)new_tso);
  
-       /* don't evac_mutable - these things are marked mutable as
-        * required.  We *do* need to zero the mut_link field, though:
-        * this TSO might have been on the mutable list for this
-        * generation, but we're collecting this generation anyway so
-        * we didn't follow the mutable list.
-        */
-       new_tso->mut_link = NULL;
-
+       evacuate_mutable((StgMutClosure *)new_tso);
         return (StgClosure *)new_tso;
        }
      }
@@ -1347,10 +1486,18 @@ scavenge(step *step)
  
      case CAF_BLACKHOLE:
      case BLACKHOLE:
+       p += BLACKHOLE_sizeW();
+       break;
+
+    case BLACKHOLE_BQ:
        { 
-       StgBlackHole *bh = (StgBlackHole *)p;
+       StgBlockingQueue *bh = (StgBlockingQueue *)p;
         (StgClosure *)bh->blocking_queue = 
           evacuate((StgClosure *)bh->blocking_queue);
+       if (failed_to_evac) {
+         failed_to_evac = rtsFalse;
+         evacuate_mutable((StgMutClosure *)bh);
+       }
         p += BLACKHOLE_sizeW();
         break;
        }
@@ -1520,12 +1667,7 @@ scavenge_one(StgPtr p)
  
    case CAF_BLACKHOLE:
    case BLACKHOLE:
-    { 
-      StgBlackHole *bh = (StgBlackHole *)p;
-      (StgClosure *)bh->blocking_queue = 
-       evacuate((StgClosure *)bh->blocking_queue);
        break;
-    }
  
    case THUNK_SELECTOR:
      { 
@@ -1654,6 +1796,16 @@ scavenge_mutable_list(StgMutClosure *p, nat gen)
        }
        continue;
        
+    case MVAR:
+      {
+       StgMVar *mvar = (StgMVar *)p;
+       (StgClosure *)mvar->head = evacuate((StgClosure *)mvar->head);
+       (StgClosure *)mvar->tail = evacuate((StgClosure *)mvar->tail);
+       (StgClosure *)mvar->value = evacuate((StgClosure *)mvar->value);
+       prev = &p->mut_link;
+       continue;
+      }
+
      case TSO:
        /* follow ptrs and remove this from the mutable list */
        { 
@@ -1704,6 +1856,15 @@ scavenge_mutable_list(StgMutClosure *p, nat gen)
        }
        continue;
        
+    case BLACKHOLE_BQ:
+      { 
+       StgBlockingQueue *bh = (StgBlockingQueue *)p;
+       (StgClosure *)bh->blocking_queue = 
+         evacuate((StgClosure *)bh->blocking_queue);
+       prev = &p->mut_link;
+       break;
+      }
+
      default:
        /* shouldn't have anything else on the mutables list */
        barf("scavenge_mutable_object: non-mutable object?");
@@ -1745,6 +1906,18 @@ scavenge_static(void)
        {
         StgInd *ind = (StgInd *)p;
         ind->indirectee = evacuate(ind->indirectee);
+
+       /* might fail to evacuate it, in which case we have to pop it
+        * back on the mutable list (and take it off the
+        * scavenged_static list because the static link and mut link
+        * pointers are one and the same).
+        */
+       if (failed_to_evac) {
+         failed_to_evac = rtsFalse;
+         scavenged_static_objects = STATIC_LINK(info,p);
+         ((StgMutClosure *)ind)->mut_link = oldest_gen->mut_list;
+         oldest_gen->mut_list = (StgMutClosure *)ind;
+       }
         break;
        }
        
@@ -1769,6 +1942,8 @@ scavenge_static(void)
        barf("scavenge_static");
      }
  
+    ASSERT(failed_to_evac == rtsFalse);
+
      /* get the next static object from the list.  Remeber, there might
       * be more stuff on this list now that we've done some evacuating!
       * (static_objects is a global)
@@ -1844,8 +2019,8 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
        goto follow_srt;
  
        /* Specialised code for update frames, since they're so common.
-       * We *know* the updatee points to a BLACKHOLE or CAF_BLACKHOLE,
-       * so just inline the code to evacuate it here.  
+       * We *know* the updatee points to a BLACKHOLE, CAF_BLACKHOLE,
+       * or BLACKHOLE_BQ, so just inline the code to evacuate it here.  
         */
      case UPDATE_FRAME:
        {
@@ -1859,12 +2034,29 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
           continue;
         } else {
           bdescr *bd = Bdescr((P_)frame->updatee);
-         ASSERT(type == BLACKHOLE || type == CAF_BLACKHOLE);
-         if (bd->gen->no >= evac_gen && bd->gen->no > N) { continue; }
-         to = copy(frame->updatee, BLACKHOLE_sizeW(), bd);
-         upd_evacuee(frame->updatee,to);
-         frame->updatee = to;
-         continue;
+         if (bd->gen->no > N) { 
+           if (bd->gen->no < evac_gen) {
+             failed_to_evac = rtsTrue;
+           }
+           continue;
+         }
+         switch (type) {
+         case BLACKHOLE:
+         case CAF_BLACKHOLE:
+           to = copyPart(frame->updatee, BLACKHOLE_sizeW(), 
+                         sizeofW(StgHeader), bd);
+           upd_evacuee(frame->updatee,to);
+           frame->updatee = to;
+           continue;
+         case BLACKHOLE_BQ:
+           to = copy(frame->updatee, BLACKHOLE_sizeW(), bd);
+           upd_evacuee(frame->updatee,to);
+           frame->updatee = to;
+           evacuate_mutable((StgMutClosure *)to);
+           continue;
+         default:
+           barf("scavenge_stack: UPDATE_FRAME updatee");
+         }
         }
        }
  
@@ -2172,7 +2364,7 @@ static void
  threadLazyBlackHole(StgTSO *tso)
  {
    StgUpdateFrame *update_frame;
-  StgBlackHole *bh;
+  StgBlockingQueue *bh;
    StgPtr stack_end;
  
    stack_end = &tso->stack[tso->stack_size];
@@ -2186,21 +2378,22 @@ threadLazyBlackHole(StgTSO *tso)
        break;
  
      case UPDATE_FRAME:
-      bh = stgCast(StgBlackHole*,update_frame->updatee);
+      bh = (StgBlockingQueue *)update_frame->updatee;
  
        /* if the thunk is already blackholed, it means we've also
         * already blackholed the rest of the thunks on this stack,
         * so we can stop early.
+       *
+       * The blackhole made for a CAF is a CAF_BLACKHOLE, so they
+       * don't interfere with this optimisation.
         */
+      if (bh->header.info == &BLACKHOLE_info) {
+       return;
+      }
  
-      /* Don't for now: when we enter a CAF, we create a black hole on
-       * the heap and make the update frame point to it.  Thus the
-       * above optimisation doesn't apply.
-       */
-      if (bh->header.info != &BLACKHOLE_info
-         && bh->header.info != &CAF_BLACKHOLE_info) {
+      if (bh->header.info != &BLACKHOLE_BQ_info &&
+         bh->header.info != &CAF_BLACKHOLE_info) {
         SET_INFO(bh,&BLACKHOLE_info);
-       bh->blocking_queue = END_TSO_QUEUE;
        }
  
        update_frame = update_frame->link;
@@ -2249,8 +2442,8 @@ threadSqueezeStack(StgTSO *tso)
     * added to the stack, rather than the way we see them in this
     * walk. (It makes the next loop less confusing.)  
     *
-   * Could stop if we find an update frame pointing to a black hole,
-   * but see comment in threadLazyBlackHole().
+   * Stop if we find an update frame pointing to a black hole 
+   * (see comment in threadLazyBlackHole()).
     */
    
    next_frame = NULL;
@@ -2259,6 +2452,10 @@ threadSqueezeStack(StgTSO *tso)
      frame->link = next_frame;
      next_frame = frame;
      frame = prev_frame;
+    if (get_itbl(frame)->type == UPDATE_FRAME
+       && frame->updatee->header.info == &BLACKHOLE_info) {
+        break;
+    }
    }
  
    /* Now, we're at the bottom.  Frame points to the lowest update
@@ -2318,20 +2515,16 @@ threadSqueezeStack(StgTSO *tso)
         * slower --SDM
         */
  #if 0 /* do it properly... */
-      if (GET_INFO(updatee_bypass) == BLACKHOLE_info
-         || GET_INFO(updatee_bypass) == CAF_BLACKHOLE_info
-         ) {
+      if (GET_INFO(updatee_bypass) == BLACKHOLE_BQ_info) {
         /* Sigh.  It has one.  Don't lose those threads! */
-       if (GET_INFO(updatee_keep) == BLACKHOLE_info
-           || GET_INFO(updatee_keep) == CAF_BLACKHOLE_info
-           ) {
+         if (GET_INFO(updatee_keep) == BLACKHOLE_BQ_info) {
           /* Urgh.  Two queues.  Merge them. */
-         P_ keep_tso = ((StgBlackHole *)updatee_keep)->blocking_queue;
+         P_ keep_tso = ((StgBlockingQueue *)updatee_keep)->blocking_queue;
           
           while (keep_tso->link != END_TSO_QUEUE) {
             keep_tso = keep_tso->link;
           }
-         keep_tso->link = ((StgBlackHole *)updatee_bypass)->blocking_queue;
+         keep_tso->link = ((StgBlockingQueue *)updatee_bypass)->blocking_queue;
  
         } else {
           /* For simplicity, just swap the BQ for the BH */
@@ -2359,12 +2552,10 @@ threadSqueezeStack(StgTSO *tso)
        /* Do lazy black-holing.
         */
        if (is_update_frame) {
-       StgBlackHole *bh = (StgBlackHole *)frame->updatee;
-       if (bh->header.info != &BLACKHOLE_info
-           && bh->header.info != &CAF_BLACKHOLE_info
-           ) {
+       StgBlockingQueue *bh = (StgBlockingQueue *)frame->updatee;
+       if (bh->header.info != &BLACKHOLE_BQ_info &&
+           bh->header.info != &CAF_BLACKHOLE_info) {
           SET_INFO(bh,&BLACKHOLE_info);
-         bh->blocking_queue = END_TSO_QUEUE;
         }
        }