/* -----------------------------------------------------------------------------
*
- * (c) The GHC Team 1998-2006
+ * (c) The GHC Team 1998-2008
*
* The block allocator and free list manager.
*
#include "PosixSource.h"
#include "Rts.h"
-#include "RtsFlags.h"
+
+#include "Storage.h"
#include "RtsUtils.h"
#include "BlockAlloc.h"
-#include "MBlock.h"
-#include "Storage.h"
#include <string.h>
The following fields are not used by the allocator:
bd->flags
bd->gen_no
- bd->step
+ bd->gen
+ bd->dest
Exceptions: we don't maintain invariants for all the blocks within a
group on the free list, because it is expensive to modify every
Free lists
~~~~~~~~~~
+
Preliminaries:
- - most allocations are for single blocks
- - we cannot be dependent on address-space ordering; sometimes the
- OS gives us new memory backwards in the address space, sometimes
- forwards
- - We want to avoid fragmentation in the free list
-
+ - most allocations are for small blocks
+ - sometimes the OS gives us new memory backwards in the address
+ space, sometimes forwards, so we should not be biased towards
+ any particular layout in the address space
+ - We want to avoid fragmentation
+ - We want allocation and freeing to be O(1) or close.
+
Coalescing trick: when a bgroup is freed (freeGroup()), we can check
- whether it can be coalesced with othre free bgroups by checking the
- bdescrs for the blocks on either side of it. This means that:
-
- - freeGroup is O(1) if we coalesce with an existing free block
- group. Otherwise we have to insert in the free list, but since
- most blocks are small and the free list is sorted by size, this
- is usually quick.
- - the free list must be double-linked, so we can insert into the
- middle.
- - every free group in the free list must have its head and tail
- bdescrs initialised, the rest don't matter.
- - we cannot play this trick with mblocks, because there is no
- requirement that the bdescrs in the second and subsequent mblock
- of an mgroup are initialised (the mgroup might be filled with a
- large array, overwriting the bdescrs for example).
-
- So there are two free lists:
-
- - free_list contains bgroups smaller than an mblock.
- - it is doubly-linked
- - sorted in *size* order: allocation is best-fit
- - free bgroups are always fully coalesced
- - we do the coalescing trick in freeGroup()
-
- - free_mblock_list contains mgroups only
- - it is singly-linked (no need to double-link)
- - sorted in *address* order, so we can coalesce using the list
- - allocation is best-fit by traversing the whole list: we don't
- expect this list to be long, avoiding fragmentation is more
- important.
+ whether it can be coalesced with other free bgroups by checking the
+ bdescrs for the blocks on either side of it. This means that we can
+ coalesce in O(1) time. Every free bgroup must have its head and tail
+ bdescrs initialised, the rest don't matter.
+
+ We keep the free list in buckets, using a heap-sort strategy.
+ Bucket N contains blocks with sizes from 2^N to 2^(N+1)-1. The list of
+ blocks in each bucket is doubly-linked, so that if a block is
+ coalesced we can easily remove it from its current free list.
+
+ To allocate a new block of size S, grab a block from bucket
+ log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
+ least as big as S, and split it if necessary. If there are no
+ blocks in that bucket, look at bigger buckets until a block is found.
+ Allocation is therefore O(log N) time.
+
+ To free a block:
+ - coalesce it with neighbours.
+ - remove coalesced neighbour(s) from free list(s)
+ - add the new (coalesced) block to the front of the appropriate
+ bucket, given by log2(S) where S is the size of the block.
+
+ Free is O(1).
+
+ We cannot play this coalescing trick with mblocks, because there is
+ no requirement that the bdescrs in the second and subsequent mblock
+ of an mgroup are initialised (the mgroup might be filled with a
+ large array, overwriting the bdescrs for example).
+
+ So there is a separate free list for megablocks, sorted in *address*
+ order, so that we can coalesce. Allocation in this list is best-fit
+ by traversing the whole list: we don't expect this list to be long,
+ and allocation/freeing of large blocks is rare; avoiding
+ fragmentation is more important than performance here.
freeGroup() might end up moving a block from free_list to
free_mblock_list, if after coalescing we end up with a full mblock.
--------------------------------------------------------------------------- */
-static bdescr *free_list;
+#define MAX_FREE_LIST 9
+
+static bdescr *free_list[MAX_FREE_LIST];
static bdescr *free_mblock_list;
+// free_list[i] contains blocks that are at least size 2^i, and at
+// most size 2^(i+1) - 1.
+//
+// To find the free list in which to place a block, use log_2(size).
+// To find a free block of the right size, use log_2_ceil(size).
+
+lnat n_alloc_blocks; // currently allocated blocks
+lnat hw_alloc_blocks; // high-water allocated blocks
/* -----------------------------------------------------------------------------
Initialisation
void initBlockAllocator(void)
{
- free_list = NULL;
+ nat i;
+ for (i=0; i < MAX_FREE_LIST; i++) {
+ free_list[i] = NULL;
+ }
free_mblock_list = NULL;
+ n_alloc_blocks = 0;
+ hw_alloc_blocks = 0;
}
/* -----------------------------------------------------------------------------
-------------------------------------------------------------------------- */
STATIC_INLINE void
-initGroup(nat n, bdescr *head)
+initGroup(bdescr *head)
{
bdescr *bd;
- nat i;
+ nat i, n;
- if (n != 0) {
- head->free = head->start;
- head->link = NULL;
- for (i=1, bd = head+1; i < n; i++, bd++) {
+ n = head->blocks;
+ head->free = head->start;
+ head->link = NULL;
+ for (i=1, bd = head+1; i < n; i++, bd++) {
bd->free = 0;
bd->blocks = 0;
bd->link = head;
- }
}
}
-// when a block has been shortened by allocGroup(), we need to push
-// the remaining chunk backwards in the free list in order to keep the
-// list sorted by size.
-static void
-free_list_push_backwards (bdescr *bd)
+// There are quicker non-loopy ways to do log_2, but we expect n to be
+// usually small, and MAX_FREE_LIST is also small, so the loop version
+// might well be the best choice here.
+STATIC_INLINE nat
+log_2_ceil(nat n)
{
- bdescr *p;
-
- p = bd->u.back;
- while (p != NULL && p->blocks > bd->blocks) {
- p = p->u.back;
- }
- if (p != bd->u.back) {
- dbl_link_remove(bd, &free_list);
- if (p != NULL)
- dbl_link_insert_after(bd, p);
- else
- dbl_link_onto(bd, &free_list);
+ nat i, x;
+ x = 1;
+ for (i=0; i < MAX_FREE_LIST; i++) {
+ if (x >= n) return i;
+ x = x << 1;
}
+ return MAX_FREE_LIST;
}
-// when a block has been coalesced by freeGroup(), we need to push the
-// remaining chunk forwards in the free list in order to keep the list
-// sorted by size.
-static void
-free_list_push_forwards (bdescr *bd)
+STATIC_INLINE nat
+log_2(nat n)
{
- bdescr *p;
-
- p = bd;
- while (p->link != NULL && p->link->blocks < bd->blocks) {
- p = p->link;
- }
- if (p != bd) {
- dbl_link_remove(bd, &free_list);
- dbl_link_insert_after(bd, p);
+ nat i, x;
+ x = n;
+ for (i=0; i < MAX_FREE_LIST; i++) {
+ x = x >> 1;
+ if (x == 0) return i;
}
+ return MAX_FREE_LIST;
}
-static void
+STATIC_INLINE void
free_list_insert (bdescr *bd)
{
- bdescr *p, *prev;
+ nat ln;
- if (!free_list) {
- dbl_link_onto(bd, &free_list);
- return;
- }
-
- prev = NULL;
- p = free_list;
- while (p != NULL && p->blocks < bd->blocks) {
- prev = p;
- p = p->link;
- }
- if (prev == NULL)
- {
- dbl_link_onto(bd, &free_list);
- }
- else
- {
- dbl_link_insert_after(bd, prev);
- }
+ ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
+ ln = log_2(bd->blocks);
+
+ dbl_link_onto(bd, &free_list[ln]);
}
// Take a free block group bd, and split off a group of size n from
// it. Adjust the free list as necessary, and return the new group.
static bdescr *
-split_free_block (bdescr *bd, nat n)
+split_free_block (bdescr *bd, nat n, nat ln)
{
bdescr *fg; // free group
ASSERT(bd->blocks > n);
+ dbl_link_remove(bd, &free_list[ln]);
fg = bd + bd->blocks - n; // take n blocks off the end
fg->blocks = n;
bd->blocks -= n;
setup_tail(bd);
- free_list_push_backwards(bd);
+ ln = log_2(bd->blocks);
+ dbl_link_onto(bd, &free_list[ln]);
return fg;
}
} else {
free_mblock_list = bd->link;
}
- initGroup(n, bd);
+ initGroup(bd);
return bd;
}
else if (bd->blocks > n)
{
// we take our chunk off the end here.
nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
- bd = FIRST_BDESCR(MBLOCK_ROUND_DOWN(best) +
+ bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
(best_mblocks-mblocks)*MBLOCK_SIZE);
best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
allocGroup (nat n)
{
bdescr *bd, *rem;
-
- // Todo: not true in multithreaded GC, where we use allocBlock_sync().
- // ASSERT_SM_LOCK();
+ nat ln;
if (n == 0) barf("allocGroup: requested zero blocks");
if (n >= BLOCKS_PER_MBLOCK)
{
- bd = alloc_mega_group(BLOCKS_TO_MBLOCKS(n));
+ nat mblocks;
+
+ mblocks = BLOCKS_TO_MBLOCKS(n);
+
+ // n_alloc_blocks doesn't count the extra blocks we get in a
+ // megablock group.
+ n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
+ if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
+
+ bd = alloc_mega_group(mblocks);
// only the bdescrs of the first MB are required to be initialised
- initGroup(BLOCKS_PER_MBLOCK, bd);
+ initGroup(bd);
+
IF_DEBUG(sanity, checkFreeListSanity());
return bd;
}
- // The free list is sorted by size, so we get best fit.
- for (bd = free_list; bd != NULL; bd = bd->link)
- {
- if (bd->blocks == n) // exactly the right size!
- {
- dbl_link_remove(bd, &free_list);
- initGroup(n, bd); // initialise it
- IF_DEBUG(sanity, checkFreeListSanity());
- return bd;
- }
- if (bd->blocks > n) // block too big...
- {
- bd = split_free_block(bd, n);
- initGroup(n, bd); // initialise the new chunk
- IF_DEBUG(sanity, checkFreeListSanity());
- return bd;
+ n_alloc_blocks += n;
+ if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
+
+ ln = log_2_ceil(n);
+
+ while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
+ ln++;
+ }
+
+ if (ln == MAX_FREE_LIST) {
+#if 0
+ if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
+ debugBelch("Fragmentation, wanted %d blocks:", n);
+ RtsFlags.DebugFlags.block_alloc = 1;
+ checkFreeListSanity();
}
+#endif
+
+ bd = alloc_mega_group(1);
+ bd->blocks = n;
+ initGroup(bd); // we know the group will fit
+ rem = bd + n;
+ rem->blocks = BLOCKS_PER_MBLOCK-n;
+ initGroup(rem); // init the slop
+ n_alloc_blocks += rem->blocks;
+ freeGroup(rem); // add the slop on to the free list
+ IF_DEBUG(sanity, checkFreeListSanity());
+ return bd;
}
- bd = alloc_mega_group(1);
- bd->blocks = n;
- initGroup(n,bd); // we know the group will fit
- rem = bd + n;
- rem->blocks = BLOCKS_PER_MBLOCK-n;
- initGroup(BLOCKS_PER_MBLOCK-n, rem); // init the slop
- freeGroup(rem); // add the slop on to the free list
+ bd = free_list[ln];
+
+ if (bd->blocks == n) // exactly the right size!
+ {
+ dbl_link_remove(bd, &free_list[ln]);
+ }
+ else if (bd->blocks > n) // block too big...
+ {
+ bd = split_free_block(bd, n, ln);
+ }
+ else
+ {
+ barf("allocGroup: free list corrupted");
+ }
+ initGroup(bd); // initialise it
IF_DEBUG(sanity, checkFreeListSanity());
+ ASSERT(bd->blocks == n);
return bd;
}
q = p->link;
if (q != NULL &&
MBLOCK_ROUND_DOWN(q) ==
- MBLOCK_ROUND_DOWN(p) + BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
+ (StgWord8*)MBLOCK_ROUND_DOWN(p) +
+ BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
// can coalesce
p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
BLOCKS_TO_MBLOCKS(q->blocks));
void
freeGroup(bdescr *p)
{
- nat p_on_free_list = 0;
+ nat ln;
// Todo: not true in multithreaded GC
// ASSERT_SM_LOCK();
ASSERT(p->free != (P_)-1);
p->free = (void *)-1; /* indicates that this block is free */
- p->step = NULL;
+ p->gen = NULL;
p->gen_no = 0;
/* fill the block group with garbage if sanity checking is on */
IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));
if (p->blocks >= BLOCKS_PER_MBLOCK)
{
+ nat mblocks;
+
+ mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
// If this is an mgroup, make sure it has the right number of blocks
- ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks)));
+ ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
+
+ n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;
+
free_mega_group(p);
return;
}
+ ASSERT(n_alloc_blocks >= p->blocks);
+ n_alloc_blocks -= p->blocks;
+
// coalesce forwards
{
bdescr *next;
if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
{
p->blocks += next->blocks;
+ ln = log_2(next->blocks);
+ dbl_link_remove(next, &free_list[ln]);
if (p->blocks == BLOCKS_PER_MBLOCK)
{
- dbl_link_remove(next, &free_list);
free_mega_group(p);
return;
}
- dbl_link_replace(p, next, &free_list);
setup_tail(p);
- free_list_push_forwards(p);
- p_on_free_list = 1;
}
}
if (prev->free == (P_)-1)
{
+ ln = log_2(prev->blocks);
+ dbl_link_remove(prev, &free_list[ln]);
prev->blocks += p->blocks;
if (prev->blocks >= BLOCKS_PER_MBLOCK)
{
- if (p_on_free_list)
- {
- dbl_link_remove(p, &free_list);
- }
- dbl_link_remove(prev, &free_list);
free_mega_group(prev);
return;
}
- else if (p_on_free_list)
- {
- // p was already coalesced forwards
- dbl_link_remove(p, &free_list);
- }
- setup_tail(prev);
- free_list_push_forwards(prev);
p = prev;
- p_on_free_list = 1;
}
}
- if (!p_on_free_list)
- {
- setup_tail(p);
- free_list_insert(p);
- }
+ setup_tail(p);
+ free_list_insert(p);
IF_DEBUG(sanity, checkFreeListSanity());
}
RELEASE_SM_LOCK;
}
+// splitBlockGroup(bd,B) splits bd in two. Afterward, bd will have B
+// blocks, and a new block descriptor pointing to the remainder is
+// returned.
bdescr *
splitBlockGroup (bdescr *bd, nat blocks)
{
}
if (bd->blocks > BLOCKS_PER_MBLOCK) {
- nat mblocks;
+ nat low_mblocks, high_mblocks;
void *new_mblock;
if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
barf("splitLargeBlock: not a multiple of a megablock");
}
- mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
- new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + mblocks * MBLOCK_SIZE_W);
+ low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
+ high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);
+
+ new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + low_mblocks * MBLOCK_SIZE_W);
initMBlock(new_mblock);
new_bd = FIRST_BDESCR(new_mblock);
- new_bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
+ new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);
+
+ ASSERT(blocks + new_bd->blocks ==
+ bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
}
else
{
static void
initMBlock(void *mblock)
{
- bdescr *bd;
- void *block;
-
- /* the first few Bdescr's in a block are unused, so we don't want to
- * put them all on the free list.
- */
- block = FIRST_BLOCK(mblock);
- bd = FIRST_BDESCR(mblock);
-
- /* Initialise the start field of each block descriptor
- */
- for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
- bd->start = block;
- }
+ bdescr *bd;
+ StgWord8 *block;
+
+ /* the first few Bdescr's in a block are unused, so we don't want to
+ * put them all on the free list.
+ */
+ block = FIRST_BLOCK(mblock);
+ bd = FIRST_BDESCR(mblock);
+
+ /* Initialise the start field of each block descriptor
+ */
+ for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
+ block += BLOCK_SIZE) {
+ bd->start = (void*)block;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Stats / metrics
+ -------------------------------------------------------------------------- */
+
+nat
+countBlocks(bdescr *bd)
+{
+ nat n;
+ for (n=0; bd != NULL; bd=bd->link) {
+ n += bd->blocks;
+ }
+ return n;
+}
+
+// (*1) Just like countBlocks, except that we adjust the count for a
+// megablock group so that it doesn't include the extra few blocks
+// that would be taken up by block descriptors in the second and
+// subsequent megablock. This is so we can tally the count with the
+// number of blocks allocated in the system, for memInventory().
+nat
+countAllocdBlocks(bdescr *bd)
+{
+ nat n;
+ for (n=0; bd != NULL; bd=bd->link) {
+ n += bd->blocks;
+ // hack for megablock groups: see (*1) above
+ if (bd->blocks > BLOCKS_PER_MBLOCK) {
+ n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
+ * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
+ }
+ }
+ return n;
}
/* -----------------------------------------------------------------------------
checkFreeListSanity(void)
{
bdescr *bd, *prev;
+ nat ln, min;
- IF_DEBUG(block_alloc, debugBelch("free block list:\n"));
- prev = NULL;
- for (bd = free_list; bd != NULL; prev = bd, bd = bd->link)
- {
- IF_DEBUG(block_alloc,
- debugBelch("group at %p, length %ld blocks\n",
- bd->start, (long)bd->blocks));
- ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
- ASSERT(bd->link != bd); // catch easy loops
+ min = 1;
+ for (ln = 0; ln < MAX_FREE_LIST; ln++) {
+ IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));
- check_tail(bd);
+ prev = NULL;
+ for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
+ {
+ IF_DEBUG(block_alloc,
+ debugBelch("group at %p, length %ld blocks\n",
+ bd->start, (long)bd->blocks));
+ ASSERT(bd->free == (P_)-1);
+ ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
+ ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
+ ASSERT(bd->link != bd); // catch easy loops
- if (prev)
- ASSERT(bd->u.back == prev);
- else
- ASSERT(bd->u.back == NULL);
+ check_tail(bd);
- if (bd->link != NULL)
- {
- // make sure the list is sorted
- ASSERT(bd->blocks <= bd->link->blocks);
- }
+ if (prev)
+ ASSERT(bd->u.back == prev);
+ else
+ ASSERT(bd->u.back == NULL);
- {
- bdescr *next;
- next = bd + bd->blocks;
- if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
{
- ASSERT(next->free != (P_)-1);
+ bdescr *next;
+ next = bd + bd->blocks;
+ if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
+ {
+ ASSERT(next->free != (P_)-1);
+ }
}
}
+ min = min << 1;
}
prev = NULL;
if (bd->link != NULL)
{
ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
- MBLOCK_ROUND_DOWN(bd) +
+ (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
}
}
{
bdescr *bd;
lnat total_blocks = 0;
+ nat ln;
- for (bd = free_list; bd != NULL; bd = bd->link) {
- total_blocks += bd->blocks;
+ for (ln=0; ln < MAX_FREE_LIST; ln++) {
+ for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
+ total_blocks += bd->blocks;
+ }
}
for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
}
return total_blocks;
}
+
+void
+markBlocks (bdescr *bd)
+{
+ for (; bd != NULL; bd = bd->link) {
+ bd->flags |= BF_KNOWN;
+ }
+}
+
+void
+reportUnmarkedBlocks (void)
+{
+ void *mblock;
+ bdescr *bd;
+
+ debugBelch("Unreachable blocks:\n");
+ for (mblock = getFirstMBlock(); mblock != NULL;
+ mblock = getNextMBlock(mblock)) {
+ for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
+ if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
+ debugBelch(" %p\n",bd);
+ }
+ if (bd->blocks >= BLOCKS_PER_MBLOCK) {
+ mblock = (StgWord8*)mblock +
+ (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
+ break;
+ } else {
+ bd += bd->blocks;
+ }
+ }
+ }
+}
+
#endif