rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsUtils.h"
  17 #include "RtsFlags.h"
  18 #include "Stats.h"
  19 #include "Hooks.h"
  20 #include "BlockAlloc.h"
  21 #include "MBlock.h"
  22 #include "Weak.h"
  23 #include "Sanity.h"
  24 #include "Arena.h"
  25 #include "OSThreads.h"
  26 #include "Capability.h"
  27 #include "Storage.h"
  28 #include "Schedule.h"
  29 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  30 #include "OSMem.h"
  31 #include "Trace.h"
  32 #include "GC.h"
  33 #include "GCUtils.h"
  34 #include "Evac.h"
  35
  36 #include <stdlib.h>
  37 #include <string.h>
  38
  39 /*
  40  * All these globals require sm_mutex to access in THREADED_RTS mode.
  41  */
  42 StgClosure    *caf_list         = NULL;
  43 StgClosure    *revertible_caf_list = NULL;
  44 rtsBool       keepCAFs;
  45
  46 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  47 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  48 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  49
  50 generation *generations = NULL; /* all the generations */
  51 generation *g0          = NULL; /* generation 0, for convenience */
  52 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  53 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  54
  55 ullong total_allocated = 0;     /* total memory allocated during run */
  56
  57 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  58 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  59
  60 #ifdef THREADED_RTS
  61 /*
  62  * Storage manager mutex:  protects all the above state from
  63  * simultaneous access by two STG threads.
  64  */
  65 Mutex sm_mutex;
  66 /*
  67  * This mutex is used by atomicModifyMutVar# only
  68  */
  69 Mutex atomic_modify_mutvar_mutex;
  70 #endif
  71
  72
  73 /*
  74  * Forward references
  75  */
  76 static void *stgAllocForGMP   (size_t size_in_bytes);
  77 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  78 static void  stgDeallocForGMP (void *ptr, size_t size);
  79
  80 static void
  81 initStep (step *stp, int g, int s)
  82 {
  83     stp->no = s;
  84     stp->blocks = NULL;
  85     stp->n_blocks = 0;
  86     stp->old_blocks = NULL;
  87     stp->n_old_blocks = 0;
  88     stp->gen = &generations[g];
  89     stp->gen_no = g;
  90     stp->large_objects = NULL;
  91     stp->n_large_blocks = 0;
  92     stp->scavenged_large_objects = NULL;
  93     stp->n_scavenged_large_blocks = 0;
  94     stp->is_compacted = 0;
  95     stp->bitmap = NULL;
  96 #ifdef THREADED_RTS
  97     initSpinLock(&stp->sync_todo);
  98     initSpinLock(&stp->sync_large_objects);
  99 #endif
 100 }
 101
 102 void
 103 initStorage( void )
 104 {
 105   nat g, s;
 106   generation *gen;
 107   step *step_arr;
 108
 109   if (generations != NULL) {
 110       // multi-init protection
 111       return;
 112   }
 113
 114   initMBlocks();
 115
 116   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 117    * doing something reasonable.
 118    */
 119   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 120   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info));
 121   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 122   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 123
 124   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 125       RtsFlags.GcFlags.heapSizeSuggestion >
 126       RtsFlags.GcFlags.maxHeapSize) {
 127     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 128   }
 129
 130   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 131       RtsFlags.GcFlags.minAllocAreaSize >
 132       RtsFlags.GcFlags.maxHeapSize) {
 133       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 134       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 135   }
 136
 137   initBlockAllocator();
 138
 139 #if defined(THREADED_RTS)
 140   initMutex(&sm_mutex);
 141   initMutex(&atomic_modify_mutvar_mutex);
 142 #endif
 143
 144   ACQUIRE_SM_LOCK;
 145
 146   /* allocate generation info array */
 147   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 148                                              * sizeof(struct generation_),
 149                                              "initStorage: gens");
 150
 151   /* allocate all the steps into an array.  It is important that we do
 152      it this way, because we need the invariant that two step pointers
 153      can be directly compared to see which is the oldest.
 154      Remember that the last generation has only one step. */
 155   step_arr = stgMallocBytes(sizeof(struct step_)
 156                             * (1 + ((RtsFlags.GcFlags.generations - 1)
 157                                     * RtsFlags.GcFlags.steps)),
 158                             "initStorage: steps");
 159
 160   /* Initialise all generations */
 161   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 162     gen = &generations[g];
 163     gen->no = g;
 164     gen->mut_list = allocBlock();
 165     gen->collections = 0;
 166     gen->failed_promotions = 0;
 167     gen->max_blocks = 0;
 168   }
 169
 170   /* A couple of convenience pointers */
 171   g0 = &generations[0];
 172   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 173
 174   /* Allocate step structures in each generation */
 175   if (RtsFlags.GcFlags.generations > 1) {
 176     /* Only for multiple-generations */
 177
 178     /* Oldest generation: one step */
 179     oldest_gen->n_steps = 1;
 180     oldest_gen->steps   = step_arr + (RtsFlags.GcFlags.generations - 1)
 181                                       * RtsFlags.GcFlags.steps;
 182
 183     /* set up all except the oldest generation with 2 steps */
 184     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 185       generations[g].n_steps = RtsFlags.GcFlags.steps;
 186       generations[g].steps   = step_arr + g * RtsFlags.GcFlags.steps;
 187     }
 188
 189   } else {
 190     /* single generation, i.e. a two-space collector */
 191     g0->n_steps = 1;
 192     g0->steps   = step_arr;
 193   }
 194
 195 #ifdef THREADED_RTS
 196   n_nurseries = n_capabilities;
 197 #else
 198   n_nurseries = 1;
 199 #endif
 200   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 201                               "initStorage: nurseries");
 202
 203   /* Initialise all steps */
 204   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 205     for (s = 0; s < generations[g].n_steps; s++) {
 206         initStep(&generations[g].steps[s], g, s);
 207     }
 208   }
 209
 210   for (s = 0; s < n_nurseries; s++) {
 211       initStep(&nurseries[s], 0, s);
 212   }
 213
 214   /* Set up the destination pointers in each younger gen. step */
 215   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 216     for (s = 0; s < generations[g].n_steps-1; s++) {
 217       generations[g].steps[s].to = &generations[g].steps[s+1];
 218     }
 219     generations[g].steps[s].to = &generations[g+1].steps[0];
 220   }
 221   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 222
 223   for (s = 0; s < n_nurseries; s++) {
 224       nurseries[s].to = generations[0].steps[0].to;
 225   }
 226
 227   /* The oldest generation has one step. */
 228   if (RtsFlags.GcFlags.compact) {
 229       if (RtsFlags.GcFlags.generations == 1) {
 230           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 231       } else {
 232           oldest_gen->steps[0].is_compacted = 1;
 233       }
 234   }
 235
 236   generations[0].max_blocks = 0;
 237   g0s0 = &generations[0].steps[0];
 238
 239   /* The allocation area.  Policy: keep the allocation area
 240    * small to begin with, even if we have a large suggested heap
 241    * size.  Reason: we're going to do a major collection first, and we
 242    * don't want it to be a big one.  This vague idea is borne out by
 243    * rigorous experimental evidence.
 244    */
 245   allocNurseries();
 246
 247   weak_ptr_list = NULL;
 248   caf_list = NULL;
 249   revertible_caf_list = NULL;
 250
 251   /* initialise the allocate() interface */
 252   alloc_blocks = 0;
 253   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 254
 255   /* Tell GNU multi-precision pkg about our custom alloc functions */
 256   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 257
 258 #ifdef THREADED_RTS
 259   initSpinLock(&gc_alloc_block_sync);
 260   initSpinLock(&static_objects_sync);
 261   initSpinLock(&recordMutableGen_sync);
 262   whitehole_spin = 0;
 263 #endif
 264
 265   IF_DEBUG(gc, statDescribeGens());
 266
 267   RELEASE_SM_LOCK;
 268 }
 269
 270 void
 271 exitStorage (void)
 272 {
 273     stat_exit(calcAllocated());
 274 }
 275
 276 void
 277 freeStorage (void)
 278 {
 279     stgFree(g0s0); // frees all the steps
 280     stgFree(generations);
 281     freeAllMBlocks();
 282 #if defined(THREADED_RTS)
 283     closeMutex(&sm_mutex);
 284     closeMutex(&atomic_modify_mutvar_mutex);
 285 #endif
 286     stgFree(nurseries);
 287 }
 288
 289 /* -----------------------------------------------------------------------------
 290    CAF management.
 291
 292    The entry code for every CAF does the following:
 293
 294       - builds a CAF_BLACKHOLE in the heap
 295       - pushes an update frame pointing to the CAF_BLACKHOLE
 296       - invokes UPD_CAF(), which:
 297           - calls newCaf, below
 298           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 299
 300    Why do we build a BLACKHOLE in the heap rather than just updating
 301    the thunk directly?  It's so that we only need one kind of update
 302    frame - otherwise we'd need a static version of the update frame too.
 303
 304    newCaf() does the following:
 305
 306       - it puts the CAF on the oldest generation's mut-once list.
 307         This is so that we can treat the CAF as a root when collecting
 308         younger generations.
 309
 310    For GHCI, we have additional requirements when dealing with CAFs:
 311
 312       - we must *retain* all dynamically-loaded CAFs ever entered,
 313         just in case we need them again.
 314       - we must be able to *revert* CAFs that have been evaluated, to
 315         their pre-evaluated form.
 316
 317       To do this, we use an additional CAF list.  When newCaf() is
 318       called on a dynamically-loaded CAF, we add it to the CAF list
 319       instead of the old-generation mutable list, and save away its
 320       old info pointer (in caf->saved_info) for later reversion.
 321
 322       To revert all the CAFs, we traverse the CAF list and reset the
 323       info pointer to caf->saved_info, then throw away the CAF list.
 324       (see GC.c:revertCAFs()).
 325
 326       -- SDM 29/1/01
 327
 328    -------------------------------------------------------------------------- */
 329
 330 void
 331 newCAF(StgClosure* caf)
 332 {
 333   ACQUIRE_SM_LOCK;
 334
 335   if(keepCAFs)
 336   {
 337     // HACK:
 338     // If we are in GHCi _and_ we are using dynamic libraries,
 339     // then we can't redirect newCAF calls to newDynCAF (see below),
 340     // so we make newCAF behave almost like newDynCAF.
 341     // The dynamic libraries might be used by both the interpreted
 342     // program and GHCi itself, so they must not be reverted.
 343     // This also means that in GHCi with dynamic libraries, CAFs are not
 344     // garbage collected. If this turns out to be a problem, we could
 345     // do another hack here and do an address range test on caf to figure
 346     // out whether it is from a dynamic library.
 347     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 348     ((StgIndStatic *)caf)->static_link = caf_list;
 349     caf_list = caf;
 350   }
 351   else
 352   {
 353     /* Put this CAF on the mutable list for the old generation.
 354     * This is a HACK - the IND_STATIC closure doesn't really have
 355     * a mut_link field, but we pretend it has - in fact we re-use
 356     * the STATIC_LINK field for the time being, because when we
 357     * come to do a major GC we won't need the mut_link field
 358     * any more and can use it as a STATIC_LINK.
 359     */
 360     ((StgIndStatic *)caf)->saved_info = NULL;
 361     recordMutableGen(caf, oldest_gen);
 362   }
 363
 364   RELEASE_SM_LOCK;
 365 }
 366
 367 // An alternate version of newCaf which is used for dynamically loaded
 368 // object code in GHCi.  In this case we want to retain *all* CAFs in
 369 // the object code, because they might be demanded at any time from an
 370 // expression evaluated on the command line.
 371 // Also, GHCi might want to revert CAFs, so we add these to the
 372 // revertible_caf_list.
 373 //
 374 // The linker hackily arranges that references to newCaf from dynamic
 375 // code end up pointing to newDynCAF.
 376 void
 377 newDynCAF(StgClosure *caf)
 378 {
 379     ACQUIRE_SM_LOCK;
 380
 381     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 382     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 383     revertible_caf_list = caf;
 384
 385     RELEASE_SM_LOCK;
 386 }
 387
 388 /* -----------------------------------------------------------------------------
 389    Nursery management.
 390    -------------------------------------------------------------------------- */
 391
 392 static bdescr *
 393 allocNursery (step *stp, bdescr *tail, nat blocks)
 394 {
 395     bdescr *bd;
 396     nat i;
 397
 398     // Allocate a nursery: we allocate fresh blocks one at a time and
 399     // cons them on to the front of the list, not forgetting to update
 400     // the back pointer on the tail of the list to point to the new block.
 401     for (i=0; i < blocks; i++) {
 402         // @LDV profiling
 403         /*
 404           processNursery() in LdvProfile.c assumes that every block group in
 405           the nursery contains only a single block. So, if a block group is
 406           given multiple blocks, change processNursery() accordingly.
 407         */
 408         bd = allocBlock();
 409         bd->link = tail;
 410         // double-link the nursery: we might need to insert blocks
 411         if (tail != NULL) {
 412             tail->u.back = bd;
 413         }
 414         bd->step = stp;
 415         bd->gen_no = 0;
 416         bd->flags = 0;
 417         bd->free = bd->start;
 418         tail = bd;
 419     }
 420     tail->u.back = NULL;
 421     return tail;
 422 }
 423
 424 static void
 425 assignNurseriesToCapabilities (void)
 426 {
 427 #ifdef THREADED_RTS
 428     nat i;
 429
 430     for (i = 0; i < n_nurseries; i++) {
 431         capabilities[i].r.rNursery        = &nurseries[i];
 432         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 433         capabilities[i].r.rCurrentAlloc   = NULL;
 434     }
 435 #else /* THREADED_RTS */
 436     MainCapability.r.rNursery        = &nurseries[0];
 437     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 438     MainCapability.r.rCurrentAlloc   = NULL;
 439 #endif
 440 }
 441
 442 void
 443 allocNurseries( void )
 444 {
 445     nat i;
 446
 447     for (i = 0; i < n_nurseries; i++) {
 448         nurseries[i].blocks =
 449             allocNursery(&nurseries[i], NULL,
 450                          RtsFlags.GcFlags.minAllocAreaSize);
 451         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 452         nurseries[i].old_blocks   = NULL;
 453         nurseries[i].n_old_blocks = 0;
 454     }
 455     assignNurseriesToCapabilities();
 456 }
 457
 458 void
 459 resetNurseries( void )
 460 {
 461     nat i;
 462     bdescr *bd;
 463     step *stp;
 464
 465     for (i = 0; i < n_nurseries; i++) {
 466         stp = &nurseries[i];
 467         for (bd = stp->blocks; bd; bd = bd->link) {
 468             bd->free = bd->start;
 469             ASSERT(bd->gen_no == 0);
 470             ASSERT(bd->step == stp);
 471             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 472         }
 473     }
 474     assignNurseriesToCapabilities();
 475 }
 476
 477 lnat
 478 countNurseryBlocks (void)
 479 {
 480     nat i;
 481     lnat blocks = 0;
 482
 483     for (i = 0; i < n_nurseries; i++) {
 484         blocks += nurseries[i].n_blocks;
 485     }
 486     return blocks;
 487 }
 488
 489 static void
 490 resizeNursery ( step *stp, nat blocks )
 491 {
 492   bdescr *bd;
 493   nat nursery_blocks;
 494
 495   nursery_blocks = stp->n_blocks;
 496   if (nursery_blocks == blocks) return;
 497
 498   if (nursery_blocks < blocks) {
 499       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 500                  blocks);
 501     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 502   }
 503   else {
 504     bdescr *next_bd;
 505
 506     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 507                blocks);
 508
 509     bd = stp->blocks;
 510     while (nursery_blocks > blocks) {
 511         next_bd = bd->link;
 512         next_bd->u.back = NULL;
 513         nursery_blocks -= bd->blocks; // might be a large block
 514         freeGroup(bd);
 515         bd = next_bd;
 516     }
 517     stp->blocks = bd;
 518     // might have gone just under, by freeing a large block, so make
 519     // up the difference.
 520     if (nursery_blocks < blocks) {
 521         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 522     }
 523   }
 524
 525   stp->n_blocks = blocks;
 526   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 527 }
 528
 529 //
 530 // Resize each of the nurseries to the specified size.
 531 //
 532 void
 533 resizeNurseriesFixed (nat blocks)
 534 {
 535     nat i;
 536     for (i = 0; i < n_nurseries; i++) {
 537         resizeNursery(&nurseries[i], blocks);
 538     }
 539 }
 540
 541 //
 542 // Resize the nurseries to the total specified size.
 543 //
 544 void
 545 resizeNurseries (nat blocks)
 546 {
 547     // If there are multiple nurseries, then we just divide the number
 548     // of available blocks between them.
 549     resizeNurseriesFixed(blocks / n_nurseries);
 550 }
 551
 552 /* -----------------------------------------------------------------------------
 553    The allocate() interface
 554
 555    allocateInGen() function allocates memory directly into a specific
 556    generation.  It always succeeds, and returns a chunk of memory n
 557    words long.  n can be larger than the size of a block if necessary,
 558    in which case a contiguous block group will be allocated.
 559
 560    allocate(n) is equivalent to allocateInGen(g0).
 561    -------------------------------------------------------------------------- */
 562
 563 StgPtr
 564 allocateInGen (generation *g, nat n)
 565 {
 566     step *stp;
 567     bdescr *bd;
 568     StgPtr ret;
 569
 570     ACQUIRE_SM_LOCK;
 571
 572     TICK_ALLOC_HEAP_NOCTR(n);
 573     CCS_ALLOC(CCCS,n);
 574
 575     stp = &g->steps[0];
 576
 577     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 578     {
 579         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 580
 581         // Attempting to allocate an object larger than maxHeapSize
 582         // should definitely be disallowed.  (bug #1791)
 583         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 584             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 585             heapOverflow();
 586         }
 587
 588         bd = allocGroup(req_blocks);
 589         dbl_link_onto(bd, &stp->large_objects);
 590         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 591         bd->gen_no  = g->no;
 592         bd->step = stp;
 593         bd->flags = BF_LARGE;
 594         bd->free = bd->start + n;
 595         ret = bd->start;
 596     }
 597     else
 598     {
 599         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 600         bd = stp->blocks;
 601         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 602             bd = allocBlock();
 603             bd->gen_no = g->no;
 604             bd->step = stp;
 605             bd->flags = 0;
 606             bd->link = stp->blocks;
 607             stp->blocks = bd;
 608             stp->n_blocks++;
 609             alloc_blocks++;
 610         }
 611         ret = bd->free;
 612         bd->free += n;
 613     }
 614
 615     RELEASE_SM_LOCK;
 616
 617     return ret;
 618 }
 619
 620 StgPtr
 621 allocate (nat n)
 622 {
 623     return allocateInGen(g0,n);
 624 }
 625
 626 lnat
 627 allocatedBytes( void )
 628 {
 629     lnat allocated;
 630
 631     allocated = alloc_blocks * BLOCK_SIZE_W;
 632     if (pinned_object_block != NULL) {
 633         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 634             pinned_object_block->free;
 635     }
 636
 637     return allocated;
 638 }
 639
 640 // split N blocks off the start of the given bdescr, returning the
 641 // remainder as a new block group.  We treat the remainder as if it
 642 // had been freshly allocated in generation 0.
 643 bdescr *
 644 splitLargeBlock (bdescr *bd, nat blocks)
 645 {
 646     bdescr *new_bd;
 647
 648     // subtract the original number of blocks from the counter first
 649     bd->step->n_large_blocks -= bd->blocks;
 650
 651     new_bd = splitBlockGroup (bd, blocks);
 652
 653     dbl_link_onto(new_bd, &g0s0->large_objects);
 654     g0s0->n_large_blocks += new_bd->blocks;
 655     new_bd->gen_no  = g0s0->no;
 656     new_bd->step    = g0s0;
 657     new_bd->flags   = BF_LARGE;
 658     new_bd->free    = bd->free;
 659
 660     // add the new number of blocks to the counter.  Due to the gaps
 661     // for block descriptor, new_bd->blocks + bd->blocks might not be
 662     // equal to the original bd->blocks, which is why we do it this way.
 663     bd->step->n_large_blocks += bd->blocks;
 664
 665     return new_bd;
 666 }
 667
 668 /* -----------------------------------------------------------------------------
 669    allocateLocal()
 670
 671    This allocates memory in the current thread - it is intended for
 672    use primarily from STG-land where we have a Capability.  It is
 673    better than allocate() because it doesn't require taking the
 674    sm_mutex lock in the common case.
 675
 676    Memory is allocated directly from the nursery if possible (but not
 677    from the current nursery block, so as not to interfere with
 678    Hp/HpLim).
 679    -------------------------------------------------------------------------- */
 680
 681 StgPtr
 682 allocateLocal (Capability *cap, nat n)
 683 {
 684     bdescr *bd;
 685     StgPtr p;
 686
 687     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 688         return allocateInGen(g0,n);
 689     }
 690
 691     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 692
 693     TICK_ALLOC_HEAP_NOCTR(n);
 694     CCS_ALLOC(CCCS,n);
 695
 696     bd = cap->r.rCurrentAlloc;
 697     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 698
 699         // The CurrentAlloc block is full, we need to find another
 700         // one.  First, we try taking the next block from the
 701         // nursery:
 702         bd = cap->r.rCurrentNursery->link;
 703
 704         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 705             // The nursery is empty, or the next block is already
 706             // full: allocate a fresh block (we can't fail here).
 707             ACQUIRE_SM_LOCK;
 708             bd = allocBlock();
 709             cap->r.rNursery->n_blocks++;
 710             RELEASE_SM_LOCK;
 711             bd->gen_no = 0;
 712             bd->step = cap->r.rNursery;
 713             bd->flags = 0;
 714             // NO: alloc_blocks++;
 715             // calcAllocated() uses the size of the nursery, and we've
 716             // already bumpted nursery->n_blocks above.
 717         } else {
 718             // we have a block in the nursery: take it and put
 719             // it at the *front* of the nursery list, and use it
 720             // to allocate() from.
 721             cap->r.rCurrentNursery->link = bd->link;
 722             if (bd->link != NULL) {
 723                 bd->link->u.back = cap->r.rCurrentNursery;
 724             }
 725         }
 726         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 727         cap->r.rCurrentAlloc = bd;
 728         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 729     }
 730     p = bd->free;
 731     bd->free += n;
 732     return p;
 733 }
 734
 735 /* ---------------------------------------------------------------------------
 736    Allocate a fixed/pinned object.
 737
 738    We allocate small pinned objects into a single block, allocating a
 739    new block when the current one overflows.  The block is chained
 740    onto the large_object_list of generation 0 step 0.
 741
 742    NOTE: The GC can't in general handle pinned objects.  This
 743    interface is only safe to use for ByteArrays, which have no
 744    pointers and don't require scavenging.  It works because the
 745    block's descriptor has the BF_LARGE flag set, so the block is
 746    treated as a large object and chained onto various lists, rather
 747    than the individual objects being copied.  However, when it comes
 748    to scavenge the block, the GC will only scavenge the first object.
 749    The reason is that the GC can't linearly scan a block of pinned
 750    objects at the moment (doing so would require using the
 751    mostly-copying techniques).  But since we're restricting ourselves
 752    to pinned ByteArrays, not scavenging is ok.
 753
 754    This function is called by newPinnedByteArray# which immediately
 755    fills the allocated memory with a MutableByteArray#.
 756    ------------------------------------------------------------------------- */
 757
 758 StgPtr
 759 allocatePinned( nat n )
 760 {
 761     StgPtr p;
 762     bdescr *bd = pinned_object_block;
 763
 764     // If the request is for a large object, then allocate()
 765     // will give us a pinned object anyway.
 766     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 767         return allocate(n);
 768     }
 769
 770     ACQUIRE_SM_LOCK;
 771
 772     TICK_ALLOC_HEAP_NOCTR(n);
 773     CCS_ALLOC(CCCS,n);
 774
 775     // we always return 8-byte aligned memory.  bd->free must be
 776     // 8-byte aligned to begin with, so we just round up n to
 777     // the nearest multiple of 8 bytes.
 778     if (sizeof(StgWord) == 4) {
 779         n = (n+1) & ~1;
 780     }
 781
 782     // If we don't have a block of pinned objects yet, or the current
 783     // one isn't large enough to hold the new object, allocate a new one.
 784     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 785         pinned_object_block = bd = allocBlock();
 786         dbl_link_onto(bd, &g0s0->large_objects);
 787         g0s0->n_large_blocks++;
 788         bd->gen_no = 0;
 789         bd->step   = g0s0;
 790         bd->flags  = BF_PINNED | BF_LARGE;
 791         bd->free   = bd->start;
 792         alloc_blocks++;
 793     }
 794
 795     p = bd->free;
 796     bd->free += n;
 797     RELEASE_SM_LOCK;
 798     return p;
 799 }
 800
 801 /* -----------------------------------------------------------------------------
 802    Write Barriers
 803    -------------------------------------------------------------------------- */
 804
 805 /*
 806    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 807    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 808    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 809    and is put on the mutable list.
 810 */
 811 void
 812 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 813 {
 814     Capability *cap = regTableToCapability(reg);
 815     bdescr *bd;
 816     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 817         p->header.info = &stg_MUT_VAR_DIRTY_info;
 818         bd = Bdescr((StgPtr)p);
 819         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 820     }
 821 }
 822
 823 /*
 824    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 825    on the mutable list; a MVAR_DIRTY is.  When written to, a
 826    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 827    The check for MVAR_CLEAN is inlined at the call site for speed,
 828    this really does make a difference on concurrency-heavy benchmarks
 829    such as Chaneneos and cheap-concurrency.
 830 */
 831 void
 832 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 833 {
 834     Capability *cap = regTableToCapability(reg);
 835     bdescr *bd;
 836     bd = Bdescr((StgPtr)p);
 837     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 838 }
 839
 840 /* -----------------------------------------------------------------------------
 841    Allocation functions for GMP.
 842
 843    These all use the allocate() interface - we can't have any garbage
 844    collection going on during a gmp operation, so we use allocate()
 845    which always succeeds.  The gmp operations which might need to
 846    allocate will ask the storage manager (via doYouWantToGC()) whether
 847    a garbage collection is required, in case we get into a loop doing
 848    only allocate() style allocation.
 849    -------------------------------------------------------------------------- */
 850
 851 static void *
 852 stgAllocForGMP (size_t size_in_bytes)
 853 {
 854   StgArrWords* arr;
 855   nat data_size_in_words, total_size_in_words;
 856
 857   /* round up to a whole number of words */
 858   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 859   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 860
 861   /* allocate and fill it in. */
 862 #if defined(THREADED_RTS)
 863   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 864 #else
 865   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 866 #endif
 867   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 868
 869   /* and return a ptr to the goods inside the array */
 870   return arr->payload;
 871 }
 872
 873 static void *
 874 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 875 {
 876     void *new_stuff_ptr = stgAllocForGMP(new_size);
 877     nat i = 0;
 878     char *p = (char *) ptr;
 879     char *q = (char *) new_stuff_ptr;
 880
 881     for (; i < old_size; i++, p++, q++) {
 882         *q = *p;
 883     }
 884
 885     return(new_stuff_ptr);
 886 }
 887
 888 static void
 889 stgDeallocForGMP (void *ptr STG_UNUSED,
 890                   size_t size STG_UNUSED)
 891 {
 892     /* easy for us: the garbage collector does the dealloc'n */
 893 }
 894
 895 /* -----------------------------------------------------------------------------
 896  * Stats and stuff
 897  * -------------------------------------------------------------------------- */
 898
 899 /* -----------------------------------------------------------------------------
 900  * calcAllocated()
 901  *
 902  * Approximate how much we've allocated: number of blocks in the
 903  * nursery + blocks allocated via allocate() - unused nusery blocks.
 904  * This leaves a little slop at the end of each block, and doesn't
 905  * take into account large objects (ToDo).
 906  * -------------------------------------------------------------------------- */
 907
 908 lnat
 909 calcAllocated( void )
 910 {
 911   nat allocated;
 912   bdescr *bd;
 913
 914   allocated = allocatedBytes();
 915   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 916
 917   {
 918 #ifdef THREADED_RTS
 919   nat i;
 920   for (i = 0; i < n_nurseries; i++) {
 921       Capability *cap;
 922       for ( bd = capabilities[i].r.rCurrentNursery->link;
 923             bd != NULL; bd = bd->link ) {
 924           allocated -= BLOCK_SIZE_W;
 925       }
 926       cap = &capabilities[i];
 927       if (cap->r.rCurrentNursery->free <
 928           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 929           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 930               - cap->r.rCurrentNursery->free;
 931       }
 932   }
 933 #else
 934   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 935
 936   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 937       allocated -= BLOCK_SIZE_W;
 938   }
 939   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 940       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 941           - current_nursery->free;
 942   }
 943 #endif
 944   }
 945
 946   total_allocated += allocated;
 947   return allocated;
 948 }
 949
 950 /* Approximate the amount of live data in the heap.  To be called just
 951  * after garbage collection (see GarbageCollect()).
 952  */
 953 lnat
 954 calcLiveBlocks(void)
 955 {
 956   nat g, s;
 957   lnat live = 0;
 958   step *stp;
 959
 960   if (RtsFlags.GcFlags.generations == 1) {
 961       return g0s0->n_large_blocks + g0s0->n_blocks;
 962   }
 963
 964   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 965     for (s = 0; s < generations[g].n_steps; s++) {
 966       /* approximate amount of live data (doesn't take into account slop
 967        * at end of each block).
 968        */
 969       if (g == 0 && s == 0) {
 970           continue;
 971       }
 972       stp = &generations[g].steps[s];
 973       live += stp->n_large_blocks + stp->n_blocks;
 974     }
 975   }
 976   return live;
 977 }
 978
 979 lnat
 980 countOccupied(bdescr *bd)
 981 {
 982     lnat words;
 983
 984     words = 0;
 985     for (; bd != NULL; bd = bd->link) {
 986         words += bd->free - bd->start;
 987     }
 988     return words;
 989 }
 990
 991 // Return an accurate count of the live data in the heap, excluding
 992 // generation 0.
 993 lnat
 994 calcLiveWords(void)
 995 {
 996     nat g, s;
 997     lnat live;
 998     step *stp;
 999
1000     if (RtsFlags.GcFlags.generations == 1) {
1001         return countOccupied(g0s0->blocks) + countOccupied(g0s0->large_objects);
1002     }
1003
1004     live = 0;
1005     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1006         for (s = 0; s < generations[g].n_steps; s++) {
1007             if (g == 0 && s == 0) continue;
1008             stp = &generations[g].steps[s];
1009             live += countOccupied(stp->blocks) +
1010                     countOccupied(stp->large_objects);
1011         }
1012     }
1013     return live;
1014 }
1015
1016 /* Approximate the number of blocks that will be needed at the next
1017  * garbage collection.
1018  *
1019  * Assume: all data currently live will remain live.  Steps that will
1020  * be collected next time will therefore need twice as many blocks
1021  * since all the data will be copied.
1022  */
1023 extern lnat
1024 calcNeeded(void)
1025 {
1026     lnat needed = 0;
1027     nat g, s;
1028     step *stp;
1029
1030     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1031         for (s = 0; s < generations[g].n_steps; s++) {
1032             if (g == 0 && s == 0) { continue; }
1033             stp = &generations[g].steps[s];
1034             if (g == 0 || // always collect gen 0
1035                 (generations[g].steps[0].n_blocks +
1036                  generations[g].steps[0].n_large_blocks
1037                  > generations[g].max_blocks
1038                  && stp->is_compacted == 0)) {
1039                 needed += 2 * stp->n_blocks + stp->n_large_blocks;
1040             } else {
1041                 needed += stp->n_blocks + stp->n_large_blocks;
1042             }
1043         }
1044     }
1045     return needed;
1046 }
1047
1048 /* ----------------------------------------------------------------------------
1049    Executable memory
1050
1051    Executable memory must be managed separately from non-executable
1052    memory.  Most OSs these days require you to jump through hoops to
1053    dynamically allocate executable memory, due to various security
1054    measures.
1055
1056    Here we provide a small memory allocator for executable memory.
1057    Memory is managed with a page granularity; we allocate linearly
1058    in the page, and when the page is emptied (all objects on the page
1059    are free) we free the page again, not forgetting to make it
1060    non-executable.
1061
1062    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1063          the linker cannot use allocateExec for loading object code files
1064          on Windows. Once allocateExec can handle larger objects, the linker
1065          should be modified to use allocateExec instead of VirtualAlloc.
1066    ------------------------------------------------------------------------- */
1067
1068 static bdescr *exec_block;
1069
1070 void *allocateExec (nat bytes)
1071 {
1072     void *ret;
1073     nat n;
1074
1075     ACQUIRE_SM_LOCK;
1076
1077     // round up to words.
1078     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1079
1080     if (n+1 > BLOCK_SIZE_W) {
1081         barf("allocateExec: can't handle large objects");
1082     }
1083
1084     if (exec_block == NULL ||
1085         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1086         bdescr *bd;
1087         lnat pagesize = getPageSize();
1088         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1089         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1090         bd->gen_no = 0;
1091         bd->flags = BF_EXEC;
1092         bd->link = exec_block;
1093         if (exec_block != NULL) {
1094             exec_block->u.back = bd;
1095         }
1096         bd->u.back = NULL;
1097         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1098         exec_block = bd;
1099     }
1100     *(exec_block->free) = n;  // store the size of this chunk
1101     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1102     ret = exec_block->free + 1;
1103     exec_block->free += n + 1;
1104
1105     RELEASE_SM_LOCK
1106     return ret;
1107 }
1108
1109 void freeExec (void *addr)
1110 {
1111     StgPtr p = (StgPtr)addr - 1;
1112     bdescr *bd = Bdescr((StgPtr)p);
1113
1114     if ((bd->flags & BF_EXEC) == 0) {
1115         barf("freeExec: not executable");
1116     }
1117
1118     if (*(StgPtr)p == 0) {
1119         barf("freeExec: already free?");
1120     }
1121
1122     ACQUIRE_SM_LOCK;
1123
1124     bd->gen_no -= *(StgPtr)p;
1125     *(StgPtr)p = 0;
1126
1127     if (bd->gen_no == 0) {
1128         // Free the block if it is empty, but not if it is the block at
1129         // the head of the queue.
1130         if (bd != exec_block) {
1131             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1132             dbl_link_remove(bd, &exec_block);
1133             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1134             freeGroup(bd);
1135         } else {
1136             bd->free = bd->start;
1137         }
1138     }
1139
1140     RELEASE_SM_LOCK
1141 }
1142
1143 /* -----------------------------------------------------------------------------
1144    Debugging
1145
1146    memInventory() checks for memory leaks by counting up all the
1147    blocks we know about and comparing that to the number of blocks
1148    allegedly floating around in the system.
1149    -------------------------------------------------------------------------- */
1150
1151 #ifdef DEBUG
1152
1153 // Useful for finding partially full blocks in gdb
1154 void findSlop(bdescr *bd);
1155 void findSlop(bdescr *bd)
1156 {
1157     lnat slop;
1158
1159     for (; bd != NULL; bd = bd->link) {
1160         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1161         if (slop > (1024/sizeof(W_))) {
1162             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1163                        bd->start, bd, slop / (1024/sizeof(W_)));
1164         }
1165     }
1166 }
1167
1168 nat
1169 countBlocks(bdescr *bd)
1170 {
1171     nat n;
1172     for (n=0; bd != NULL; bd=bd->link) {
1173         n += bd->blocks;
1174     }
1175     return n;
1176 }
1177
1178 // (*1) Just like countBlocks, except that we adjust the count for a
1179 // megablock group so that it doesn't include the extra few blocks
1180 // that would be taken up by block descriptors in the second and
1181 // subsequent megablock.  This is so we can tally the count with the
1182 // number of blocks allocated in the system, for memInventory().
1183 static nat
1184 countAllocdBlocks(bdescr *bd)
1185 {
1186     nat n;
1187     for (n=0; bd != NULL; bd=bd->link) {
1188         n += bd->blocks;
1189         // hack for megablock groups: see (*1) above
1190         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1191             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1192                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1193         }
1194     }
1195     return n;
1196 }
1197
1198 static lnat
1199 stepBlocks (step *stp)
1200 {
1201     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1202     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1203     return stp->n_blocks + stp->n_old_blocks +
1204             countAllocdBlocks(stp->large_objects);
1205 }
1206
1207 void
1208 memInventory (rtsBool show)
1209 {
1210   nat g, s, i;
1211   step *stp;
1212   lnat gen_blocks[RtsFlags.GcFlags.generations];
1213   lnat nursery_blocks, retainer_blocks,
1214        arena_blocks, exec_blocks;
1215   lnat live_blocks = 0, free_blocks = 0;
1216   rtsBool leak;
1217
1218   // count the blocks we current have
1219
1220   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1221       gen_blocks[g] = 0;
1222       for (i = 0; i < n_capabilities; i++) {
1223           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1224       }
1225       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1226       for (s = 0; s < generations[g].n_steps; s++) {
1227           stp = &generations[g].steps[s];
1228           gen_blocks[g] += stepBlocks(stp);
1229       }
1230   }
1231
1232   nursery_blocks = 0;
1233   for (i = 0; i < n_nurseries; i++) {
1234       nursery_blocks += stepBlocks(&nurseries[i]);
1235   }
1236
1237   retainer_blocks = 0;
1238 #ifdef PROFILING
1239   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1240       retainer_blocks = retainerStackBlocks();
1241   }
1242 #endif
1243
1244   // count the blocks allocated by the arena allocator
1245   arena_blocks = arenaBlocks();
1246
1247   // count the blocks containing executable memory
1248   exec_blocks = countAllocdBlocks(exec_block);
1249
1250   /* count the blocks on the free list */
1251   free_blocks = countFreeList();
1252
1253   live_blocks = 0;
1254   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1255       live_blocks += gen_blocks[g];
1256   }
1257   live_blocks += nursery_blocks +
1258                + retainer_blocks + arena_blocks + exec_blocks;
1259
1260 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1261
1262   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1263   if (show || leak)
1264   {
1265       if (leak) {
1266           debugBelch("Memory leak detected:\n");
1267       } else {
1268           debugBelch("Memory inventory:\n");
1269       }
1270       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1271           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1272                      gen_blocks[g], MB(gen_blocks[g]));
1273       }
1274       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1275                  nursery_blocks, MB(nursery_blocks));
1276       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1277                  retainer_blocks, MB(retainer_blocks));
1278       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1279                  arena_blocks, MB(arena_blocks));
1280       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1281                  exec_blocks, MB(exec_blocks));
1282       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1283                  free_blocks, MB(free_blocks));
1284       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1285                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1286       if (leak) {
1287           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1288                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1289       }
1290   }
1291 }
1292
1293
1294 /* Full heap sanity check. */
1295 void
1296 checkSanity( void )
1297 {
1298     nat g, s;
1299
1300     if (RtsFlags.GcFlags.generations == 1) {
1301         checkHeap(g0s0->blocks);
1302         checkChain(g0s0->large_objects);
1303     } else {
1304
1305         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1306             for (s = 0; s < generations[g].n_steps; s++) {
1307                 if (g == 0 && s == 0) { continue; }
1308                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1309                        == generations[g].steps[s].n_blocks);
1310                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1311                        == generations[g].steps[s].n_large_blocks);
1312                 checkHeap(generations[g].steps[s].blocks);
1313                 checkChain(generations[g].steps[s].large_objects);
1314                 if (g > 0) {
1315                     checkMutableList(generations[g].mut_list, g);
1316                 }
1317             }
1318         }
1319
1320         for (s = 0; s < n_nurseries; s++) {
1321             ASSERT(countBlocks(nurseries[s].blocks)
1322                    == nurseries[s].n_blocks);
1323             ASSERT(countBlocks(nurseries[s].large_objects)
1324                    == nurseries[s].n_large_blocks);
1325         }
1326
1327         checkFreeListSanity();
1328     }
1329 }
1330
1331 /* Nursery sanity check */
1332 void
1333 checkNurserySanity( step *stp )
1334 {
1335     bdescr *bd, *prev;
1336     nat blocks = 0;
1337
1338     prev = NULL;
1339     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1340         ASSERT(bd->u.back == prev);
1341         prev = bd;
1342         blocks += bd->blocks;
1343     }
1344     ASSERT(blocks == stp->n_blocks);
1345 }
1346
1347 // handy function for use in gdb, because Bdescr() is inlined.
1348 extern bdescr *_bdescr( StgPtr p );
1349
1350 bdescr *
1351 _bdescr( StgPtr p )
1352 {
1353     return Bdescr(p);
1354 }
1355
1356 #endif