rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsUtils.h"
  17 #include "RtsFlags.h"
  18 #include "Stats.h"
  19 #include "Hooks.h"
  20 #include "BlockAlloc.h"
  21 #include "MBlock.h"
  22 #include "Weak.h"
  23 #include "Sanity.h"
  24 #include "Arena.h"
  25 #include "OSThreads.h"
  26 #include "Capability.h"
  27 #include "Storage.h"
  28 #include "Schedule.h"
  29 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  30 #include "OSMem.h"
  31 #include "Trace.h"
  32 #include "GC.h"
  33 #include "GCUtils.h"
  34 #include "Evac.h"
  35
  36 #include <stdlib.h>
  37 #include <string.h>
  38
  39 /*
  40  * All these globals require sm_mutex to access in THREADED_RTS mode.
  41  */
  42 StgClosure    *caf_list         = NULL;
  43 StgClosure    *revertible_caf_list = NULL;
  44 rtsBool       keepCAFs;
  45
  46 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  47 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  48 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  49
  50 generation *generations = NULL; /* all the generations */
  51 generation *g0          = NULL; /* generation 0, for convenience */
  52 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  53 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  54
  55 nat total_steps         = 0;
  56 step *all_steps         = NULL; /* single array of steps */
  57
  58 ullong total_allocated = 0;     /* total memory allocated during run */
  59
  60 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  61 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  62
  63 #ifdef THREADED_RTS
  64 /*
  65  * Storage manager mutex:  protects all the above state from
  66  * simultaneous access by two STG threads.
  67  */
  68 Mutex sm_mutex;
  69 /*
  70  * This mutex is used by atomicModifyMutVar# only
  71  */
  72 Mutex atomic_modify_mutvar_mutex;
  73 #endif
  74
  75
  76 /*
  77  * Forward references
  78  */
  79 static void *stgAllocForGMP   (size_t size_in_bytes);
  80 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  81 static void  stgDeallocForGMP (void *ptr, size_t size);
  82
  83 static void
  84 initStep (step *stp, int g, int s)
  85 {
  86     stp->no = s;
  87     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  88     stp->blocks = NULL;
  89     stp->n_blocks = 0;
  90     stp->old_blocks = NULL;
  91     stp->n_old_blocks = 0;
  92     stp->gen = &generations[g];
  93     stp->gen_no = g;
  94     stp->large_objects = NULL;
  95     stp->n_large_blocks = 0;
  96     stp->scavenged_large_objects = NULL;
  97     stp->n_scavenged_large_blocks = 0;
  98     stp->is_compacted = 0;
  99     stp->bitmap = NULL;
 100 #ifdef THREADED_RTS
 101     initSpinLock(&stp->sync_todo);
 102     initSpinLock(&stp->sync_large_objects);
 103 #endif
 104 }
 105
 106 void
 107 initStorage( void )
 108 {
 109   nat g, s;
 110   generation *gen;
 111
 112   if (generations != NULL) {
 113       // multi-init protection
 114       return;
 115   }
 116
 117   initMBlocks();
 118
 119   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 120    * doing something reasonable.
 121    */
 122   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 123   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info));
 124   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 125   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 126
 127   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 128       RtsFlags.GcFlags.heapSizeSuggestion >
 129       RtsFlags.GcFlags.maxHeapSize) {
 130     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 131   }
 132
 133   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 134       RtsFlags.GcFlags.minAllocAreaSize >
 135       RtsFlags.GcFlags.maxHeapSize) {
 136       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 137       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 138   }
 139
 140   initBlockAllocator();
 141
 142 #if defined(THREADED_RTS)
 143   initMutex(&sm_mutex);
 144   initMutex(&atomic_modify_mutvar_mutex);
 145 #endif
 146
 147   ACQUIRE_SM_LOCK;
 148
 149   /* allocate generation info array */
 150   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 151                                              * sizeof(struct generation_),
 152                                              "initStorage: gens");
 153
 154   /* allocate all the steps into an array.  It is important that we do
 155      it this way, because we need the invariant that two step pointers
 156      can be directly compared to see which is the oldest.
 157      Remember that the last generation has only one step. */
 158   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 159   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 160                                "initStorage: steps");
 161
 162   /* Initialise all generations */
 163   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 164     gen = &generations[g];
 165     gen->no = g;
 166     gen->mut_list = allocBlock();
 167     gen->collections = 0;
 168     gen->failed_promotions = 0;
 169     gen->max_blocks = 0;
 170   }
 171
 172   /* A couple of convenience pointers */
 173   g0 = &generations[0];
 174   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 175
 176   /* Allocate step structures in each generation */
 177   if (RtsFlags.GcFlags.generations > 1) {
 178     /* Only for multiple-generations */
 179
 180     /* Oldest generation: one step */
 181     oldest_gen->n_steps = 1;
 182     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 183                                       * RtsFlags.GcFlags.steps;
 184
 185     /* set up all except the oldest generation with 2 steps */
 186     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 187       generations[g].n_steps = RtsFlags.GcFlags.steps;
 188       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 189     }
 190
 191   } else {
 192     /* single generation, i.e. a two-space collector */
 193     g0->n_steps = 1;
 194     g0->steps   = all_steps;
 195   }
 196
 197 #ifdef THREADED_RTS
 198   n_nurseries = n_capabilities;
 199 #else
 200   n_nurseries = 1;
 201 #endif
 202   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 203                               "initStorage: nurseries");
 204
 205   /* Initialise all steps */
 206   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 207     for (s = 0; s < generations[g].n_steps; s++) {
 208         initStep(&generations[g].steps[s], g, s);
 209     }
 210   }
 211
 212   for (s = 0; s < n_nurseries; s++) {
 213       initStep(&nurseries[s], 0, s);
 214   }
 215
 216   /* Set up the destination pointers in each younger gen. step */
 217   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 218     for (s = 0; s < generations[g].n_steps-1; s++) {
 219       generations[g].steps[s].to = &generations[g].steps[s+1];
 220     }
 221     generations[g].steps[s].to = &generations[g+1].steps[0];
 222   }
 223   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 224
 225   for (s = 0; s < n_nurseries; s++) {
 226       nurseries[s].to = generations[0].steps[0].to;
 227   }
 228
 229   /* The oldest generation has one step. */
 230   if (RtsFlags.GcFlags.compact) {
 231       if (RtsFlags.GcFlags.generations == 1) {
 232           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 233       } else {
 234           oldest_gen->steps[0].is_compacted = 1;
 235       }
 236   }
 237
 238   generations[0].max_blocks = 0;
 239   g0s0 = &generations[0].steps[0];
 240
 241   /* The allocation area.  Policy: keep the allocation area
 242    * small to begin with, even if we have a large suggested heap
 243    * size.  Reason: we're going to do a major collection first, and we
 244    * don't want it to be a big one.  This vague idea is borne out by
 245    * rigorous experimental evidence.
 246    */
 247   allocNurseries();
 248
 249   weak_ptr_list = NULL;
 250   caf_list = NULL;
 251   revertible_caf_list = NULL;
 252
 253   /* initialise the allocate() interface */
 254   alloc_blocks = 0;
 255   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 256
 257   /* Tell GNU multi-precision pkg about our custom alloc functions */
 258   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 259
 260 #ifdef THREADED_RTS
 261   initSpinLock(&gc_alloc_block_sync);
 262   initSpinLock(&recordMutableGen_sync);
 263   whitehole_spin = 0;
 264 #endif
 265
 266   IF_DEBUG(gc, statDescribeGens());
 267
 268   RELEASE_SM_LOCK;
 269 }
 270
 271 void
 272 exitStorage (void)
 273 {
 274     stat_exit(calcAllocated());
 275 }
 276
 277 void
 278 freeStorage (void)
 279 {
 280     stgFree(g0s0); // frees all the steps
 281     stgFree(generations);
 282     freeAllMBlocks();
 283 #if defined(THREADED_RTS)
 284     closeMutex(&sm_mutex);
 285     closeMutex(&atomic_modify_mutvar_mutex);
 286 #endif
 287     stgFree(nurseries);
 288 }
 289
 290 /* -----------------------------------------------------------------------------
 291    CAF management.
 292
 293    The entry code for every CAF does the following:
 294
 295       - builds a CAF_BLACKHOLE in the heap
 296       - pushes an update frame pointing to the CAF_BLACKHOLE
 297       - invokes UPD_CAF(), which:
 298           - calls newCaf, below
 299           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 300
 301    Why do we build a BLACKHOLE in the heap rather than just updating
 302    the thunk directly?  It's so that we only need one kind of update
 303    frame - otherwise we'd need a static version of the update frame too.
 304
 305    newCaf() does the following:
 306
 307       - it puts the CAF on the oldest generation's mut-once list.
 308         This is so that we can treat the CAF as a root when collecting
 309         younger generations.
 310
 311    For GHCI, we have additional requirements when dealing with CAFs:
 312
 313       - we must *retain* all dynamically-loaded CAFs ever entered,
 314         just in case we need them again.
 315       - we must be able to *revert* CAFs that have been evaluated, to
 316         their pre-evaluated form.
 317
 318       To do this, we use an additional CAF list.  When newCaf() is
 319       called on a dynamically-loaded CAF, we add it to the CAF list
 320       instead of the old-generation mutable list, and save away its
 321       old info pointer (in caf->saved_info) for later reversion.
 322
 323       To revert all the CAFs, we traverse the CAF list and reset the
 324       info pointer to caf->saved_info, then throw away the CAF list.
 325       (see GC.c:revertCAFs()).
 326
 327       -- SDM 29/1/01
 328
 329    -------------------------------------------------------------------------- */
 330
 331 void
 332 newCAF(StgClosure* caf)
 333 {
 334   ACQUIRE_SM_LOCK;
 335
 336   if(keepCAFs)
 337   {
 338     // HACK:
 339     // If we are in GHCi _and_ we are using dynamic libraries,
 340     // then we can't redirect newCAF calls to newDynCAF (see below),
 341     // so we make newCAF behave almost like newDynCAF.
 342     // The dynamic libraries might be used by both the interpreted
 343     // program and GHCi itself, so they must not be reverted.
 344     // This also means that in GHCi with dynamic libraries, CAFs are not
 345     // garbage collected. If this turns out to be a problem, we could
 346     // do another hack here and do an address range test on caf to figure
 347     // out whether it is from a dynamic library.
 348     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 349     ((StgIndStatic *)caf)->static_link = caf_list;
 350     caf_list = caf;
 351   }
 352   else
 353   {
 354     /* Put this CAF on the mutable list for the old generation.
 355     * This is a HACK - the IND_STATIC closure doesn't really have
 356     * a mut_link field, but we pretend it has - in fact we re-use
 357     * the STATIC_LINK field for the time being, because when we
 358     * come to do a major GC we won't need the mut_link field
 359     * any more and can use it as a STATIC_LINK.
 360     */
 361     ((StgIndStatic *)caf)->saved_info = NULL;
 362     recordMutableGen(caf, oldest_gen);
 363   }
 364
 365   RELEASE_SM_LOCK;
 366 }
 367
 368 // An alternate version of newCaf which is used for dynamically loaded
 369 // object code in GHCi.  In this case we want to retain *all* CAFs in
 370 // the object code, because they might be demanded at any time from an
 371 // expression evaluated on the command line.
 372 // Also, GHCi might want to revert CAFs, so we add these to the
 373 // revertible_caf_list.
 374 //
 375 // The linker hackily arranges that references to newCaf from dynamic
 376 // code end up pointing to newDynCAF.
 377 void
 378 newDynCAF(StgClosure *caf)
 379 {
 380     ACQUIRE_SM_LOCK;
 381
 382     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 383     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 384     revertible_caf_list = caf;
 385
 386     RELEASE_SM_LOCK;
 387 }
 388
 389 /* -----------------------------------------------------------------------------
 390    Nursery management.
 391    -------------------------------------------------------------------------- */
 392
 393 static bdescr *
 394 allocNursery (step *stp, bdescr *tail, nat blocks)
 395 {
 396     bdescr *bd;
 397     nat i;
 398
 399     // Allocate a nursery: we allocate fresh blocks one at a time and
 400     // cons them on to the front of the list, not forgetting to update
 401     // the back pointer on the tail of the list to point to the new block.
 402     for (i=0; i < blocks; i++) {
 403         // @LDV profiling
 404         /*
 405           processNursery() in LdvProfile.c assumes that every block group in
 406           the nursery contains only a single block. So, if a block group is
 407           given multiple blocks, change processNursery() accordingly.
 408         */
 409         bd = allocBlock();
 410         bd->link = tail;
 411         // double-link the nursery: we might need to insert blocks
 412         if (tail != NULL) {
 413             tail->u.back = bd;
 414         }
 415         bd->step = stp;
 416         bd->gen_no = 0;
 417         bd->flags = 0;
 418         bd->free = bd->start;
 419         tail = bd;
 420     }
 421     tail->u.back = NULL;
 422     return tail;
 423 }
 424
 425 static void
 426 assignNurseriesToCapabilities (void)
 427 {
 428 #ifdef THREADED_RTS
 429     nat i;
 430
 431     for (i = 0; i < n_nurseries; i++) {
 432         capabilities[i].r.rNursery        = &nurseries[i];
 433         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 434         capabilities[i].r.rCurrentAlloc   = NULL;
 435     }
 436 #else /* THREADED_RTS */
 437     MainCapability.r.rNursery        = &nurseries[0];
 438     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 439     MainCapability.r.rCurrentAlloc   = NULL;
 440 #endif
 441 }
 442
 443 void
 444 allocNurseries( void )
 445 {
 446     nat i;
 447
 448     for (i = 0; i < n_nurseries; i++) {
 449         nurseries[i].blocks =
 450             allocNursery(&nurseries[i], NULL,
 451                          RtsFlags.GcFlags.minAllocAreaSize);
 452         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 453         nurseries[i].old_blocks   = NULL;
 454         nurseries[i].n_old_blocks = 0;
 455     }
 456     assignNurseriesToCapabilities();
 457 }
 458
 459 void
 460 resetNurseries( void )
 461 {
 462     nat i;
 463     bdescr *bd;
 464     step *stp;
 465
 466     for (i = 0; i < n_nurseries; i++) {
 467         stp = &nurseries[i];
 468         for (bd = stp->blocks; bd; bd = bd->link) {
 469             bd->free = bd->start;
 470             ASSERT(bd->gen_no == 0);
 471             ASSERT(bd->step == stp);
 472             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 473         }
 474     }
 475     assignNurseriesToCapabilities();
 476 }
 477
 478 lnat
 479 countNurseryBlocks (void)
 480 {
 481     nat i;
 482     lnat blocks = 0;
 483
 484     for (i = 0; i < n_nurseries; i++) {
 485         blocks += nurseries[i].n_blocks;
 486     }
 487     return blocks;
 488 }
 489
 490 static void
 491 resizeNursery ( step *stp, nat blocks )
 492 {
 493   bdescr *bd;
 494   nat nursery_blocks;
 495
 496   nursery_blocks = stp->n_blocks;
 497   if (nursery_blocks == blocks) return;
 498
 499   if (nursery_blocks < blocks) {
 500       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 501                  blocks);
 502     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 503   }
 504   else {
 505     bdescr *next_bd;
 506
 507     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 508                blocks);
 509
 510     bd = stp->blocks;
 511     while (nursery_blocks > blocks) {
 512         next_bd = bd->link;
 513         next_bd->u.back = NULL;
 514         nursery_blocks -= bd->blocks; // might be a large block
 515         freeGroup(bd);
 516         bd = next_bd;
 517     }
 518     stp->blocks = bd;
 519     // might have gone just under, by freeing a large block, so make
 520     // up the difference.
 521     if (nursery_blocks < blocks) {
 522         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 523     }
 524   }
 525
 526   stp->n_blocks = blocks;
 527   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 528 }
 529
 530 //
 531 // Resize each of the nurseries to the specified size.
 532 //
 533 void
 534 resizeNurseriesFixed (nat blocks)
 535 {
 536     nat i;
 537     for (i = 0; i < n_nurseries; i++) {
 538         resizeNursery(&nurseries[i], blocks);
 539     }
 540 }
 541
 542 //
 543 // Resize the nurseries to the total specified size.
 544 //
 545 void
 546 resizeNurseries (nat blocks)
 547 {
 548     // If there are multiple nurseries, then we just divide the number
 549     // of available blocks between them.
 550     resizeNurseriesFixed(blocks / n_nurseries);
 551 }
 552
 553 /* -----------------------------------------------------------------------------
 554    The allocate() interface
 555
 556    allocateInGen() function allocates memory directly into a specific
 557    generation.  It always succeeds, and returns a chunk of memory n
 558    words long.  n can be larger than the size of a block if necessary,
 559    in which case a contiguous block group will be allocated.
 560
 561    allocate(n) is equivalent to allocateInGen(g0).
 562    -------------------------------------------------------------------------- */
 563
 564 StgPtr
 565 allocateInGen (generation *g, nat n)
 566 {
 567     step *stp;
 568     bdescr *bd;
 569     StgPtr ret;
 570
 571     ACQUIRE_SM_LOCK;
 572
 573     TICK_ALLOC_HEAP_NOCTR(n);
 574     CCS_ALLOC(CCCS,n);
 575
 576     stp = &g->steps[0];
 577
 578     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 579     {
 580         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 581
 582         // Attempting to allocate an object larger than maxHeapSize
 583         // should definitely be disallowed.  (bug #1791)
 584         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 585             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 586             heapOverflow();
 587         }
 588
 589         bd = allocGroup(req_blocks);
 590         dbl_link_onto(bd, &stp->large_objects);
 591         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 592         bd->gen_no  = g->no;
 593         bd->step = stp;
 594         bd->flags = BF_LARGE;
 595         bd->free = bd->start + n;
 596         ret = bd->start;
 597     }
 598     else
 599     {
 600         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 601         bd = stp->blocks;
 602         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 603             bd = allocBlock();
 604             bd->gen_no = g->no;
 605             bd->step = stp;
 606             bd->flags = 0;
 607             bd->link = stp->blocks;
 608             stp->blocks = bd;
 609             stp->n_blocks++;
 610             alloc_blocks++;
 611         }
 612         ret = bd->free;
 613         bd->free += n;
 614     }
 615
 616     RELEASE_SM_LOCK;
 617
 618     return ret;
 619 }
 620
 621 StgPtr
 622 allocate (nat n)
 623 {
 624     return allocateInGen(g0,n);
 625 }
 626
 627 lnat
 628 allocatedBytes( void )
 629 {
 630     lnat allocated;
 631
 632     allocated = alloc_blocks * BLOCK_SIZE_W;
 633     if (pinned_object_block != NULL) {
 634         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 635             pinned_object_block->free;
 636     }
 637
 638     return allocated;
 639 }
 640
 641 // split N blocks off the start of the given bdescr, returning the
 642 // remainder as a new block group.  We treat the remainder as if it
 643 // had been freshly allocated in generation 0.
 644 bdescr *
 645 splitLargeBlock (bdescr *bd, nat blocks)
 646 {
 647     bdescr *new_bd;
 648
 649     // subtract the original number of blocks from the counter first
 650     bd->step->n_large_blocks -= bd->blocks;
 651
 652     new_bd = splitBlockGroup (bd, blocks);
 653
 654     dbl_link_onto(new_bd, &g0s0->large_objects);
 655     g0s0->n_large_blocks += new_bd->blocks;
 656     new_bd->gen_no  = g0s0->no;
 657     new_bd->step    = g0s0;
 658     new_bd->flags   = BF_LARGE;
 659     new_bd->free    = bd->free;
 660
 661     // add the new number of blocks to the counter.  Due to the gaps
 662     // for block descriptor, new_bd->blocks + bd->blocks might not be
 663     // equal to the original bd->blocks, which is why we do it this way.
 664     bd->step->n_large_blocks += bd->blocks;
 665
 666     return new_bd;
 667 }
 668
 669 /* -----------------------------------------------------------------------------
 670    allocateLocal()
 671
 672    This allocates memory in the current thread - it is intended for
 673    use primarily from STG-land where we have a Capability.  It is
 674    better than allocate() because it doesn't require taking the
 675    sm_mutex lock in the common case.
 676
 677    Memory is allocated directly from the nursery if possible (but not
 678    from the current nursery block, so as not to interfere with
 679    Hp/HpLim).
 680    -------------------------------------------------------------------------- */
 681
 682 StgPtr
 683 allocateLocal (Capability *cap, nat n)
 684 {
 685     bdescr *bd;
 686     StgPtr p;
 687
 688     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 689         return allocateInGen(g0,n);
 690     }
 691
 692     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 693
 694     TICK_ALLOC_HEAP_NOCTR(n);
 695     CCS_ALLOC(CCCS,n);
 696
 697     bd = cap->r.rCurrentAlloc;
 698     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 699
 700         // The CurrentAlloc block is full, we need to find another
 701         // one.  First, we try taking the next block from the
 702         // nursery:
 703         bd = cap->r.rCurrentNursery->link;
 704
 705         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 706             // The nursery is empty, or the next block is already
 707             // full: allocate a fresh block (we can't fail here).
 708             ACQUIRE_SM_LOCK;
 709             bd = allocBlock();
 710             cap->r.rNursery->n_blocks++;
 711             RELEASE_SM_LOCK;
 712             bd->gen_no = 0;
 713             bd->step = cap->r.rNursery;
 714             bd->flags = 0;
 715             // NO: alloc_blocks++;
 716             // calcAllocated() uses the size of the nursery, and we've
 717             // already bumpted nursery->n_blocks above.
 718         } else {
 719             // we have a block in the nursery: take it and put
 720             // it at the *front* of the nursery list, and use it
 721             // to allocate() from.
 722             cap->r.rCurrentNursery->link = bd->link;
 723             if (bd->link != NULL) {
 724                 bd->link->u.back = cap->r.rCurrentNursery;
 725             }
 726         }
 727         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 728         cap->r.rCurrentAlloc = bd;
 729         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 730     }
 731     p = bd->free;
 732     bd->free += n;
 733     return p;
 734 }
 735
 736 /* ---------------------------------------------------------------------------
 737    Allocate a fixed/pinned object.
 738
 739    We allocate small pinned objects into a single block, allocating a
 740    new block when the current one overflows.  The block is chained
 741    onto the large_object_list of generation 0 step 0.
 742
 743    NOTE: The GC can't in general handle pinned objects.  This
 744    interface is only safe to use for ByteArrays, which have no
 745    pointers and don't require scavenging.  It works because the
 746    block's descriptor has the BF_LARGE flag set, so the block is
 747    treated as a large object and chained onto various lists, rather
 748    than the individual objects being copied.  However, when it comes
 749    to scavenge the block, the GC will only scavenge the first object.
 750    The reason is that the GC can't linearly scan a block of pinned
 751    objects at the moment (doing so would require using the
 752    mostly-copying techniques).  But since we're restricting ourselves
 753    to pinned ByteArrays, not scavenging is ok.
 754
 755    This function is called by newPinnedByteArray# which immediately
 756    fills the allocated memory with a MutableByteArray#.
 757    ------------------------------------------------------------------------- */
 758
 759 StgPtr
 760 allocatePinned( nat n )
 761 {
 762     StgPtr p;
 763     bdescr *bd = pinned_object_block;
 764
 765     // If the request is for a large object, then allocate()
 766     // will give us a pinned object anyway.
 767     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 768         return allocate(n);
 769     }
 770
 771     ACQUIRE_SM_LOCK;
 772
 773     TICK_ALLOC_HEAP_NOCTR(n);
 774     CCS_ALLOC(CCCS,n);
 775
 776     // we always return 8-byte aligned memory.  bd->free must be
 777     // 8-byte aligned to begin with, so we just round up n to
 778     // the nearest multiple of 8 bytes.
 779     if (sizeof(StgWord) == 4) {
 780         n = (n+1) & ~1;
 781     }
 782
 783     // If we don't have a block of pinned objects yet, or the current
 784     // one isn't large enough to hold the new object, allocate a new one.
 785     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 786         pinned_object_block = bd = allocBlock();
 787         dbl_link_onto(bd, &g0s0->large_objects);
 788         g0s0->n_large_blocks++;
 789         bd->gen_no = 0;
 790         bd->step   = g0s0;
 791         bd->flags  = BF_PINNED | BF_LARGE;
 792         bd->free   = bd->start;
 793         alloc_blocks++;
 794     }
 795
 796     p = bd->free;
 797     bd->free += n;
 798     RELEASE_SM_LOCK;
 799     return p;
 800 }
 801
 802 /* -----------------------------------------------------------------------------
 803    Write Barriers
 804    -------------------------------------------------------------------------- */
 805
 806 /*
 807    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 808    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 809    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 810    and is put on the mutable list.
 811 */
 812 void
 813 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 814 {
 815     Capability *cap = regTableToCapability(reg);
 816     bdescr *bd;
 817     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 818         p->header.info = &stg_MUT_VAR_DIRTY_info;
 819         bd = Bdescr((StgPtr)p);
 820         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 821     }
 822 }
 823
 824 /*
 825    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 826    on the mutable list; a MVAR_DIRTY is.  When written to, a
 827    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 828    The check for MVAR_CLEAN is inlined at the call site for speed,
 829    this really does make a difference on concurrency-heavy benchmarks
 830    such as Chaneneos and cheap-concurrency.
 831 */
 832 void
 833 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 834 {
 835     Capability *cap = regTableToCapability(reg);
 836     bdescr *bd;
 837     bd = Bdescr((StgPtr)p);
 838     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 839 }
 840
 841 /* -----------------------------------------------------------------------------
 842    Allocation functions for GMP.
 843
 844    These all use the allocate() interface - we can't have any garbage
 845    collection going on during a gmp operation, so we use allocate()
 846    which always succeeds.  The gmp operations which might need to
 847    allocate will ask the storage manager (via doYouWantToGC()) whether
 848    a garbage collection is required, in case we get into a loop doing
 849    only allocate() style allocation.
 850    -------------------------------------------------------------------------- */
 851
 852 static void *
 853 stgAllocForGMP (size_t size_in_bytes)
 854 {
 855   StgArrWords* arr;
 856   nat data_size_in_words, total_size_in_words;
 857
 858   /* round up to a whole number of words */
 859   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 860   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 861
 862   /* allocate and fill it in. */
 863 #if defined(THREADED_RTS)
 864   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 865 #else
 866   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 867 #endif
 868   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 869
 870   /* and return a ptr to the goods inside the array */
 871   return arr->payload;
 872 }
 873
 874 static void *
 875 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 876 {
 877     void *new_stuff_ptr = stgAllocForGMP(new_size);
 878     nat i = 0;
 879     char *p = (char *) ptr;
 880     char *q = (char *) new_stuff_ptr;
 881
 882     for (; i < old_size; i++, p++, q++) {
 883         *q = *p;
 884     }
 885
 886     return(new_stuff_ptr);
 887 }
 888
 889 static void
 890 stgDeallocForGMP (void *ptr STG_UNUSED,
 891                   size_t size STG_UNUSED)
 892 {
 893     /* easy for us: the garbage collector does the dealloc'n */
 894 }
 895
 896 /* -----------------------------------------------------------------------------
 897  * Stats and stuff
 898  * -------------------------------------------------------------------------- */
 899
 900 /* -----------------------------------------------------------------------------
 901  * calcAllocated()
 902  *
 903  * Approximate how much we've allocated: number of blocks in the
 904  * nursery + blocks allocated via allocate() - unused nusery blocks.
 905  * This leaves a little slop at the end of each block, and doesn't
 906  * take into account large objects (ToDo).
 907  * -------------------------------------------------------------------------- */
 908
 909 lnat
 910 calcAllocated( void )
 911 {
 912   nat allocated;
 913   bdescr *bd;
 914
 915   allocated = allocatedBytes();
 916   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 917
 918   {
 919 #ifdef THREADED_RTS
 920   nat i;
 921   for (i = 0; i < n_nurseries; i++) {
 922       Capability *cap;
 923       for ( bd = capabilities[i].r.rCurrentNursery->link;
 924             bd != NULL; bd = bd->link ) {
 925           allocated -= BLOCK_SIZE_W;
 926       }
 927       cap = &capabilities[i];
 928       if (cap->r.rCurrentNursery->free <
 929           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 930           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 931               - cap->r.rCurrentNursery->free;
 932       }
 933   }
 934 #else
 935   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 936
 937   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 938       allocated -= BLOCK_SIZE_W;
 939   }
 940   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 941       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 942           - current_nursery->free;
 943   }
 944 #endif
 945   }
 946
 947   total_allocated += allocated;
 948   return allocated;
 949 }
 950
 951 /* Approximate the amount of live data in the heap.  To be called just
 952  * after garbage collection (see GarbageCollect()).
 953  */
 954 lnat
 955 calcLiveBlocks(void)
 956 {
 957   nat g, s;
 958   lnat live = 0;
 959   step *stp;
 960
 961   if (RtsFlags.GcFlags.generations == 1) {
 962       return g0s0->n_large_blocks + g0s0->n_blocks;
 963   }
 964
 965   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 966     for (s = 0; s < generations[g].n_steps; s++) {
 967       /* approximate amount of live data (doesn't take into account slop
 968        * at end of each block).
 969        */
 970       if (g == 0 && s == 0) {
 971           continue;
 972       }
 973       stp = &generations[g].steps[s];
 974       live += stp->n_large_blocks + stp->n_blocks;
 975     }
 976   }
 977   return live;
 978 }
 979
 980 lnat
 981 countOccupied(bdescr *bd)
 982 {
 983     lnat words;
 984
 985     words = 0;
 986     for (; bd != NULL; bd = bd->link) {
 987         words += bd->free - bd->start;
 988     }
 989     return words;
 990 }
 991
 992 // Return an accurate count of the live data in the heap, excluding
 993 // generation 0.
 994 lnat
 995 calcLiveWords(void)
 996 {
 997     nat g, s;
 998     lnat live;
 999     step *stp;
1000
1001     if (RtsFlags.GcFlags.generations == 1) {
1002         return countOccupied(g0s0->blocks) + countOccupied(g0s0->large_objects);
1003     }
1004
1005     live = 0;
1006     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1007         for (s = 0; s < generations[g].n_steps; s++) {
1008             if (g == 0 && s == 0) continue;
1009             stp = &generations[g].steps[s];
1010             live += countOccupied(stp->blocks) +
1011                     countOccupied(stp->large_objects);
1012         }
1013     }
1014     return live;
1015 }
1016
1017 /* Approximate the number of blocks that will be needed at the next
1018  * garbage collection.
1019  *
1020  * Assume: all data currently live will remain live.  Steps that will
1021  * be collected next time will therefore need twice as many blocks
1022  * since all the data will be copied.
1023  */
1024 extern lnat
1025 calcNeeded(void)
1026 {
1027     lnat needed = 0;
1028     nat g, s;
1029     step *stp;
1030
1031     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1032         for (s = 0; s < generations[g].n_steps; s++) {
1033             if (g == 0 && s == 0) { continue; }
1034             stp = &generations[g].steps[s];
1035             if (g == 0 || // always collect gen 0
1036                 (generations[g].steps[0].n_blocks +
1037                  generations[g].steps[0].n_large_blocks
1038                  > generations[g].max_blocks
1039                  && stp->is_compacted == 0)) {
1040                 needed += 2 * stp->n_blocks + stp->n_large_blocks;
1041             } else {
1042                 needed += stp->n_blocks + stp->n_large_blocks;
1043             }
1044         }
1045     }
1046     return needed;
1047 }
1048
1049 /* ----------------------------------------------------------------------------
1050    Executable memory
1051
1052    Executable memory must be managed separately from non-executable
1053    memory.  Most OSs these days require you to jump through hoops to
1054    dynamically allocate executable memory, due to various security
1055    measures.
1056
1057    Here we provide a small memory allocator for executable memory.
1058    Memory is managed with a page granularity; we allocate linearly
1059    in the page, and when the page is emptied (all objects on the page
1060    are free) we free the page again, not forgetting to make it
1061    non-executable.
1062
1063    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1064          the linker cannot use allocateExec for loading object code files
1065          on Windows. Once allocateExec can handle larger objects, the linker
1066          should be modified to use allocateExec instead of VirtualAlloc.
1067    ------------------------------------------------------------------------- */
1068
1069 static bdescr *exec_block;
1070
1071 void *allocateExec (nat bytes)
1072 {
1073     void *ret;
1074     nat n;
1075
1076     ACQUIRE_SM_LOCK;
1077
1078     // round up to words.
1079     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1080
1081     if (n+1 > BLOCK_SIZE_W) {
1082         barf("allocateExec: can't handle large objects");
1083     }
1084
1085     if (exec_block == NULL ||
1086         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1087         bdescr *bd;
1088         lnat pagesize = getPageSize();
1089         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1090         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1091         bd->gen_no = 0;
1092         bd->flags = BF_EXEC;
1093         bd->link = exec_block;
1094         if (exec_block != NULL) {
1095             exec_block->u.back = bd;
1096         }
1097         bd->u.back = NULL;
1098         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1099         exec_block = bd;
1100     }
1101     *(exec_block->free) = n;  // store the size of this chunk
1102     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1103     ret = exec_block->free + 1;
1104     exec_block->free += n + 1;
1105
1106     RELEASE_SM_LOCK
1107     return ret;
1108 }
1109
1110 void freeExec (void *addr)
1111 {
1112     StgPtr p = (StgPtr)addr - 1;
1113     bdescr *bd = Bdescr((StgPtr)p);
1114
1115     if ((bd->flags & BF_EXEC) == 0) {
1116         barf("freeExec: not executable");
1117     }
1118
1119     if (*(StgPtr)p == 0) {
1120         barf("freeExec: already free?");
1121     }
1122
1123     ACQUIRE_SM_LOCK;
1124
1125     bd->gen_no -= *(StgPtr)p;
1126     *(StgPtr)p = 0;
1127
1128     if (bd->gen_no == 0) {
1129         // Free the block if it is empty, but not if it is the block at
1130         // the head of the queue.
1131         if (bd != exec_block) {
1132             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1133             dbl_link_remove(bd, &exec_block);
1134             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1135             freeGroup(bd);
1136         } else {
1137             bd->free = bd->start;
1138         }
1139     }
1140
1141     RELEASE_SM_LOCK
1142 }
1143
1144 /* -----------------------------------------------------------------------------
1145    Debugging
1146
1147    memInventory() checks for memory leaks by counting up all the
1148    blocks we know about and comparing that to the number of blocks
1149    allegedly floating around in the system.
1150    -------------------------------------------------------------------------- */
1151
1152 #ifdef DEBUG
1153
1154 // Useful for finding partially full blocks in gdb
1155 void findSlop(bdescr *bd);
1156 void findSlop(bdescr *bd)
1157 {
1158     lnat slop;
1159
1160     for (; bd != NULL; bd = bd->link) {
1161         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1162         if (slop > (1024/sizeof(W_))) {
1163             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1164                        bd->start, bd, slop / (1024/sizeof(W_)));
1165         }
1166     }
1167 }
1168
1169 nat
1170 countBlocks(bdescr *bd)
1171 {
1172     nat n;
1173     for (n=0; bd != NULL; bd=bd->link) {
1174         n += bd->blocks;
1175     }
1176     return n;
1177 }
1178
1179 // (*1) Just like countBlocks, except that we adjust the count for a
1180 // megablock group so that it doesn't include the extra few blocks
1181 // that would be taken up by block descriptors in the second and
1182 // subsequent megablock.  This is so we can tally the count with the
1183 // number of blocks allocated in the system, for memInventory().
1184 static nat
1185 countAllocdBlocks(bdescr *bd)
1186 {
1187     nat n;
1188     for (n=0; bd != NULL; bd=bd->link) {
1189         n += bd->blocks;
1190         // hack for megablock groups: see (*1) above
1191         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1192             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1193                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1194         }
1195     }
1196     return n;
1197 }
1198
1199 static lnat
1200 stepBlocks (step *stp)
1201 {
1202     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1203     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1204     return stp->n_blocks + stp->n_old_blocks +
1205             countAllocdBlocks(stp->large_objects);
1206 }
1207
1208 void
1209 memInventory (rtsBool show)
1210 {
1211   nat g, s, i;
1212   step *stp;
1213   lnat gen_blocks[RtsFlags.GcFlags.generations];
1214   lnat nursery_blocks, retainer_blocks,
1215        arena_blocks, exec_blocks;
1216   lnat live_blocks = 0, free_blocks = 0;
1217   rtsBool leak;
1218
1219   // count the blocks we current have
1220
1221   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1222       gen_blocks[g] = 0;
1223       for (i = 0; i < n_capabilities; i++) {
1224           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1225       }
1226       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1227       for (s = 0; s < generations[g].n_steps; s++) {
1228           stp = &generations[g].steps[s];
1229           gen_blocks[g] += stepBlocks(stp);
1230       }
1231   }
1232
1233   nursery_blocks = 0;
1234   for (i = 0; i < n_nurseries; i++) {
1235       nursery_blocks += stepBlocks(&nurseries[i]);
1236   }
1237
1238   retainer_blocks = 0;
1239 #ifdef PROFILING
1240   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1241       retainer_blocks = retainerStackBlocks();
1242   }
1243 #endif
1244
1245   // count the blocks allocated by the arena allocator
1246   arena_blocks = arenaBlocks();
1247
1248   // count the blocks containing executable memory
1249   exec_blocks = countAllocdBlocks(exec_block);
1250
1251   /* count the blocks on the free list */
1252   free_blocks = countFreeList();
1253
1254   live_blocks = 0;
1255   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1256       live_blocks += gen_blocks[g];
1257   }
1258   live_blocks += nursery_blocks +
1259                + retainer_blocks + arena_blocks + exec_blocks;
1260
1261 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1262
1263   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1264   if (show || leak)
1265   {
1266       if (leak) {
1267           debugBelch("Memory leak detected:\n");
1268       } else {
1269           debugBelch("Memory inventory:\n");
1270       }
1271       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1272           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1273                      gen_blocks[g], MB(gen_blocks[g]));
1274       }
1275       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1276                  nursery_blocks, MB(nursery_blocks));
1277       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1278                  retainer_blocks, MB(retainer_blocks));
1279       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1280                  arena_blocks, MB(arena_blocks));
1281       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1282                  exec_blocks, MB(exec_blocks));
1283       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1284                  free_blocks, MB(free_blocks));
1285       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1286                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1287       if (leak) {
1288           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1289                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1290       }
1291   }
1292 }
1293
1294
1295 /* Full heap sanity check. */
1296 void
1297 checkSanity( void )
1298 {
1299     nat g, s;
1300
1301     if (RtsFlags.GcFlags.generations == 1) {
1302         checkHeap(g0s0->blocks);
1303         checkChain(g0s0->large_objects);
1304     } else {
1305
1306         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1307             for (s = 0; s < generations[g].n_steps; s++) {
1308                 if (g == 0 && s == 0) { continue; }
1309                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1310                        == generations[g].steps[s].n_blocks);
1311                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1312                        == generations[g].steps[s].n_large_blocks);
1313                 checkHeap(generations[g].steps[s].blocks);
1314                 checkChain(generations[g].steps[s].large_objects);
1315                 if (g > 0) {
1316                     checkMutableList(generations[g].mut_list, g);
1317                 }
1318             }
1319         }
1320
1321         for (s = 0; s < n_nurseries; s++) {
1322             ASSERT(countBlocks(nurseries[s].blocks)
1323                    == nurseries[s].n_blocks);
1324             ASSERT(countBlocks(nurseries[s].large_objects)
1325                    == nurseries[s].n_large_blocks);
1326         }
1327
1328         checkFreeListSanity();
1329     }
1330 }
1331
1332 /* Nursery sanity check */
1333 void
1334 checkNurserySanity( step *stp )
1335 {
1336     bdescr *bd, *prev;
1337     nat blocks = 0;
1338
1339     prev = NULL;
1340     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1341         ASSERT(bd->u.back == prev);
1342         prev = bd;
1343         blocks += bd->blocks;
1344     }
1345     ASSERT(blocks == stp->n_blocks);
1346 }
1347
1348 // handy function for use in gdb, because Bdescr() is inlined.
1349 extern bdescr *_bdescr( StgPtr p );
1350
1351 bdescr *
1352 _bdescr( StgPtr p )
1353 {
1354     return Bdescr(p);
1355 }
1356
1357 #endif