rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2006
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsUtils.h"
  17 #include "RtsFlags.h"
  18 #include "Stats.h"
  19 #include "Hooks.h"
  20 #include "BlockAlloc.h"
  21 #include "MBlock.h"
  22 #include "Weak.h"
  23 #include "Sanity.h"
  24 #include "Arena.h"
  25 #include "OSThreads.h"
  26 #include "Capability.h"
  27 #include "Storage.h"
  28 #include "Schedule.h"
  29 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  30 #include "OSMem.h"
  31 #include "Trace.h"
  32 #include "GC.h"
  33 #include "GCUtils.h"
  34 #include "Evac.h"
  35
  36 #include <stdlib.h>
  37 #include <string.h>
  38
  39 /*
  40  * All these globals require sm_mutex to access in THREADED_RTS mode.
  41  */
  42 StgClosure    *caf_list         = NULL;
  43 StgClosure    *revertible_caf_list = NULL;
  44 rtsBool       keepCAFs;
  45
  46 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  47 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  48 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  49
  50 generation *generations = NULL; /* all the generations */
  51 generation *g0          = NULL; /* generation 0, for convenience */
  52 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  53 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  54
  55 nat total_steps         = 0;
  56 step *all_steps         = NULL; /* single array of steps */
  57
  58 ullong total_allocated = 0;     /* total memory allocated during run */
  59
  60 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  61 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  62
  63 #ifdef THREADED_RTS
  64 /*
  65  * Storage manager mutex:  protects all the above state from
  66  * simultaneous access by two STG threads.
  67  */
  68 Mutex sm_mutex;
  69 /*
  70  * This mutex is used by atomicModifyMutVar# only
  71  */
  72 Mutex atomic_modify_mutvar_mutex;
  73 #endif
  74
  75
  76 /*
  77  * Forward references
  78  */
  79 static void *stgAllocForGMP   (size_t size_in_bytes);
  80 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  81 static void  stgDeallocForGMP (void *ptr, size_t size);
  82
  83 static void
  84 initStep (step *stp, int g, int s)
  85 {
  86     stp->no = s;
  87     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  88     stp->blocks = NULL;
  89     stp->n_blocks = 0;
  90     stp->n_words = 0;
  91     stp->old_blocks = NULL;
  92     stp->n_old_blocks = 0;
  93     stp->gen = &generations[g];
  94     stp->gen_no = g;
  95     stp->large_objects = NULL;
  96     stp->n_large_blocks = 0;
  97     stp->scavenged_large_objects = NULL;
  98     stp->n_scavenged_large_blocks = 0;
  99     stp->is_compacted = 0;
 100     stp->bitmap = NULL;
 101 #ifdef THREADED_RTS
 102     initSpinLock(&stp->sync_todo);
 103     initSpinLock(&stp->sync_large_objects);
 104 #endif
 105 }
 106
 107 void
 108 initStorage( void )
 109 {
 110   nat g, s;
 111   generation *gen;
 112
 113   if (generations != NULL) {
 114       // multi-init protection
 115       return;
 116   }
 117
 118   initMBlocks();
 119
 120   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 121    * doing something reasonable.
 122    */
 123   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 124   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL(&stg_BLACKHOLE_info));
 125   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 126   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 127
 128   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 129       RtsFlags.GcFlags.heapSizeSuggestion >
 130       RtsFlags.GcFlags.maxHeapSize) {
 131     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 132   }
 133
 134   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 135       RtsFlags.GcFlags.minAllocAreaSize >
 136       RtsFlags.GcFlags.maxHeapSize) {
 137       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 138       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 139   }
 140
 141   initBlockAllocator();
 142
 143 #if defined(THREADED_RTS)
 144   initMutex(&sm_mutex);
 145   initMutex(&atomic_modify_mutvar_mutex);
 146 #endif
 147
 148   ACQUIRE_SM_LOCK;
 149
 150   /* allocate generation info array */
 151   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 152                                              * sizeof(struct generation_),
 153                                              "initStorage: gens");
 154
 155   /* allocate all the steps into an array.  It is important that we do
 156      it this way, because we need the invariant that two step pointers
 157      can be directly compared to see which is the oldest.
 158      Remember that the last generation has only one step. */
 159   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 160   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 161                                "initStorage: steps");
 162
 163   /* Initialise all generations */
 164   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 165     gen = &generations[g];
 166     gen->no = g;
 167     gen->mut_list = allocBlock();
 168     gen->collections = 0;
 169     gen->par_collections = 0;
 170     gen->failed_promotions = 0;
 171     gen->max_blocks = 0;
 172   }
 173
 174   /* A couple of convenience pointers */
 175   g0 = &generations[0];
 176   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 177
 178   /* Allocate step structures in each generation */
 179   if (RtsFlags.GcFlags.generations > 1) {
 180     /* Only for multiple-generations */
 181
 182     /* Oldest generation: one step */
 183     oldest_gen->n_steps = 1;
 184     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 185                                       * RtsFlags.GcFlags.steps;
 186
 187     /* set up all except the oldest generation with 2 steps */
 188     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 189       generations[g].n_steps = RtsFlags.GcFlags.steps;
 190       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 191     }
 192
 193   } else {
 194     /* single generation, i.e. a two-space collector */
 195     g0->n_steps = 1;
 196     g0->steps   = all_steps;
 197   }
 198
 199 #ifdef THREADED_RTS
 200   n_nurseries = n_capabilities;
 201 #else
 202   n_nurseries = 1;
 203 #endif
 204   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 205                               "initStorage: nurseries");
 206
 207   /* Initialise all steps */
 208   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 209     for (s = 0; s < generations[g].n_steps; s++) {
 210         initStep(&generations[g].steps[s], g, s);
 211     }
 212   }
 213
 214   for (s = 0; s < n_nurseries; s++) {
 215       initStep(&nurseries[s], 0, s);
 216   }
 217
 218   /* Set up the destination pointers in each younger gen. step */
 219   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 220     for (s = 0; s < generations[g].n_steps-1; s++) {
 221       generations[g].steps[s].to = &generations[g].steps[s+1];
 222     }
 223     generations[g].steps[s].to = &generations[g+1].steps[0];
 224   }
 225   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 226
 227   for (s = 0; s < n_nurseries; s++) {
 228       nurseries[s].to = generations[0].steps[0].to;
 229   }
 230
 231   /* The oldest generation has one step. */
 232   if (RtsFlags.GcFlags.compact) {
 233       if (RtsFlags.GcFlags.generations == 1) {
 234           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 235       } else {
 236           oldest_gen->steps[0].is_compacted = 1;
 237       }
 238   }
 239
 240   generations[0].max_blocks = 0;
 241   g0s0 = &generations[0].steps[0];
 242
 243   /* The allocation area.  Policy: keep the allocation area
 244    * small to begin with, even if we have a large suggested heap
 245    * size.  Reason: we're going to do a major collection first, and we
 246    * don't want it to be a big one.  This vague idea is borne out by
 247    * rigorous experimental evidence.
 248    */
 249   allocNurseries();
 250
 251   weak_ptr_list = NULL;
 252   caf_list = NULL;
 253   revertible_caf_list = NULL;
 254
 255   /* initialise the allocate() interface */
 256   alloc_blocks = 0;
 257   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 258
 259   /* Tell GNU multi-precision pkg about our custom alloc functions */
 260   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 261
 262 #ifdef THREADED_RTS
 263   initSpinLock(&gc_alloc_block_sync);
 264   initSpinLock(&recordMutableGen_sync);
 265   whitehole_spin = 0;
 266 #endif
 267
 268   IF_DEBUG(gc, statDescribeGens());
 269
 270   RELEASE_SM_LOCK;
 271 }
 272
 273 void
 274 exitStorage (void)
 275 {
 276     stat_exit(calcAllocated());
 277 }
 278
 279 void
 280 freeStorage (void)
 281 {
 282     stgFree(g0s0); // frees all the steps
 283     stgFree(generations);
 284     freeAllMBlocks();
 285 #if defined(THREADED_RTS)
 286     closeMutex(&sm_mutex);
 287     closeMutex(&atomic_modify_mutvar_mutex);
 288 #endif
 289     stgFree(nurseries);
 290 }
 291
 292 /* -----------------------------------------------------------------------------
 293    CAF management.
 294
 295    The entry code for every CAF does the following:
 296
 297       - builds a CAF_BLACKHOLE in the heap
 298       - pushes an update frame pointing to the CAF_BLACKHOLE
 299       - invokes UPD_CAF(), which:
 300           - calls newCaf, below
 301           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 302
 303    Why do we build a BLACKHOLE in the heap rather than just updating
 304    the thunk directly?  It's so that we only need one kind of update
 305    frame - otherwise we'd need a static version of the update frame too.
 306
 307    newCaf() does the following:
 308
 309       - it puts the CAF on the oldest generation's mut-once list.
 310         This is so that we can treat the CAF as a root when collecting
 311         younger generations.
 312
 313    For GHCI, we have additional requirements when dealing with CAFs:
 314
 315       - we must *retain* all dynamically-loaded CAFs ever entered,
 316         just in case we need them again.
 317       - we must be able to *revert* CAFs that have been evaluated, to
 318         their pre-evaluated form.
 319
 320       To do this, we use an additional CAF list.  When newCaf() is
 321       called on a dynamically-loaded CAF, we add it to the CAF list
 322       instead of the old-generation mutable list, and save away its
 323       old info pointer (in caf->saved_info) for later reversion.
 324
 325       To revert all the CAFs, we traverse the CAF list and reset the
 326       info pointer to caf->saved_info, then throw away the CAF list.
 327       (see GC.c:revertCAFs()).
 328
 329       -- SDM 29/1/01
 330
 331    -------------------------------------------------------------------------- */
 332
 333 void
 334 newCAF(StgClosure* caf)
 335 {
 336   ACQUIRE_SM_LOCK;
 337
 338   if(keepCAFs)
 339   {
 340     // HACK:
 341     // If we are in GHCi _and_ we are using dynamic libraries,
 342     // then we can't redirect newCAF calls to newDynCAF (see below),
 343     // so we make newCAF behave almost like newDynCAF.
 344     // The dynamic libraries might be used by both the interpreted
 345     // program and GHCi itself, so they must not be reverted.
 346     // This also means that in GHCi with dynamic libraries, CAFs are not
 347     // garbage collected. If this turns out to be a problem, we could
 348     // do another hack here and do an address range test on caf to figure
 349     // out whether it is from a dynamic library.
 350     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 351     ((StgIndStatic *)caf)->static_link = caf_list;
 352     caf_list = caf;
 353   }
 354   else
 355   {
 356     /* Put this CAF on the mutable list for the old generation.
 357     * This is a HACK - the IND_STATIC closure doesn't really have
 358     * a mut_link field, but we pretend it has - in fact we re-use
 359     * the STATIC_LINK field for the time being, because when we
 360     * come to do a major GC we won't need the mut_link field
 361     * any more and can use it as a STATIC_LINK.
 362     */
 363     ((StgIndStatic *)caf)->saved_info = NULL;
 364     recordMutableGen(caf, oldest_gen);
 365   }
 366
 367   RELEASE_SM_LOCK;
 368 }
 369
 370 // An alternate version of newCaf which is used for dynamically loaded
 371 // object code in GHCi.  In this case we want to retain *all* CAFs in
 372 // the object code, because they might be demanded at any time from an
 373 // expression evaluated on the command line.
 374 // Also, GHCi might want to revert CAFs, so we add these to the
 375 // revertible_caf_list.
 376 //
 377 // The linker hackily arranges that references to newCaf from dynamic
 378 // code end up pointing to newDynCAF.
 379 void
 380 newDynCAF(StgClosure *caf)
 381 {
 382     ACQUIRE_SM_LOCK;
 383
 384     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 385     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 386     revertible_caf_list = caf;
 387
 388     RELEASE_SM_LOCK;
 389 }
 390
 391 /* -----------------------------------------------------------------------------
 392    Nursery management.
 393    -------------------------------------------------------------------------- */
 394
 395 static bdescr *
 396 allocNursery (step *stp, bdescr *tail, nat blocks)
 397 {
 398     bdescr *bd;
 399     nat i;
 400
 401     // Allocate a nursery: we allocate fresh blocks one at a time and
 402     // cons them on to the front of the list, not forgetting to update
 403     // the back pointer on the tail of the list to point to the new block.
 404     for (i=0; i < blocks; i++) {
 405         // @LDV profiling
 406         /*
 407           processNursery() in LdvProfile.c assumes that every block group in
 408           the nursery contains only a single block. So, if a block group is
 409           given multiple blocks, change processNursery() accordingly.
 410         */
 411         bd = allocBlock();
 412         bd->link = tail;
 413         // double-link the nursery: we might need to insert blocks
 414         if (tail != NULL) {
 415             tail->u.back = bd;
 416         }
 417         bd->step = stp;
 418         bd->gen_no = 0;
 419         bd->flags = 0;
 420         bd->free = bd->start;
 421         tail = bd;
 422     }
 423     tail->u.back = NULL;
 424     return tail;
 425 }
 426
 427 static void
 428 assignNurseriesToCapabilities (void)
 429 {
 430 #ifdef THREADED_RTS
 431     nat i;
 432
 433     for (i = 0; i < n_nurseries; i++) {
 434         capabilities[i].r.rNursery        = &nurseries[i];
 435         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 436         capabilities[i].r.rCurrentAlloc   = NULL;
 437     }
 438 #else /* THREADED_RTS */
 439     MainCapability.r.rNursery        = &nurseries[0];
 440     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 441     MainCapability.r.rCurrentAlloc   = NULL;
 442 #endif
 443 }
 444
 445 void
 446 allocNurseries( void )
 447 {
 448     nat i;
 449
 450     for (i = 0; i < n_nurseries; i++) {
 451         nurseries[i].blocks =
 452             allocNursery(&nurseries[i], NULL,
 453                          RtsFlags.GcFlags.minAllocAreaSize);
 454         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 455         nurseries[i].old_blocks   = NULL;
 456         nurseries[i].n_old_blocks = 0;
 457     }
 458     assignNurseriesToCapabilities();
 459 }
 460
 461 void
 462 resetNurseries( void )
 463 {
 464     nat i;
 465     bdescr *bd;
 466     step *stp;
 467
 468     for (i = 0; i < n_nurseries; i++) {
 469         stp = &nurseries[i];
 470         for (bd = stp->blocks; bd; bd = bd->link) {
 471             bd->free = bd->start;
 472             ASSERT(bd->gen_no == 0);
 473             ASSERT(bd->step == stp);
 474             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 475         }
 476     }
 477     assignNurseriesToCapabilities();
 478 }
 479
 480 lnat
 481 countNurseryBlocks (void)
 482 {
 483     nat i;
 484     lnat blocks = 0;
 485
 486     for (i = 0; i < n_nurseries; i++) {
 487         blocks += nurseries[i].n_blocks;
 488     }
 489     return blocks;
 490 }
 491
 492 static void
 493 resizeNursery ( step *stp, nat blocks )
 494 {
 495   bdescr *bd;
 496   nat nursery_blocks;
 497
 498   nursery_blocks = stp->n_blocks;
 499   if (nursery_blocks == blocks) return;
 500
 501   if (nursery_blocks < blocks) {
 502       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 503                  blocks);
 504     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 505   }
 506   else {
 507     bdescr *next_bd;
 508
 509     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 510                blocks);
 511
 512     bd = stp->blocks;
 513     while (nursery_blocks > blocks) {
 514         next_bd = bd->link;
 515         next_bd->u.back = NULL;
 516         nursery_blocks -= bd->blocks; // might be a large block
 517         freeGroup(bd);
 518         bd = next_bd;
 519     }
 520     stp->blocks = bd;
 521     // might have gone just under, by freeing a large block, so make
 522     // up the difference.
 523     if (nursery_blocks < blocks) {
 524         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 525     }
 526   }
 527
 528   stp->n_blocks = blocks;
 529   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 530 }
 531
 532 //
 533 // Resize each of the nurseries to the specified size.
 534 //
 535 void
 536 resizeNurseriesFixed (nat blocks)
 537 {
 538     nat i;
 539     for (i = 0; i < n_nurseries; i++) {
 540         resizeNursery(&nurseries[i], blocks);
 541     }
 542 }
 543
 544 //
 545 // Resize the nurseries to the total specified size.
 546 //
 547 void
 548 resizeNurseries (nat blocks)
 549 {
 550     // If there are multiple nurseries, then we just divide the number
 551     // of available blocks between them.
 552     resizeNurseriesFixed(blocks / n_nurseries);
 553 }
 554
 555 /* -----------------------------------------------------------------------------
 556    The allocate() interface
 557
 558    allocateInGen() function allocates memory directly into a specific
 559    generation.  It always succeeds, and returns a chunk of memory n
 560    words long.  n can be larger than the size of a block if necessary,
 561    in which case a contiguous block group will be allocated.
 562
 563    allocate(n) is equivalent to allocateInGen(g0).
 564    -------------------------------------------------------------------------- */
 565
 566 StgPtr
 567 allocateInGen (generation *g, nat n)
 568 {
 569     step *stp;
 570     bdescr *bd;
 571     StgPtr ret;
 572
 573     ACQUIRE_SM_LOCK;
 574
 575     TICK_ALLOC_HEAP_NOCTR(n);
 576     CCS_ALLOC(CCCS,n);
 577
 578     stp = &g->steps[0];
 579
 580     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 581     {
 582         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 583
 584         // Attempting to allocate an object larger than maxHeapSize
 585         // should definitely be disallowed.  (bug #1791)
 586         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 587             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 588             heapOverflow();
 589         }
 590
 591         bd = allocGroup(req_blocks);
 592         dbl_link_onto(bd, &stp->large_objects);
 593         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 594         bd->gen_no  = g->no;
 595         bd->step = stp;
 596         bd->flags = BF_LARGE;
 597         bd->free = bd->start + n;
 598         ret = bd->start;
 599     }
 600     else
 601     {
 602         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 603         bd = stp->blocks;
 604         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 605             bd = allocBlock();
 606             bd->gen_no = g->no;
 607             bd->step = stp;
 608             bd->flags = 0;
 609             bd->link = stp->blocks;
 610             stp->blocks = bd;
 611             stp->n_blocks++;
 612             alloc_blocks++;
 613         }
 614         ret = bd->free;
 615         bd->free += n;
 616     }
 617
 618     RELEASE_SM_LOCK;
 619
 620     return ret;
 621 }
 622
 623 StgPtr
 624 allocate (nat n)
 625 {
 626     return allocateInGen(g0,n);
 627 }
 628
 629 lnat
 630 allocatedBytes( void )
 631 {
 632     lnat allocated;
 633
 634     allocated = alloc_blocks * BLOCK_SIZE_W;
 635     if (pinned_object_block != NULL) {
 636         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 637             pinned_object_block->free;
 638     }
 639
 640     return allocated;
 641 }
 642
 643 // split N blocks off the start of the given bdescr, returning the
 644 // remainder as a new block group.  We treat the remainder as if it
 645 // had been freshly allocated in generation 0.
 646 bdescr *
 647 splitLargeBlock (bdescr *bd, nat blocks)
 648 {
 649     bdescr *new_bd;
 650
 651     // subtract the original number of blocks from the counter first
 652     bd->step->n_large_blocks -= bd->blocks;
 653
 654     new_bd = splitBlockGroup (bd, blocks);
 655
 656     dbl_link_onto(new_bd, &g0s0->large_objects);
 657     g0s0->n_large_blocks += new_bd->blocks;
 658     new_bd->gen_no  = g0s0->no;
 659     new_bd->step    = g0s0;
 660     new_bd->flags   = BF_LARGE;
 661     new_bd->free    = bd->free;
 662
 663     // add the new number of blocks to the counter.  Due to the gaps
 664     // for block descriptor, new_bd->blocks + bd->blocks might not be
 665     // equal to the original bd->blocks, which is why we do it this way.
 666     bd->step->n_large_blocks += bd->blocks;
 667
 668     return new_bd;
 669 }
 670
 671 /* -----------------------------------------------------------------------------
 672    allocateLocal()
 673
 674    This allocates memory in the current thread - it is intended for
 675    use primarily from STG-land where we have a Capability.  It is
 676    better than allocate() because it doesn't require taking the
 677    sm_mutex lock in the common case.
 678
 679    Memory is allocated directly from the nursery if possible (but not
 680    from the current nursery block, so as not to interfere with
 681    Hp/HpLim).
 682    -------------------------------------------------------------------------- */
 683
 684 StgPtr
 685 allocateLocal (Capability *cap, nat n)
 686 {
 687     bdescr *bd;
 688     StgPtr p;
 689
 690     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 691         return allocateInGen(g0,n);
 692     }
 693
 694     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 695
 696     TICK_ALLOC_HEAP_NOCTR(n);
 697     CCS_ALLOC(CCCS,n);
 698
 699     bd = cap->r.rCurrentAlloc;
 700     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 701
 702         // The CurrentAlloc block is full, we need to find another
 703         // one.  First, we try taking the next block from the
 704         // nursery:
 705         bd = cap->r.rCurrentNursery->link;
 706
 707         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 708             // The nursery is empty, or the next block is already
 709             // full: allocate a fresh block (we can't fail here).
 710             ACQUIRE_SM_LOCK;
 711             bd = allocBlock();
 712             cap->r.rNursery->n_blocks++;
 713             RELEASE_SM_LOCK;
 714             bd->gen_no = 0;
 715             bd->step = cap->r.rNursery;
 716             bd->flags = 0;
 717             // NO: alloc_blocks++;
 718             // calcAllocated() uses the size of the nursery, and we've
 719             // already bumpted nursery->n_blocks above.
 720         } else {
 721             // we have a block in the nursery: take it and put
 722             // it at the *front* of the nursery list, and use it
 723             // to allocate() from.
 724             cap->r.rCurrentNursery->link = bd->link;
 725             if (bd->link != NULL) {
 726                 bd->link->u.back = cap->r.rCurrentNursery;
 727             }
 728         }
 729         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 730         cap->r.rCurrentAlloc = bd;
 731         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 732     }
 733     p = bd->free;
 734     bd->free += n;
 735     return p;
 736 }
 737
 738 /* ---------------------------------------------------------------------------
 739    Allocate a fixed/pinned object.
 740
 741    We allocate small pinned objects into a single block, allocating a
 742    new block when the current one overflows.  The block is chained
 743    onto the large_object_list of generation 0 step 0.
 744
 745    NOTE: The GC can't in general handle pinned objects.  This
 746    interface is only safe to use for ByteArrays, which have no
 747    pointers and don't require scavenging.  It works because the
 748    block's descriptor has the BF_LARGE flag set, so the block is
 749    treated as a large object and chained onto various lists, rather
 750    than the individual objects being copied.  However, when it comes
 751    to scavenge the block, the GC will only scavenge the first object.
 752    The reason is that the GC can't linearly scan a block of pinned
 753    objects at the moment (doing so would require using the
 754    mostly-copying techniques).  But since we're restricting ourselves
 755    to pinned ByteArrays, not scavenging is ok.
 756
 757    This function is called by newPinnedByteArray# which immediately
 758    fills the allocated memory with a MutableByteArray#.
 759    ------------------------------------------------------------------------- */
 760
 761 StgPtr
 762 allocatePinned( nat n )
 763 {
 764     StgPtr p;
 765     bdescr *bd = pinned_object_block;
 766
 767     // If the request is for a large object, then allocate()
 768     // will give us a pinned object anyway.
 769     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 770         return allocate(n);
 771     }
 772
 773     ACQUIRE_SM_LOCK;
 774
 775     TICK_ALLOC_HEAP_NOCTR(n);
 776     CCS_ALLOC(CCCS,n);
 777
 778     // we always return 8-byte aligned memory.  bd->free must be
 779     // 8-byte aligned to begin with, so we just round up n to
 780     // the nearest multiple of 8 bytes.
 781     if (sizeof(StgWord) == 4) {
 782         n = (n+1) & ~1;
 783     }
 784
 785     // If we don't have a block of pinned objects yet, or the current
 786     // one isn't large enough to hold the new object, allocate a new one.
 787     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 788         pinned_object_block = bd = allocBlock();
 789         dbl_link_onto(bd, &g0s0->large_objects);
 790         g0s0->n_large_blocks++;
 791         bd->gen_no = 0;
 792         bd->step   = g0s0;
 793         bd->flags  = BF_PINNED | BF_LARGE;
 794         bd->free   = bd->start;
 795         alloc_blocks++;
 796     }
 797
 798     p = bd->free;
 799     bd->free += n;
 800     RELEASE_SM_LOCK;
 801     return p;
 802 }
 803
 804 /* -----------------------------------------------------------------------------
 805    Write Barriers
 806    -------------------------------------------------------------------------- */
 807
 808 /*
 809    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 810    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 811    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 812    and is put on the mutable list.
 813 */
 814 void
 815 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 816 {
 817     Capability *cap = regTableToCapability(reg);
 818     bdescr *bd;
 819     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 820         p->header.info = &stg_MUT_VAR_DIRTY_info;
 821         bd = Bdescr((StgPtr)p);
 822         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 823     }
 824 }
 825
 826 /*
 827    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 828    on the mutable list; a MVAR_DIRTY is.  When written to, a
 829    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 830    The check for MVAR_CLEAN is inlined at the call site for speed,
 831    this really does make a difference on concurrency-heavy benchmarks
 832    such as Chaneneos and cheap-concurrency.
 833 */
 834 void
 835 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 836 {
 837     Capability *cap = regTableToCapability(reg);
 838     bdescr *bd;
 839     bd = Bdescr((StgPtr)p);
 840     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 841 }
 842
 843 /* -----------------------------------------------------------------------------
 844    Allocation functions for GMP.
 845
 846    These all use the allocate() interface - we can't have any garbage
 847    collection going on during a gmp operation, so we use allocate()
 848    which always succeeds.  The gmp operations which might need to
 849    allocate will ask the storage manager (via doYouWantToGC()) whether
 850    a garbage collection is required, in case we get into a loop doing
 851    only allocate() style allocation.
 852    -------------------------------------------------------------------------- */
 853
 854 static void *
 855 stgAllocForGMP (size_t size_in_bytes)
 856 {
 857   StgArrWords* arr;
 858   nat data_size_in_words, total_size_in_words;
 859
 860   /* round up to a whole number of words */
 861   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 862   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 863
 864   /* allocate and fill it in. */
 865 #if defined(THREADED_RTS)
 866   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 867 #else
 868   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 869 #endif
 870   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 871
 872   /* and return a ptr to the goods inside the array */
 873   return arr->payload;
 874 }
 875
 876 static void *
 877 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 878 {
 879     void *new_stuff_ptr = stgAllocForGMP(new_size);
 880     nat i = 0;
 881     char *p = (char *) ptr;
 882     char *q = (char *) new_stuff_ptr;
 883
 884     for (; i < old_size; i++, p++, q++) {
 885         *q = *p;
 886     }
 887
 888     return(new_stuff_ptr);
 889 }
 890
 891 static void
 892 stgDeallocForGMP (void *ptr STG_UNUSED,
 893                   size_t size STG_UNUSED)
 894 {
 895     /* easy for us: the garbage collector does the dealloc'n */
 896 }
 897
 898 /* -----------------------------------------------------------------------------
 899  * Stats and stuff
 900  * -------------------------------------------------------------------------- */
 901
 902 /* -----------------------------------------------------------------------------
 903  * calcAllocated()
 904  *
 905  * Approximate how much we've allocated: number of blocks in the
 906  * nursery + blocks allocated via allocate() - unused nusery blocks.
 907  * This leaves a little slop at the end of each block, and doesn't
 908  * take into account large objects (ToDo).
 909  * -------------------------------------------------------------------------- */
 910
 911 lnat
 912 calcAllocated( void )
 913 {
 914   nat allocated;
 915   bdescr *bd;
 916
 917   allocated = allocatedBytes();
 918   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 919
 920   {
 921 #ifdef THREADED_RTS
 922   nat i;
 923   for (i = 0; i < n_nurseries; i++) {
 924       Capability *cap;
 925       for ( bd = capabilities[i].r.rCurrentNursery->link;
 926             bd != NULL; bd = bd->link ) {
 927           allocated -= BLOCK_SIZE_W;
 928       }
 929       cap = &capabilities[i];
 930       if (cap->r.rCurrentNursery->free <
 931           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 932           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 933               - cap->r.rCurrentNursery->free;
 934       }
 935   }
 936 #else
 937   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 938
 939   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 940       allocated -= BLOCK_SIZE_W;
 941   }
 942   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 943       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 944           - current_nursery->free;
 945   }
 946 #endif
 947   }
 948
 949   total_allocated += allocated;
 950   return allocated;
 951 }
 952
 953 /* Approximate the amount of live data in the heap.  To be called just
 954  * after garbage collection (see GarbageCollect()).
 955  */
 956 lnat
 957 calcLiveBlocks(void)
 958 {
 959   nat g, s;
 960   lnat live = 0;
 961   step *stp;
 962
 963   if (RtsFlags.GcFlags.generations == 1) {
 964       return g0s0->n_large_blocks + g0s0->n_blocks;
 965   }
 966
 967   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 968     for (s = 0; s < generations[g].n_steps; s++) {
 969       /* approximate amount of live data (doesn't take into account slop
 970        * at end of each block).
 971        */
 972       if (g == 0 && s == 0) {
 973           continue;
 974       }
 975       stp = &generations[g].steps[s];
 976       live += stp->n_large_blocks + stp->n_blocks;
 977     }
 978   }
 979   return live;
 980 }
 981
 982 lnat
 983 countOccupied(bdescr *bd)
 984 {
 985     lnat words;
 986
 987     words = 0;
 988     for (; bd != NULL; bd = bd->link) {
 989         words += bd->free - bd->start;
 990     }
 991     return words;
 992 }
 993
 994 // Return an accurate count of the live data in the heap, excluding
 995 // generation 0.
 996 lnat
 997 calcLiveWords(void)
 998 {
 999     nat g, s;
1000     lnat live;
1001     step *stp;
1002
1003     if (RtsFlags.GcFlags.generations == 1) {
1004         return g0s0->n_words + countOccupied(g0s0->large_objects);
1005     }
1006
1007     live = 0;
1008     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1009         for (s = 0; s < generations[g].n_steps; s++) {
1010             if (g == 0 && s == 0) continue;
1011             stp = &generations[g].steps[s];
1012             live += stp->n_words + countOccupied(stp->large_objects);
1013         }
1014     }
1015     return live;
1016 }
1017
1018 /* Approximate the number of blocks that will be needed at the next
1019  * garbage collection.
1020  *
1021  * Assume: all data currently live will remain live.  Steps that will
1022  * be collected next time will therefore need twice as many blocks
1023  * since all the data will be copied.
1024  */
1025 extern lnat
1026 calcNeeded(void)
1027 {
1028     lnat needed = 0;
1029     nat g, s;
1030     step *stp;
1031
1032     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1033         for (s = 0; s < generations[g].n_steps; s++) {
1034             if (g == 0 && s == 0) { continue; }
1035             stp = &generations[g].steps[s];
1036             if (g == 0 || // always collect gen 0
1037                 (generations[g].steps[0].n_blocks +
1038                  generations[g].steps[0].n_large_blocks
1039                  > generations[g].max_blocks
1040                  && stp->is_compacted == 0)) {
1041                 needed += 2 * stp->n_blocks + stp->n_large_blocks;
1042             } else {
1043                 needed += stp->n_blocks + stp->n_large_blocks;
1044             }
1045         }
1046     }
1047     return needed;
1048 }
1049
1050 /* ----------------------------------------------------------------------------
1051    Executable memory
1052
1053    Executable memory must be managed separately from non-executable
1054    memory.  Most OSs these days require you to jump through hoops to
1055    dynamically allocate executable memory, due to various security
1056    measures.
1057
1058    Here we provide a small memory allocator for executable memory.
1059    Memory is managed with a page granularity; we allocate linearly
1060    in the page, and when the page is emptied (all objects on the page
1061    are free) we free the page again, not forgetting to make it
1062    non-executable.
1063
1064    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1065          the linker cannot use allocateExec for loading object code files
1066          on Windows. Once allocateExec can handle larger objects, the linker
1067          should be modified to use allocateExec instead of VirtualAlloc.
1068    ------------------------------------------------------------------------- */
1069
1070 static bdescr *exec_block;
1071
1072 void *allocateExec (nat bytes)
1073 {
1074     void *ret;
1075     nat n;
1076
1077     ACQUIRE_SM_LOCK;
1078
1079     // round up to words.
1080     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1081
1082     if (n+1 > BLOCK_SIZE_W) {
1083         barf("allocateExec: can't handle large objects");
1084     }
1085
1086     if (exec_block == NULL ||
1087         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1088         bdescr *bd;
1089         lnat pagesize = getPageSize();
1090         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1091         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1092         bd->gen_no = 0;
1093         bd->flags = BF_EXEC;
1094         bd->link = exec_block;
1095         if (exec_block != NULL) {
1096             exec_block->u.back = bd;
1097         }
1098         bd->u.back = NULL;
1099         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1100         exec_block = bd;
1101     }
1102     *(exec_block->free) = n;  // store the size of this chunk
1103     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1104     ret = exec_block->free + 1;
1105     exec_block->free += n + 1;
1106
1107     RELEASE_SM_LOCK
1108     return ret;
1109 }
1110
1111 void freeExec (void *addr)
1112 {
1113     StgPtr p = (StgPtr)addr - 1;
1114     bdescr *bd = Bdescr((StgPtr)p);
1115
1116     if ((bd->flags & BF_EXEC) == 0) {
1117         barf("freeExec: not executable");
1118     }
1119
1120     if (*(StgPtr)p == 0) {
1121         barf("freeExec: already free?");
1122     }
1123
1124     ACQUIRE_SM_LOCK;
1125
1126     bd->gen_no -= *(StgPtr)p;
1127     *(StgPtr)p = 0;
1128
1129     if (bd->gen_no == 0) {
1130         // Free the block if it is empty, but not if it is the block at
1131         // the head of the queue.
1132         if (bd != exec_block) {
1133             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1134             dbl_link_remove(bd, &exec_block);
1135             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1136             freeGroup(bd);
1137         } else {
1138             bd->free = bd->start;
1139         }
1140     }
1141
1142     RELEASE_SM_LOCK
1143 }
1144
1145 /* -----------------------------------------------------------------------------
1146    Debugging
1147
1148    memInventory() checks for memory leaks by counting up all the
1149    blocks we know about and comparing that to the number of blocks
1150    allegedly floating around in the system.
1151    -------------------------------------------------------------------------- */
1152
1153 #ifdef DEBUG
1154
1155 // Useful for finding partially full blocks in gdb
1156 void findSlop(bdescr *bd);
1157 void findSlop(bdescr *bd)
1158 {
1159     lnat slop;
1160
1161     for (; bd != NULL; bd = bd->link) {
1162         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1163         if (slop > (1024/sizeof(W_))) {
1164             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1165                        bd->start, bd, slop / (1024/sizeof(W_)));
1166         }
1167     }
1168 }
1169
1170 nat
1171 countBlocks(bdescr *bd)
1172 {
1173     nat n;
1174     for (n=0; bd != NULL; bd=bd->link) {
1175         n += bd->blocks;
1176     }
1177     return n;
1178 }
1179
1180 // (*1) Just like countBlocks, except that we adjust the count for a
1181 // megablock group so that it doesn't include the extra few blocks
1182 // that would be taken up by block descriptors in the second and
1183 // subsequent megablock.  This is so we can tally the count with the
1184 // number of blocks allocated in the system, for memInventory().
1185 static nat
1186 countAllocdBlocks(bdescr *bd)
1187 {
1188     nat n;
1189     for (n=0; bd != NULL; bd=bd->link) {
1190         n += bd->blocks;
1191         // hack for megablock groups: see (*1) above
1192         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1193             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1194                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1195         }
1196     }
1197     return n;
1198 }
1199
1200 static lnat
1201 stepBlocks (step *stp)
1202 {
1203     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1204     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1205     return stp->n_blocks + stp->n_old_blocks +
1206             countAllocdBlocks(stp->large_objects);
1207 }
1208
1209 void
1210 memInventory (rtsBool show)
1211 {
1212   nat g, s, i;
1213   step *stp;
1214   lnat gen_blocks[RtsFlags.GcFlags.generations];
1215   lnat nursery_blocks, retainer_blocks,
1216        arena_blocks, exec_blocks;
1217   lnat live_blocks = 0, free_blocks = 0;
1218   rtsBool leak;
1219
1220   // count the blocks we current have
1221
1222   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1223       gen_blocks[g] = 0;
1224       for (i = 0; i < n_capabilities; i++) {
1225           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1226       }
1227       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1228       for (s = 0; s < generations[g].n_steps; s++) {
1229           stp = &generations[g].steps[s];
1230           gen_blocks[g] += stepBlocks(stp);
1231       }
1232   }
1233
1234   nursery_blocks = 0;
1235   for (i = 0; i < n_nurseries; i++) {
1236       nursery_blocks += stepBlocks(&nurseries[i]);
1237   }
1238
1239   retainer_blocks = 0;
1240 #ifdef PROFILING
1241   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1242       retainer_blocks = retainerStackBlocks();
1243   }
1244 #endif
1245
1246   // count the blocks allocated by the arena allocator
1247   arena_blocks = arenaBlocks();
1248
1249   // count the blocks containing executable memory
1250   exec_blocks = countAllocdBlocks(exec_block);
1251
1252   /* count the blocks on the free list */
1253   free_blocks = countFreeList();
1254
1255   live_blocks = 0;
1256   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1257       live_blocks += gen_blocks[g];
1258   }
1259   live_blocks += nursery_blocks +
1260                + retainer_blocks + arena_blocks + exec_blocks;
1261
1262 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1263
1264   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1265   if (show || leak)
1266   {
1267       if (leak) {
1268           debugBelch("Memory leak detected:\n");
1269       } else {
1270           debugBelch("Memory inventory:\n");
1271       }
1272       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1273           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1274                      gen_blocks[g], MB(gen_blocks[g]));
1275       }
1276       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1277                  nursery_blocks, MB(nursery_blocks));
1278       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1279                  retainer_blocks, MB(retainer_blocks));
1280       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1281                  arena_blocks, MB(arena_blocks));
1282       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1283                  exec_blocks, MB(exec_blocks));
1284       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1285                  free_blocks, MB(free_blocks));
1286       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1287                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1288       if (leak) {
1289           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1290                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1291       }
1292   }
1293 }
1294
1295
1296 /* Full heap sanity check. */
1297 void
1298 checkSanity( void )
1299 {
1300     nat g, s;
1301
1302     if (RtsFlags.GcFlags.generations == 1) {
1303         checkHeap(g0s0->blocks);
1304         checkChain(g0s0->large_objects);
1305     } else {
1306
1307         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1308             for (s = 0; s < generations[g].n_steps; s++) {
1309                 if (g == 0 && s == 0) { continue; }
1310                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1311                        == generations[g].steps[s].n_blocks);
1312                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1313                        == generations[g].steps[s].n_large_blocks);
1314                 checkHeap(generations[g].steps[s].blocks);
1315                 checkChain(generations[g].steps[s].large_objects);
1316                 if (g > 0) {
1317                     checkMutableList(generations[g].mut_list, g);
1318                 }
1319             }
1320         }
1321
1322         for (s = 0; s < n_nurseries; s++) {
1323             ASSERT(countBlocks(nurseries[s].blocks)
1324                    == nurseries[s].n_blocks);
1325             ASSERT(countBlocks(nurseries[s].large_objects)
1326                    == nurseries[s].n_large_blocks);
1327         }
1328
1329         checkFreeListSanity();
1330     }
1331 }
1332
1333 /* Nursery sanity check */
1334 void
1335 checkNurserySanity( step *stp )
1336 {
1337     bdescr *bd, *prev;
1338     nat blocks = 0;
1339
1340     prev = NULL;
1341     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1342         ASSERT(bd->u.back == prev);
1343         prev = bd;
1344         blocks += bd->blocks;
1345     }
1346     ASSERT(blocks == stp->n_blocks);
1347 }
1348
1349 // handy function for use in gdb, because Bdescr() is inlined.
1350 extern bdescr *_bdescr( StgPtr p );
1351
1352 bdescr *
1353 _bdescr( StgPtr p )
1354 {
1355     return Bdescr(p);
1356 }
1357
1358 #endif