rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsUtils.h"
  17 #include "RtsFlags.h"
  18 #include "Stats.h"
  19 #include "Hooks.h"
  20 #include "BlockAlloc.h"
  21 #include "MBlock.h"
  22 #include "Weak.h"
  23 #include "Sanity.h"
  24 #include "Arena.h"
  25 #include "OSThreads.h"
  26 #include "Capability.h"
  27 #include "Storage.h"
  28 #include "Schedule.h"
  29 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  30 #include "OSMem.h"
  31 #include "Trace.h"
  32 #include "GC.h"
  33 #include "Evac.h"
  34
  35 #include <stdlib.h>
  36 #include <string.h>
  37
  38 /*
  39  * All these globals require sm_mutex to access in THREADED_RTS mode.
  40  */
  41 StgClosure    *caf_list         = NULL;
  42 StgClosure    *revertible_caf_list = NULL;
  43 rtsBool       keepCAFs;
  44
  45 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  46 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  47 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  48
  49 generation *generations = NULL; /* all the generations */
  50 generation *g0          = NULL; /* generation 0, for convenience */
  51 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  52 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  53
  54 nat total_steps         = 0;
  55 step *all_steps         = NULL; /* single array of steps */
  56
  57 ullong total_allocated = 0;     /* total memory allocated during run */
  58
  59 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  60 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  61
  62 #ifdef THREADED_RTS
  63 /*
  64  * Storage manager mutex:  protects all the above state from
  65  * simultaneous access by two STG threads.
  66  */
  67 Mutex sm_mutex;
  68 /*
  69  * This mutex is used by atomicModifyMutVar# only
  70  */
  71 Mutex atomic_modify_mutvar_mutex;
  72 #endif
  73
  74
  75 /*
  76  * Forward references
  77  */
  78 static void *stgAllocForGMP   (size_t size_in_bytes);
  79 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  80 static void  stgDeallocForGMP (void *ptr, size_t size);
  81
  82 static void
  83 initStep (step *stp, int g, int s)
  84 {
  85     stp->no = s;
  86     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  87     stp->blocks = NULL;
  88     stp->n_blocks = 0;
  89     stp->n_words = 0;
  90     stp->live_estimate = 0;
  91     stp->old_blocks = NULL;
  92     stp->n_old_blocks = 0;
  93     stp->gen = &generations[g];
  94     stp->gen_no = g;
  95     stp->large_objects = NULL;
  96     stp->n_large_blocks = 0;
  97     stp->scavenged_large_objects = NULL;
  98     stp->n_scavenged_large_blocks = 0;
  99     stp->mark = 0;
 100     stp->compact = 0;
 101     stp->bitmap = NULL;
 102 #ifdef THREADED_RTS
 103     initSpinLock(&stp->sync_todo);
 104     initSpinLock(&stp->sync_large_objects);
 105 #endif
 106     stp->threads = END_TSO_QUEUE;
 107     stp->old_threads = END_TSO_QUEUE;
 108 }
 109
 110 void
 111 initStorage( void )
 112 {
 113   nat g, s;
 114   generation *gen;
 115
 116   if (generations != NULL) {
 117       // multi-init protection
 118       return;
 119   }
 120
 121   initMBlocks();
 122
 123   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 124    * doing something reasonable.
 125    */
 126   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 127   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
 128   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 129   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 130
 131   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 132       RtsFlags.GcFlags.heapSizeSuggestion >
 133       RtsFlags.GcFlags.maxHeapSize) {
 134     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 135   }
 136
 137   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 138       RtsFlags.GcFlags.minAllocAreaSize >
 139       RtsFlags.GcFlags.maxHeapSize) {
 140       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 141       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 142   }
 143
 144   initBlockAllocator();
 145
 146 #if defined(THREADED_RTS)
 147   initMutex(&sm_mutex);
 148   initMutex(&atomic_modify_mutvar_mutex);
 149 #endif
 150
 151   ACQUIRE_SM_LOCK;
 152
 153   /* allocate generation info array */
 154   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 155                                              * sizeof(struct generation_),
 156                                              "initStorage: gens");
 157
 158   /* allocate all the steps into an array.  It is important that we do
 159      it this way, because we need the invariant that two step pointers
 160      can be directly compared to see which is the oldest.
 161      Remember that the last generation has only one step. */
 162   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 163   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 164                                "initStorage: steps");
 165
 166   /* Initialise all generations */
 167   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 168     gen = &generations[g];
 169     gen->no = g;
 170     gen->mut_list = allocBlock();
 171     gen->collections = 0;
 172     gen->par_collections = 0;
 173     gen->failed_promotions = 0;
 174     gen->max_blocks = 0;
 175   }
 176
 177   /* A couple of convenience pointers */
 178   g0 = &generations[0];
 179   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 180
 181   /* Allocate step structures in each generation */
 182   if (RtsFlags.GcFlags.generations > 1) {
 183     /* Only for multiple-generations */
 184
 185     /* Oldest generation: one step */
 186     oldest_gen->n_steps = 1;
 187     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 188                                       * RtsFlags.GcFlags.steps;
 189
 190     /* set up all except the oldest generation with 2 steps */
 191     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 192       generations[g].n_steps = RtsFlags.GcFlags.steps;
 193       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 194     }
 195
 196   } else {
 197     /* single generation, i.e. a two-space collector */
 198     g0->n_steps = 1;
 199     g0->steps   = all_steps;
 200   }
 201
 202 #ifdef THREADED_RTS
 203   n_nurseries = n_capabilities;
 204 #else
 205   n_nurseries = 1;
 206 #endif
 207   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 208                               "initStorage: nurseries");
 209
 210   /* Initialise all steps */
 211   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 212     for (s = 0; s < generations[g].n_steps; s++) {
 213         initStep(&generations[g].steps[s], g, s);
 214     }
 215   }
 216
 217   for (s = 0; s < n_nurseries; s++) {
 218       initStep(&nurseries[s], 0, s);
 219   }
 220
 221   /* Set up the destination pointers in each younger gen. step */
 222   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 223     for (s = 0; s < generations[g].n_steps-1; s++) {
 224       generations[g].steps[s].to = &generations[g].steps[s+1];
 225     }
 226     generations[g].steps[s].to = &generations[g+1].steps[0];
 227   }
 228   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 229
 230   for (s = 0; s < n_nurseries; s++) {
 231       nurseries[s].to = generations[0].steps[0].to;
 232   }
 233
 234   /* The oldest generation has one step. */
 235   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 236       if (RtsFlags.GcFlags.generations == 1) {
 237           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 238       } else {
 239           oldest_gen->steps[0].mark = 1;
 240           if (RtsFlags.GcFlags.compact)
 241               oldest_gen->steps[0].compact = 1;
 242       }
 243   }
 244
 245   generations[0].max_blocks = 0;
 246   g0s0 = &generations[0].steps[0];
 247
 248   /* The allocation area.  Policy: keep the allocation area
 249    * small to begin with, even if we have a large suggested heap
 250    * size.  Reason: we're going to do a major collection first, and we
 251    * don't want it to be a big one.  This vague idea is borne out by
 252    * rigorous experimental evidence.
 253    */
 254   allocNurseries();
 255
 256   weak_ptr_list = NULL;
 257   caf_list = NULL;
 258   revertible_caf_list = NULL;
 259
 260   /* initialise the allocate() interface */
 261   alloc_blocks = 0;
 262   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 263
 264   /* Tell GNU multi-precision pkg about our custom alloc functions */
 265   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 266
 267 #ifdef THREADED_RTS
 268   initSpinLock(&gc_alloc_block_sync);
 269   initSpinLock(&recordMutableGen_sync);
 270   whitehole_spin = 0;
 271 #endif
 272
 273   IF_DEBUG(gc, statDescribeGens());
 274
 275   RELEASE_SM_LOCK;
 276 }
 277
 278 void
 279 exitStorage (void)
 280 {
 281     stat_exit(calcAllocated());
 282 }
 283
 284 void
 285 freeStorage (void)
 286 {
 287     stgFree(g0s0); // frees all the steps
 288     stgFree(generations);
 289     freeAllMBlocks();
 290 #if defined(THREADED_RTS)
 291     closeMutex(&sm_mutex);
 292     closeMutex(&atomic_modify_mutvar_mutex);
 293 #endif
 294     stgFree(nurseries);
 295 }
 296
 297 /* -----------------------------------------------------------------------------
 298    CAF management.
 299
 300    The entry code for every CAF does the following:
 301
 302       - builds a CAF_BLACKHOLE in the heap
 303       - pushes an update frame pointing to the CAF_BLACKHOLE
 304       - invokes UPD_CAF(), which:
 305           - calls newCaf, below
 306           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 307
 308    Why do we build a BLACKHOLE in the heap rather than just updating
 309    the thunk directly?  It's so that we only need one kind of update
 310    frame - otherwise we'd need a static version of the update frame too.
 311
 312    newCaf() does the following:
 313
 314       - it puts the CAF on the oldest generation's mut-once list.
 315         This is so that we can treat the CAF as a root when collecting
 316         younger generations.
 317
 318    For GHCI, we have additional requirements when dealing with CAFs:
 319
 320       - we must *retain* all dynamically-loaded CAFs ever entered,
 321         just in case we need them again.
 322       - we must be able to *revert* CAFs that have been evaluated, to
 323         their pre-evaluated form.
 324
 325       To do this, we use an additional CAF list.  When newCaf() is
 326       called on a dynamically-loaded CAF, we add it to the CAF list
 327       instead of the old-generation mutable list, and save away its
 328       old info pointer (in caf->saved_info) for later reversion.
 329
 330       To revert all the CAFs, we traverse the CAF list and reset the
 331       info pointer to caf->saved_info, then throw away the CAF list.
 332       (see GC.c:revertCAFs()).
 333
 334       -- SDM 29/1/01
 335
 336    -------------------------------------------------------------------------- */
 337
 338 void
 339 newCAF(StgClosure* caf)
 340 {
 341   ACQUIRE_SM_LOCK;
 342
 343   if(keepCAFs)
 344   {
 345     // HACK:
 346     // If we are in GHCi _and_ we are using dynamic libraries,
 347     // then we can't redirect newCAF calls to newDynCAF (see below),
 348     // so we make newCAF behave almost like newDynCAF.
 349     // The dynamic libraries might be used by both the interpreted
 350     // program and GHCi itself, so they must not be reverted.
 351     // This also means that in GHCi with dynamic libraries, CAFs are not
 352     // garbage collected. If this turns out to be a problem, we could
 353     // do another hack here and do an address range test on caf to figure
 354     // out whether it is from a dynamic library.
 355     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 356     ((StgIndStatic *)caf)->static_link = caf_list;
 357     caf_list = caf;
 358   }
 359   else
 360   {
 361     /* Put this CAF on the mutable list for the old generation.
 362     * This is a HACK - the IND_STATIC closure doesn't really have
 363     * a mut_link field, but we pretend it has - in fact we re-use
 364     * the STATIC_LINK field for the time being, because when we
 365     * come to do a major GC we won't need the mut_link field
 366     * any more and can use it as a STATIC_LINK.
 367     */
 368     ((StgIndStatic *)caf)->saved_info = NULL;
 369     recordMutableGen(caf, oldest_gen);
 370   }
 371
 372   RELEASE_SM_LOCK;
 373 }
 374
 375 // An alternate version of newCaf which is used for dynamically loaded
 376 // object code in GHCi.  In this case we want to retain *all* CAFs in
 377 // the object code, because they might be demanded at any time from an
 378 // expression evaluated on the command line.
 379 // Also, GHCi might want to revert CAFs, so we add these to the
 380 // revertible_caf_list.
 381 //
 382 // The linker hackily arranges that references to newCaf from dynamic
 383 // code end up pointing to newDynCAF.
 384 void
 385 newDynCAF(StgClosure *caf)
 386 {
 387     ACQUIRE_SM_LOCK;
 388
 389     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 390     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 391     revertible_caf_list = caf;
 392
 393     RELEASE_SM_LOCK;
 394 }
 395
 396 /* -----------------------------------------------------------------------------
 397    Nursery management.
 398    -------------------------------------------------------------------------- */
 399
 400 static bdescr *
 401 allocNursery (step *stp, bdescr *tail, nat blocks)
 402 {
 403     bdescr *bd;
 404     nat i;
 405
 406     // Allocate a nursery: we allocate fresh blocks one at a time and
 407     // cons them on to the front of the list, not forgetting to update
 408     // the back pointer on the tail of the list to point to the new block.
 409     for (i=0; i < blocks; i++) {
 410         // @LDV profiling
 411         /*
 412           processNursery() in LdvProfile.c assumes that every block group in
 413           the nursery contains only a single block. So, if a block group is
 414           given multiple blocks, change processNursery() accordingly.
 415         */
 416         bd = allocBlock();
 417         bd->link = tail;
 418         // double-link the nursery: we might need to insert blocks
 419         if (tail != NULL) {
 420             tail->u.back = bd;
 421         }
 422         bd->step = stp;
 423         bd->gen_no = 0;
 424         bd->flags = 0;
 425         bd->free = bd->start;
 426         tail = bd;
 427     }
 428     tail->u.back = NULL;
 429     return tail;
 430 }
 431
 432 static void
 433 assignNurseriesToCapabilities (void)
 434 {
 435 #ifdef THREADED_RTS
 436     nat i;
 437
 438     for (i = 0; i < n_nurseries; i++) {
 439         capabilities[i].r.rNursery        = &nurseries[i];
 440         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 441         capabilities[i].r.rCurrentAlloc   = NULL;
 442     }
 443 #else /* THREADED_RTS */
 444     MainCapability.r.rNursery        = &nurseries[0];
 445     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 446     MainCapability.r.rCurrentAlloc   = NULL;
 447 #endif
 448 }
 449
 450 void
 451 allocNurseries( void )
 452 {
 453     nat i;
 454
 455     for (i = 0; i < n_nurseries; i++) {
 456         nurseries[i].blocks =
 457             allocNursery(&nurseries[i], NULL,
 458                          RtsFlags.GcFlags.minAllocAreaSize);
 459         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 460         nurseries[i].old_blocks   = NULL;
 461         nurseries[i].n_old_blocks = 0;
 462     }
 463     assignNurseriesToCapabilities();
 464 }
 465
 466 void
 467 resetNurseries( void )
 468 {
 469     nat i;
 470     bdescr *bd;
 471     step *stp;
 472
 473     for (i = 0; i < n_nurseries; i++) {
 474         stp = &nurseries[i];
 475         for (bd = stp->blocks; bd; bd = bd->link) {
 476             bd->free = bd->start;
 477             ASSERT(bd->gen_no == 0);
 478             ASSERT(bd->step == stp);
 479             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 480         }
 481     }
 482     assignNurseriesToCapabilities();
 483 }
 484
 485 lnat
 486 countNurseryBlocks (void)
 487 {
 488     nat i;
 489     lnat blocks = 0;
 490
 491     for (i = 0; i < n_nurseries; i++) {
 492         blocks += nurseries[i].n_blocks;
 493     }
 494     return blocks;
 495 }
 496
 497 static void
 498 resizeNursery ( step *stp, nat blocks )
 499 {
 500   bdescr *bd;
 501   nat nursery_blocks;
 502
 503   nursery_blocks = stp->n_blocks;
 504   if (nursery_blocks == blocks) return;
 505
 506   if (nursery_blocks < blocks) {
 507       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 508                  blocks);
 509     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 510   }
 511   else {
 512     bdescr *next_bd;
 513
 514     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 515                blocks);
 516
 517     bd = stp->blocks;
 518     while (nursery_blocks > blocks) {
 519         next_bd = bd->link;
 520         next_bd->u.back = NULL;
 521         nursery_blocks -= bd->blocks; // might be a large block
 522         freeGroup(bd);
 523         bd = next_bd;
 524     }
 525     stp->blocks = bd;
 526     // might have gone just under, by freeing a large block, so make
 527     // up the difference.
 528     if (nursery_blocks < blocks) {
 529         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 530     }
 531   }
 532
 533   stp->n_blocks = blocks;
 534   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 535 }
 536
 537 //
 538 // Resize each of the nurseries to the specified size.
 539 //
 540 void
 541 resizeNurseriesFixed (nat blocks)
 542 {
 543     nat i;
 544     for (i = 0; i < n_nurseries; i++) {
 545         resizeNursery(&nurseries[i], blocks);
 546     }
 547 }
 548
 549 //
 550 // Resize the nurseries to the total specified size.
 551 //
 552 void
 553 resizeNurseries (nat blocks)
 554 {
 555     // If there are multiple nurseries, then we just divide the number
 556     // of available blocks between them.
 557     resizeNurseriesFixed(blocks / n_nurseries);
 558 }
 559
 560
 561 /* -----------------------------------------------------------------------------
 562    move_TSO is called to update the TSO structure after it has been
 563    moved from one place to another.
 564    -------------------------------------------------------------------------- */
 565
 566 void
 567 move_TSO (StgTSO *src, StgTSO *dest)
 568 {
 569     ptrdiff_t diff;
 570
 571     // relocate the stack pointer...
 572     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 573     dest->sp = (StgPtr)dest->sp + diff;
 574 }
 575
 576 /* -----------------------------------------------------------------------------
 577    The allocate() interface
 578
 579    allocateInGen() function allocates memory directly into a specific
 580    generation.  It always succeeds, and returns a chunk of memory n
 581    words long.  n can be larger than the size of a block if necessary,
 582    in which case a contiguous block group will be allocated.
 583
 584    allocate(n) is equivalent to allocateInGen(g0).
 585    -------------------------------------------------------------------------- */
 586
 587 StgPtr
 588 allocateInGen (generation *g, lnat n)
 589 {
 590     step *stp;
 591     bdescr *bd;
 592     StgPtr ret;
 593
 594     ACQUIRE_SM_LOCK;
 595
 596     TICK_ALLOC_HEAP_NOCTR(n);
 597     CCS_ALLOC(CCCS,n);
 598
 599     stp = &g->steps[0];
 600
 601     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 602     {
 603         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 604
 605         // Attempting to allocate an object larger than maxHeapSize
 606         // should definitely be disallowed.  (bug #1791)
 607         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 608             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 609             heapOverflow();
 610         }
 611
 612         bd = allocGroup(req_blocks);
 613         dbl_link_onto(bd, &stp->large_objects);
 614         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 615         bd->gen_no  = g->no;
 616         bd->step = stp;
 617         bd->flags = BF_LARGE;
 618         bd->free = bd->start + n;
 619         ret = bd->start;
 620     }
 621     else
 622     {
 623         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 624         bd = stp->blocks;
 625         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 626             bd = allocBlock();
 627             bd->gen_no = g->no;
 628             bd->step = stp;
 629             bd->flags = 0;
 630             bd->link = stp->blocks;
 631             stp->blocks = bd;
 632             stp->n_blocks++;
 633             alloc_blocks++;
 634         }
 635         ret = bd->free;
 636         bd->free += n;
 637     }
 638
 639     RELEASE_SM_LOCK;
 640
 641     return ret;
 642 }
 643
 644 StgPtr
 645 allocate (lnat n)
 646 {
 647     return allocateInGen(g0,n);
 648 }
 649
 650 lnat
 651 allocatedBytes( void )
 652 {
 653     lnat allocated;
 654
 655     allocated = alloc_blocks * BLOCK_SIZE_W;
 656     if (pinned_object_block != NULL) {
 657         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 658             pinned_object_block->free;
 659     }
 660
 661     return allocated;
 662 }
 663
 664 // split N blocks off the start of the given bdescr, returning the
 665 // remainder as a new block group.  We treat the remainder as if it
 666 // had been freshly allocated in generation 0.
 667 bdescr *
 668 splitLargeBlock (bdescr *bd, nat blocks)
 669 {
 670     bdescr *new_bd;
 671
 672     // subtract the original number of blocks from the counter first
 673     bd->step->n_large_blocks -= bd->blocks;
 674
 675     new_bd = splitBlockGroup (bd, blocks);
 676
 677     dbl_link_onto(new_bd, &g0s0->large_objects);
 678     g0s0->n_large_blocks += new_bd->blocks;
 679     new_bd->gen_no  = g0s0->no;
 680     new_bd->step    = g0s0;
 681     new_bd->flags   = BF_LARGE;
 682     new_bd->free    = bd->free;
 683
 684     // add the new number of blocks to the counter.  Due to the gaps
 685     // for block descriptor, new_bd->blocks + bd->blocks might not be
 686     // equal to the original bd->blocks, which is why we do it this way.
 687     bd->step->n_large_blocks += bd->blocks;
 688
 689     return new_bd;
 690 }
 691
 692 /* -----------------------------------------------------------------------------
 693    allocateLocal()
 694
 695    This allocates memory in the current thread - it is intended for
 696    use primarily from STG-land where we have a Capability.  It is
 697    better than allocate() because it doesn't require taking the
 698    sm_mutex lock in the common case.
 699
 700    Memory is allocated directly from the nursery if possible (but not
 701    from the current nursery block, so as not to interfere with
 702    Hp/HpLim).
 703    -------------------------------------------------------------------------- */
 704
 705 StgPtr
 706 allocateLocal (Capability *cap, lnat n)
 707 {
 708     bdescr *bd;
 709     StgPtr p;
 710
 711     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 712         return allocateInGen(g0,n);
 713     }
 714
 715     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 716
 717     TICK_ALLOC_HEAP_NOCTR(n);
 718     CCS_ALLOC(CCCS,n);
 719
 720     bd = cap->r.rCurrentAlloc;
 721     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 722
 723         // The CurrentAlloc block is full, we need to find another
 724         // one.  First, we try taking the next block from the
 725         // nursery:
 726         bd = cap->r.rCurrentNursery->link;
 727
 728         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 729             // The nursery is empty, or the next block is already
 730             // full: allocate a fresh block (we can't fail here).
 731             ACQUIRE_SM_LOCK;
 732             bd = allocBlock();
 733             cap->r.rNursery->n_blocks++;
 734             RELEASE_SM_LOCK;
 735             bd->gen_no = 0;
 736             bd->step = cap->r.rNursery;
 737             bd->flags = 0;
 738             // NO: alloc_blocks++;
 739             // calcAllocated() uses the size of the nursery, and we've
 740             // already bumpted nursery->n_blocks above.
 741         } else {
 742             // we have a block in the nursery: take it and put
 743             // it at the *front* of the nursery list, and use it
 744             // to allocate() from.
 745             cap->r.rCurrentNursery->link = bd->link;
 746             if (bd->link != NULL) {
 747                 bd->link->u.back = cap->r.rCurrentNursery;
 748             }
 749         }
 750         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 751         cap->r.rCurrentAlloc = bd;
 752         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 753     }
 754     p = bd->free;
 755     bd->free += n;
 756     return p;
 757 }
 758
 759 /* ---------------------------------------------------------------------------
 760    Allocate a fixed/pinned object.
 761
 762    We allocate small pinned objects into a single block, allocating a
 763    new block when the current one overflows.  The block is chained
 764    onto the large_object_list of generation 0 step 0.
 765
 766    NOTE: The GC can't in general handle pinned objects.  This
 767    interface is only safe to use for ByteArrays, which have no
 768    pointers and don't require scavenging.  It works because the
 769    block's descriptor has the BF_LARGE flag set, so the block is
 770    treated as a large object and chained onto various lists, rather
 771    than the individual objects being copied.  However, when it comes
 772    to scavenge the block, the GC will only scavenge the first object.
 773    The reason is that the GC can't linearly scan a block of pinned
 774    objects at the moment (doing so would require using the
 775    mostly-copying techniques).  But since we're restricting ourselves
 776    to pinned ByteArrays, not scavenging is ok.
 777
 778    This function is called by newPinnedByteArray# which immediately
 779    fills the allocated memory with a MutableByteArray#.
 780    ------------------------------------------------------------------------- */
 781
 782 StgPtr
 783 allocatePinned( lnat n )
 784 {
 785     StgPtr p;
 786     bdescr *bd = pinned_object_block;
 787
 788     // If the request is for a large object, then allocate()
 789     // will give us a pinned object anyway.
 790     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 791         return allocate(n);
 792     }
 793
 794     ACQUIRE_SM_LOCK;
 795
 796     TICK_ALLOC_HEAP_NOCTR(n);
 797     CCS_ALLOC(CCCS,n);
 798
 799     // we always return 8-byte aligned memory.  bd->free must be
 800     // 8-byte aligned to begin with, so we just round up n to
 801     // the nearest multiple of 8 bytes.
 802     if (sizeof(StgWord) == 4) {
 803         n = (n+1) & ~1;
 804     }
 805
 806     // If we don't have a block of pinned objects yet, or the current
 807     // one isn't large enough to hold the new object, allocate a new one.
 808     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 809         pinned_object_block = bd = allocBlock();
 810         dbl_link_onto(bd, &g0s0->large_objects);
 811         g0s0->n_large_blocks++;
 812         bd->gen_no = 0;
 813         bd->step   = g0s0;
 814         bd->flags  = BF_PINNED | BF_LARGE;
 815         bd->free   = bd->start;
 816         alloc_blocks++;
 817     }
 818
 819     p = bd->free;
 820     bd->free += n;
 821     RELEASE_SM_LOCK;
 822     return p;
 823 }
 824
 825 /* -----------------------------------------------------------------------------
 826    Write Barriers
 827    -------------------------------------------------------------------------- */
 828
 829 /*
 830    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 831    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 832    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 833    and is put on the mutable list.
 834 */
 835 void
 836 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 837 {
 838     Capability *cap = regTableToCapability(reg);
 839     bdescr *bd;
 840     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 841         p->header.info = &stg_MUT_VAR_DIRTY_info;
 842         bd = Bdescr((StgPtr)p);
 843         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 844     }
 845 }
 846
 847 // Setting a TSO's link field with a write barrier.
 848 // It is *not* necessary to call this function when
 849 //    * setting the link field to END_TSO_QUEUE
 850 //    * putting a TSO on the blackhole_queue
 851 //    * setting the link field of the currently running TSO, as it
 852 //      will already be dirty.
 853 void
 854 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 855 {
 856     bdescr *bd;
 857     if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
 858         tso->flags |= TSO_LINK_DIRTY;
 859         bd = Bdescr((StgPtr)tso);
 860         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 861     }
 862     tso->_link = target;
 863 }
 864
 865 void
 866 dirty_TSO (Capability *cap, StgTSO *tso)
 867 {
 868     bdescr *bd;
 869     if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
 870         bd = Bdescr((StgPtr)tso);
 871         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 872     }
 873     tso->flags |= TSO_DIRTY;
 874 }
 875
 876 /*
 877    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 878    on the mutable list; a MVAR_DIRTY is.  When written to, a
 879    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 880    The check for MVAR_CLEAN is inlined at the call site for speed,
 881    this really does make a difference on concurrency-heavy benchmarks
 882    such as Chaneneos and cheap-concurrency.
 883 */
 884 void
 885 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 886 {
 887     Capability *cap = regTableToCapability(reg);
 888     bdescr *bd;
 889     bd = Bdescr((StgPtr)p);
 890     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 891 }
 892
 893 /* -----------------------------------------------------------------------------
 894    Allocation functions for GMP.
 895
 896    These all use the allocate() interface - we can't have any garbage
 897    collection going on during a gmp operation, so we use allocate()
 898    which always succeeds.  The gmp operations which might need to
 899    allocate will ask the storage manager (via doYouWantToGC()) whether
 900    a garbage collection is required, in case we get into a loop doing
 901    only allocate() style allocation.
 902    -------------------------------------------------------------------------- */
 903
 904 static void *
 905 stgAllocForGMP (size_t size_in_bytes)
 906 {
 907   StgArrWords* arr;
 908   nat data_size_in_words, total_size_in_words;
 909
 910   /* round up to a whole number of words */
 911   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 912   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 913
 914   /* allocate and fill it in. */
 915 #if defined(THREADED_RTS)
 916   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 917 #else
 918   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 919 #endif
 920   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 921
 922   /* and return a ptr to the goods inside the array */
 923   return arr->payload;
 924 }
 925
 926 static void *
 927 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 928 {
 929     void *new_stuff_ptr = stgAllocForGMP(new_size);
 930     nat i = 0;
 931     char *p = (char *) ptr;
 932     char *q = (char *) new_stuff_ptr;
 933
 934     for (; i < old_size; i++, p++, q++) {
 935         *q = *p;
 936     }
 937
 938     return(new_stuff_ptr);
 939 }
 940
 941 static void
 942 stgDeallocForGMP (void *ptr STG_UNUSED,
 943                   size_t size STG_UNUSED)
 944 {
 945     /* easy for us: the garbage collector does the dealloc'n */
 946 }
 947
 948 /* -----------------------------------------------------------------------------
 949  * Stats and stuff
 950  * -------------------------------------------------------------------------- */
 951
 952 /* -----------------------------------------------------------------------------
 953  * calcAllocated()
 954  *
 955  * Approximate how much we've allocated: number of blocks in the
 956  * nursery + blocks allocated via allocate() - unused nusery blocks.
 957  * This leaves a little slop at the end of each block, and doesn't
 958  * take into account large objects (ToDo).
 959  * -------------------------------------------------------------------------- */
 960
 961 lnat
 962 calcAllocated( void )
 963 {
 964   nat allocated;
 965   bdescr *bd;
 966
 967   allocated = allocatedBytes();
 968   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 969
 970   {
 971 #ifdef THREADED_RTS
 972   nat i;
 973   for (i = 0; i < n_nurseries; i++) {
 974       Capability *cap;
 975       for ( bd = capabilities[i].r.rCurrentNursery->link;
 976             bd != NULL; bd = bd->link ) {
 977           allocated -= BLOCK_SIZE_W;
 978       }
 979       cap = &capabilities[i];
 980       if (cap->r.rCurrentNursery->free <
 981           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 982           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 983               - cap->r.rCurrentNursery->free;
 984       }
 985   }
 986 #else
 987   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 988
 989   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 990       allocated -= BLOCK_SIZE_W;
 991   }
 992   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 993       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 994           - current_nursery->free;
 995   }
 996 #endif
 997   }
 998
 999   total_allocated += allocated;
1000   return allocated;
1001 }
1002
1003 /* Approximate the amount of live data in the heap.  To be called just
1004  * after garbage collection (see GarbageCollect()).
1005  */
1006 lnat
1007 calcLiveBlocks(void)
1008 {
1009   nat g, s;
1010   lnat live = 0;
1011   step *stp;
1012
1013   if (RtsFlags.GcFlags.generations == 1) {
1014       return g0s0->n_large_blocks + g0s0->n_blocks;
1015   }
1016
1017   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1018     for (s = 0; s < generations[g].n_steps; s++) {
1019       /* approximate amount of live data (doesn't take into account slop
1020        * at end of each block).
1021        */
1022       if (g == 0 && s == 0) {
1023           continue;
1024       }
1025       stp = &generations[g].steps[s];
1026       live += stp->n_large_blocks + stp->n_blocks;
1027     }
1028   }
1029   return live;
1030 }
1031
1032 lnat
1033 countOccupied(bdescr *bd)
1034 {
1035     lnat words;
1036
1037     words = 0;
1038     for (; bd != NULL; bd = bd->link) {
1039         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1040         words += bd->free - bd->start;
1041     }
1042     return words;
1043 }
1044
1045 // Return an accurate count of the live data in the heap, excluding
1046 // generation 0.
1047 lnat
1048 calcLiveWords(void)
1049 {
1050     nat g, s;
1051     lnat live;
1052     step *stp;
1053
1054     if (RtsFlags.GcFlags.generations == 1) {
1055         return g0s0->n_words + countOccupied(g0s0->large_objects);
1056     }
1057
1058     live = 0;
1059     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1060         for (s = 0; s < generations[g].n_steps; s++) {
1061             if (g == 0 && s == 0) continue;
1062             stp = &generations[g].steps[s];
1063             live += stp->n_words + countOccupied(stp->large_objects);
1064         }
1065     }
1066     return live;
1067 }
1068
1069 /* Approximate the number of blocks that will be needed at the next
1070  * garbage collection.
1071  *
1072  * Assume: all data currently live will remain live.  Steps that will
1073  * be collected next time will therefore need twice as many blocks
1074  * since all the data will be copied.
1075  */
1076 extern lnat
1077 calcNeeded(void)
1078 {
1079     lnat needed = 0;
1080     nat g, s;
1081     step *stp;
1082
1083     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1084         for (s = 0; s < generations[g].n_steps; s++) {
1085             if (g == 0 && s == 0) { continue; }
1086             stp = &generations[g].steps[s];
1087
1088             // we need at least this much space
1089             needed += stp->n_blocks + stp->n_large_blocks;
1090
1091             // any additional space needed to collect this gen next time?
1092             if (g == 0 || // always collect gen 0
1093                 (generations[g].steps[0].n_blocks +
1094                  generations[g].steps[0].n_large_blocks
1095                  > generations[g].max_blocks)) {
1096                 // we will collect this gen next time
1097                 if (stp->mark) {
1098                     //  bitmap:
1099                     needed += stp->n_blocks / BITS_IN(W_);
1100                     //  mark stack:
1101                     needed += stp->n_blocks / 100;
1102                 }
1103                 if (stp->compact) {
1104                     continue; // no additional space needed for compaction
1105                 } else {
1106                     needed += stp->n_blocks;
1107                 }
1108             }
1109         }
1110     }
1111     return needed;
1112 }
1113
1114 /* ----------------------------------------------------------------------------
1115    Executable memory
1116
1117    Executable memory must be managed separately from non-executable
1118    memory.  Most OSs these days require you to jump through hoops to
1119    dynamically allocate executable memory, due to various security
1120    measures.
1121
1122    Here we provide a small memory allocator for executable memory.
1123    Memory is managed with a page granularity; we allocate linearly
1124    in the page, and when the page is emptied (all objects on the page
1125    are free) we free the page again, not forgetting to make it
1126    non-executable.
1127
1128    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1129          the linker cannot use allocateExec for loading object code files
1130          on Windows. Once allocateExec can handle larger objects, the linker
1131          should be modified to use allocateExec instead of VirtualAlloc.
1132    ------------------------------------------------------------------------- */
1133
1134 static bdescr *exec_block;
1135
1136 void *allocateExec (nat bytes)
1137 {
1138     void *ret;
1139     nat n;
1140
1141     ACQUIRE_SM_LOCK;
1142
1143     // round up to words.
1144     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1145
1146     if (n+1 > BLOCK_SIZE_W) {
1147         barf("allocateExec: can't handle large objects");
1148     }
1149
1150     if (exec_block == NULL ||
1151         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1152         bdescr *bd;
1153         lnat pagesize = getPageSize();
1154         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1155         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1156         bd->gen_no = 0;
1157         bd->flags = BF_EXEC;
1158         bd->link = exec_block;
1159         if (exec_block != NULL) {
1160             exec_block->u.back = bd;
1161         }
1162         bd->u.back = NULL;
1163         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1164         exec_block = bd;
1165     }
1166     *(exec_block->free) = n;  // store the size of this chunk
1167     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1168     ret = exec_block->free + 1;
1169     exec_block->free += n + 1;
1170
1171     RELEASE_SM_LOCK
1172     return ret;
1173 }
1174
1175 void freeExec (void *addr)
1176 {
1177     StgPtr p = (StgPtr)addr - 1;
1178     bdescr *bd = Bdescr((StgPtr)p);
1179
1180     if ((bd->flags & BF_EXEC) == 0) {
1181         barf("freeExec: not executable");
1182     }
1183
1184     if (*(StgPtr)p == 0) {
1185         barf("freeExec: already free?");
1186     }
1187
1188     ACQUIRE_SM_LOCK;
1189
1190     bd->gen_no -= *(StgPtr)p;
1191     *(StgPtr)p = 0;
1192
1193     if (bd->gen_no == 0) {
1194         // Free the block if it is empty, but not if it is the block at
1195         // the head of the queue.
1196         if (bd != exec_block) {
1197             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1198             dbl_link_remove(bd, &exec_block);
1199             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1200             freeGroup(bd);
1201         } else {
1202             bd->free = bd->start;
1203         }
1204     }
1205
1206     RELEASE_SM_LOCK
1207 }
1208
1209 /* -----------------------------------------------------------------------------
1210    Debugging
1211
1212    memInventory() checks for memory leaks by counting up all the
1213    blocks we know about and comparing that to the number of blocks
1214    allegedly floating around in the system.
1215    -------------------------------------------------------------------------- */
1216
1217 #ifdef DEBUG
1218
1219 // Useful for finding partially full blocks in gdb
1220 void findSlop(bdescr *bd);
1221 void findSlop(bdescr *bd)
1222 {
1223     lnat slop;
1224
1225     for (; bd != NULL; bd = bd->link) {
1226         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1227         if (slop > (1024/sizeof(W_))) {
1228             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1229                        bd->start, bd, slop / (1024/sizeof(W_)));
1230         }
1231     }
1232 }
1233
1234 nat
1235 countBlocks(bdescr *bd)
1236 {
1237     nat n;
1238     for (n=0; bd != NULL; bd=bd->link) {
1239         n += bd->blocks;
1240     }
1241     return n;
1242 }
1243
1244 // (*1) Just like countBlocks, except that we adjust the count for a
1245 // megablock group so that it doesn't include the extra few blocks
1246 // that would be taken up by block descriptors in the second and
1247 // subsequent megablock.  This is so we can tally the count with the
1248 // number of blocks allocated in the system, for memInventory().
1249 static nat
1250 countAllocdBlocks(bdescr *bd)
1251 {
1252     nat n;
1253     for (n=0; bd != NULL; bd=bd->link) {
1254         n += bd->blocks;
1255         // hack for megablock groups: see (*1) above
1256         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1257             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1258                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1259         }
1260     }
1261     return n;
1262 }
1263
1264 static lnat
1265 stepBlocks (step *stp)
1266 {
1267     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1268     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1269     return stp->n_blocks + stp->n_old_blocks +
1270             countAllocdBlocks(stp->large_objects);
1271 }
1272
1273 void
1274 memInventory (rtsBool show)
1275 {
1276   nat g, s, i;
1277   step *stp;
1278   lnat gen_blocks[RtsFlags.GcFlags.generations];
1279   lnat nursery_blocks, retainer_blocks,
1280        arena_blocks, exec_blocks;
1281   lnat live_blocks = 0, free_blocks = 0;
1282   rtsBool leak;
1283
1284   // count the blocks we current have
1285
1286   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1287       gen_blocks[g] = 0;
1288       for (i = 0; i < n_capabilities; i++) {
1289           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1290       }
1291       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1292       for (s = 0; s < generations[g].n_steps; s++) {
1293           stp = &generations[g].steps[s];
1294           gen_blocks[g] += stepBlocks(stp);
1295       }
1296   }
1297
1298   nursery_blocks = 0;
1299   for (i = 0; i < n_nurseries; i++) {
1300       nursery_blocks += stepBlocks(&nurseries[i]);
1301   }
1302
1303   retainer_blocks = 0;
1304 #ifdef PROFILING
1305   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1306       retainer_blocks = retainerStackBlocks();
1307   }
1308 #endif
1309
1310   // count the blocks allocated by the arena allocator
1311   arena_blocks = arenaBlocks();
1312
1313   // count the blocks containing executable memory
1314   exec_blocks = countAllocdBlocks(exec_block);
1315
1316   /* count the blocks on the free list */
1317   free_blocks = countFreeList();
1318
1319   live_blocks = 0;
1320   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1321       live_blocks += gen_blocks[g];
1322   }
1323   live_blocks += nursery_blocks +
1324                + retainer_blocks + arena_blocks + exec_blocks;
1325
1326 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1327
1328   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1329
1330   ASSERT(n_alloc_blocks == live_blocks);
1331
1332   if (show || leak)
1333   {
1334       if (leak) {
1335           debugBelch("Memory leak detected:\n");
1336       } else {
1337           debugBelch("Memory inventory:\n");
1338       }
1339       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1340           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1341                      gen_blocks[g], MB(gen_blocks[g]));
1342       }
1343       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1344                  nursery_blocks, MB(nursery_blocks));
1345       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1346                  retainer_blocks, MB(retainer_blocks));
1347       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1348                  arena_blocks, MB(arena_blocks));
1349       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1350                  exec_blocks, MB(exec_blocks));
1351       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1352                  free_blocks, MB(free_blocks));
1353       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1354                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1355       if (leak) {
1356           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1357                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1358       }
1359   }
1360 }
1361
1362
1363 /* Full heap sanity check. */
1364 void
1365 checkSanity( void )
1366 {
1367     nat g, s;
1368
1369     if (RtsFlags.GcFlags.generations == 1) {
1370         checkHeap(g0s0->blocks);
1371         checkChain(g0s0->large_objects);
1372     } else {
1373
1374         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1375             for (s = 0; s < generations[g].n_steps; s++) {
1376                 if (g == 0 && s == 0) { continue; }
1377                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1378                        == generations[g].steps[s].n_blocks);
1379                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1380                        == generations[g].steps[s].n_large_blocks);
1381                 checkHeap(generations[g].steps[s].blocks);
1382                 checkChain(generations[g].steps[s].large_objects);
1383                 if (g > 0) {
1384                     checkMutableList(generations[g].mut_list, g);
1385                 }
1386             }
1387         }
1388
1389         for (s = 0; s < n_nurseries; s++) {
1390             ASSERT(countBlocks(nurseries[s].blocks)
1391                    == nurseries[s].n_blocks);
1392             ASSERT(countBlocks(nurseries[s].large_objects)
1393                    == nurseries[s].n_large_blocks);
1394         }
1395
1396         checkFreeListSanity();
1397     }
1398 }
1399
1400 /* Nursery sanity check */
1401 void
1402 checkNurserySanity( step *stp )
1403 {
1404     bdescr *bd, *prev;
1405     nat blocks = 0;
1406
1407     prev = NULL;
1408     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1409         ASSERT(bd->u.back == prev);
1410         prev = bd;
1411         blocks += bd->blocks;
1412     }
1413     ASSERT(blocks == stp->n_blocks);
1414 }
1415
1416 // handy function for use in gdb, because Bdescr() is inlined.
1417 extern bdescr *_bdescr( StgPtr p );
1418
1419 bdescr *
1420 _bdescr( StgPtr p )
1421 {
1422     return Bdescr(p);
1423 }
1424
1425 #endif