rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16
  17 #include "Storage.h"
  18 #include "RtsUtils.h"
  19 #include "Stats.h"
  20 #include "BlockAlloc.h"
  21 #include "Weak.h"
  22 #include "Sanity.h"
  23 #include "Arena.h"
  24 #include "Capability.h"
  25 #include "Schedule.h"
  26 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  27 #include "OSMem.h"
  28 #include "Trace.h"
  29 #include "GC.h"
  30 #include "Evac.h"
  31
  32 #include <string.h>
  33
  34 #include "ffi.h"
  35
  36 /*
  37  * All these globals require sm_mutex to access in THREADED_RTS mode.
  38  */
  39 StgClosure    *caf_list         = NULL;
  40 StgClosure    *revertible_caf_list = NULL;
  41 rtsBool       keepCAFs;
  42
  43 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  44 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  45 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  46
  47 static bdescr *exec_block;
  48
  49 generation *generations = NULL; /* all the generations */
  50 generation *g0          = NULL; /* generation 0, for convenience */
  51 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  52 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  53
  54 nat total_steps         = 0;
  55 step *all_steps         = NULL; /* single array of steps */
  56
  57 ullong total_allocated = 0;     /* total memory allocated during run */
  58
  59 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  60 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  61
  62 #ifdef THREADED_RTS
  63 /*
  64  * Storage manager mutex:  protects all the above state from
  65  * simultaneous access by two STG threads.
  66  */
  67 Mutex sm_mutex;
  68 #endif
  69
  70 static void allocNurseries ( void );
  71
  72 static void
  73 initStep (step *stp, int g, int s)
  74 {
  75     stp->no = s;
  76     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  77     stp->blocks = NULL;
  78     stp->n_blocks = 0;
  79     stp->n_words = 0;
  80     stp->live_estimate = 0;
  81     stp->old_blocks = NULL;
  82     stp->n_old_blocks = 0;
  83     stp->gen = &generations[g];
  84     stp->gen_no = g;
  85     stp->large_objects = NULL;
  86     stp->n_large_blocks = 0;
  87     stp->scavenged_large_objects = NULL;
  88     stp->n_scavenged_large_blocks = 0;
  89     stp->mark = 0;
  90     stp->compact = 0;
  91     stp->bitmap = NULL;
  92 #ifdef THREADED_RTS
  93     initSpinLock(&stp->sync_large_objects);
  94 #endif
  95     stp->threads = END_TSO_QUEUE;
  96     stp->old_threads = END_TSO_QUEUE;
  97 }
  98
  99 void
 100 initStorage( void )
 101 {
 102   nat g, s;
 103   generation *gen;
 104
 105   if (generations != NULL) {
 106       // multi-init protection
 107       return;
 108   }
 109
 110   initMBlocks();
 111
 112   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 113    * doing something reasonable.
 114    */
 115   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 116   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
 117   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 118   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 119
 120   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 121       RtsFlags.GcFlags.heapSizeSuggestion >
 122       RtsFlags.GcFlags.maxHeapSize) {
 123     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 124   }
 125
 126   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 127       RtsFlags.GcFlags.minAllocAreaSize >
 128       RtsFlags.GcFlags.maxHeapSize) {
 129       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 130       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 131   }
 132
 133   initBlockAllocator();
 134
 135 #if defined(THREADED_RTS)
 136   initMutex(&sm_mutex);
 137 #endif
 138
 139   ACQUIRE_SM_LOCK;
 140
 141   /* allocate generation info array */
 142   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 143                                              * sizeof(struct generation_),
 144                                              "initStorage: gens");
 145
 146   /* allocate all the steps into an array.  It is important that we do
 147      it this way, because we need the invariant that two step pointers
 148      can be directly compared to see which is the oldest.
 149      Remember that the last generation has only one step. */
 150   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 151   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 152                                "initStorage: steps");
 153
 154   /* Initialise all generations */
 155   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 156     gen = &generations[g];
 157     gen->no = g;
 158     gen->mut_list = allocBlock();
 159     gen->collections = 0;
 160     gen->par_collections = 0;
 161     gen->failed_promotions = 0;
 162     gen->max_blocks = 0;
 163   }
 164
 165   /* A couple of convenience pointers */
 166   g0 = &generations[0];
 167   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 168
 169   /* Allocate step structures in each generation */
 170   if (RtsFlags.GcFlags.generations > 1) {
 171     /* Only for multiple-generations */
 172
 173     /* Oldest generation: one step */
 174     oldest_gen->n_steps = 1;
 175     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 176                                       * RtsFlags.GcFlags.steps;
 177
 178     /* set up all except the oldest generation with 2 steps */
 179     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 180       generations[g].n_steps = RtsFlags.GcFlags.steps;
 181       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 182     }
 183
 184   } else {
 185     /* single generation, i.e. a two-space collector */
 186     g0->n_steps = 1;
 187     g0->steps   = all_steps;
 188   }
 189
 190 #ifdef THREADED_RTS
 191   n_nurseries = n_capabilities;
 192 #else
 193   n_nurseries = 1;
 194 #endif
 195   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 196                               "initStorage: nurseries");
 197
 198   /* Initialise all steps */
 199   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 200     for (s = 0; s < generations[g].n_steps; s++) {
 201         initStep(&generations[g].steps[s], g, s);
 202     }
 203   }
 204
 205   for (s = 0; s < n_nurseries; s++) {
 206       initStep(&nurseries[s], 0, s);
 207   }
 208
 209   /* Set up the destination pointers in each younger gen. step */
 210   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 211     for (s = 0; s < generations[g].n_steps-1; s++) {
 212       generations[g].steps[s].to = &generations[g].steps[s+1];
 213     }
 214     generations[g].steps[s].to = &generations[g+1].steps[0];
 215   }
 216   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 217
 218   for (s = 0; s < n_nurseries; s++) {
 219       nurseries[s].to = generations[0].steps[0].to;
 220   }
 221
 222   /* The oldest generation has one step. */
 223   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 224       if (RtsFlags.GcFlags.generations == 1) {
 225           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 226       } else {
 227           oldest_gen->steps[0].mark = 1;
 228           if (RtsFlags.GcFlags.compact)
 229               oldest_gen->steps[0].compact = 1;
 230       }
 231   }
 232
 233   generations[0].max_blocks = 0;
 234   g0s0 = &generations[0].steps[0];
 235
 236   /* The allocation area.  Policy: keep the allocation area
 237    * small to begin with, even if we have a large suggested heap
 238    * size.  Reason: we're going to do a major collection first, and we
 239    * don't want it to be a big one.  This vague idea is borne out by
 240    * rigorous experimental evidence.
 241    */
 242   allocNurseries();
 243
 244   weak_ptr_list = NULL;
 245   caf_list = NULL;
 246   revertible_caf_list = NULL;
 247
 248   /* initialise the allocate() interface */
 249   alloc_blocks = 0;
 250   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 251
 252   exec_block = NULL;
 253
 254 #ifdef THREADED_RTS
 255   initSpinLock(&gc_alloc_block_sync);
 256   whitehole_spin = 0;
 257 #endif
 258
 259   N = 0;
 260
 261   initGcThreads();
 262
 263   IF_DEBUG(gc, statDescribeGens());
 264
 265   RELEASE_SM_LOCK;
 266 }
 267
 268 void
 269 exitStorage (void)
 270 {
 271     stat_exit(calcAllocated());
 272 }
 273
 274 void
 275 freeStorage (void)
 276 {
 277     stgFree(g0s0); // frees all the steps
 278     stgFree(generations);
 279     freeAllMBlocks();
 280 #if defined(THREADED_RTS)
 281     closeMutex(&sm_mutex);
 282 #endif
 283     stgFree(nurseries);
 284     freeGcThreads();
 285 }
 286
 287 /* -----------------------------------------------------------------------------
 288    CAF management.
 289
 290    The entry code for every CAF does the following:
 291
 292       - builds a CAF_BLACKHOLE in the heap
 293       - pushes an update frame pointing to the CAF_BLACKHOLE
 294       - invokes UPD_CAF(), which:
 295           - calls newCaf, below
 296           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 297
 298    Why do we build a BLACKHOLE in the heap rather than just updating
 299    the thunk directly?  It's so that we only need one kind of update
 300    frame - otherwise we'd need a static version of the update frame too.
 301
 302    newCaf() does the following:
 303
 304       - it puts the CAF on the oldest generation's mut-once list.
 305         This is so that we can treat the CAF as a root when collecting
 306         younger generations.
 307
 308    For GHCI, we have additional requirements when dealing with CAFs:
 309
 310       - we must *retain* all dynamically-loaded CAFs ever entered,
 311         just in case we need them again.
 312       - we must be able to *revert* CAFs that have been evaluated, to
 313         their pre-evaluated form.
 314
 315       To do this, we use an additional CAF list.  When newCaf() is
 316       called on a dynamically-loaded CAF, we add it to the CAF list
 317       instead of the old-generation mutable list, and save away its
 318       old info pointer (in caf->saved_info) for later reversion.
 319
 320       To revert all the CAFs, we traverse the CAF list and reset the
 321       info pointer to caf->saved_info, then throw away the CAF list.
 322       (see GC.c:revertCAFs()).
 323
 324       -- SDM 29/1/01
 325
 326    -------------------------------------------------------------------------- */
 327
 328 void
 329 newCAF(StgClosure* caf)
 330 {
 331   ACQUIRE_SM_LOCK;
 332
 333   if(keepCAFs)
 334   {
 335     // HACK:
 336     // If we are in GHCi _and_ we are using dynamic libraries,
 337     // then we can't redirect newCAF calls to newDynCAF (see below),
 338     // so we make newCAF behave almost like newDynCAF.
 339     // The dynamic libraries might be used by both the interpreted
 340     // program and GHCi itself, so they must not be reverted.
 341     // This also means that in GHCi with dynamic libraries, CAFs are not
 342     // garbage collected. If this turns out to be a problem, we could
 343     // do another hack here and do an address range test on caf to figure
 344     // out whether it is from a dynamic library.
 345     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 346     ((StgIndStatic *)caf)->static_link = caf_list;
 347     caf_list = caf;
 348   }
 349   else
 350   {
 351     /* Put this CAF on the mutable list for the old generation.
 352     * This is a HACK - the IND_STATIC closure doesn't really have
 353     * a mut_link field, but we pretend it has - in fact we re-use
 354     * the STATIC_LINK field for the time being, because when we
 355     * come to do a major GC we won't need the mut_link field
 356     * any more and can use it as a STATIC_LINK.
 357     */
 358     ((StgIndStatic *)caf)->saved_info = NULL;
 359     recordMutableGen(caf, oldest_gen->no);
 360   }
 361
 362   RELEASE_SM_LOCK;
 363 }
 364
 365 // An alternate version of newCaf which is used for dynamically loaded
 366 // object code in GHCi.  In this case we want to retain *all* CAFs in
 367 // the object code, because they might be demanded at any time from an
 368 // expression evaluated on the command line.
 369 // Also, GHCi might want to revert CAFs, so we add these to the
 370 // revertible_caf_list.
 371 //
 372 // The linker hackily arranges that references to newCaf from dynamic
 373 // code end up pointing to newDynCAF.
 374 void
 375 newDynCAF(StgClosure *caf)
 376 {
 377     ACQUIRE_SM_LOCK;
 378
 379     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 380     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 381     revertible_caf_list = caf;
 382
 383     RELEASE_SM_LOCK;
 384 }
 385
 386 /* -----------------------------------------------------------------------------
 387    Nursery management.
 388    -------------------------------------------------------------------------- */
 389
 390 static bdescr *
 391 allocNursery (step *stp, bdescr *tail, nat blocks)
 392 {
 393     bdescr *bd;
 394     nat i;
 395
 396     // Allocate a nursery: we allocate fresh blocks one at a time and
 397     // cons them on to the front of the list, not forgetting to update
 398     // the back pointer on the tail of the list to point to the new block.
 399     for (i=0; i < blocks; i++) {
 400         // @LDV profiling
 401         /*
 402           processNursery() in LdvProfile.c assumes that every block group in
 403           the nursery contains only a single block. So, if a block group is
 404           given multiple blocks, change processNursery() accordingly.
 405         */
 406         bd = allocBlock();
 407         bd->link = tail;
 408         // double-link the nursery: we might need to insert blocks
 409         if (tail != NULL) {
 410             tail->u.back = bd;
 411         }
 412         bd->step = stp;
 413         bd->gen_no = 0;
 414         bd->flags = 0;
 415         bd->free = bd->start;
 416         tail = bd;
 417     }
 418     tail->u.back = NULL;
 419     return tail;
 420 }
 421
 422 static void
 423 assignNurseriesToCapabilities (void)
 424 {
 425 #ifdef THREADED_RTS
 426     nat i;
 427
 428     for (i = 0; i < n_nurseries; i++) {
 429         capabilities[i].r.rNursery        = &nurseries[i];
 430         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 431         capabilities[i].r.rCurrentAlloc   = NULL;
 432     }
 433 #else /* THREADED_RTS */
 434     MainCapability.r.rNursery        = &nurseries[0];
 435     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 436     MainCapability.r.rCurrentAlloc   = NULL;
 437 #endif
 438 }
 439
 440 static void
 441 allocNurseries( void )
 442 {
 443     nat i;
 444
 445     for (i = 0; i < n_nurseries; i++) {
 446         nurseries[i].blocks =
 447             allocNursery(&nurseries[i], NULL,
 448                          RtsFlags.GcFlags.minAllocAreaSize);
 449         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 450         nurseries[i].old_blocks   = NULL;
 451         nurseries[i].n_old_blocks = 0;
 452     }
 453     assignNurseriesToCapabilities();
 454 }
 455
 456 void
 457 resetNurseries( void )
 458 {
 459     nat i;
 460     bdescr *bd;
 461     step *stp;
 462
 463     for (i = 0; i < n_nurseries; i++) {
 464         stp = &nurseries[i];
 465         for (bd = stp->blocks; bd; bd = bd->link) {
 466             bd->free = bd->start;
 467             ASSERT(bd->gen_no == 0);
 468             ASSERT(bd->step == stp);
 469             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 470         }
 471     }
 472     assignNurseriesToCapabilities();
 473 }
 474
 475 lnat
 476 countNurseryBlocks (void)
 477 {
 478     nat i;
 479     lnat blocks = 0;
 480
 481     for (i = 0; i < n_nurseries; i++) {
 482         blocks += nurseries[i].n_blocks;
 483     }
 484     return blocks;
 485 }
 486
 487 static void
 488 resizeNursery ( step *stp, nat blocks )
 489 {
 490   bdescr *bd;
 491   nat nursery_blocks;
 492
 493   nursery_blocks = stp->n_blocks;
 494   if (nursery_blocks == blocks) return;
 495
 496   if (nursery_blocks < blocks) {
 497       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 498                  blocks);
 499     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 500   }
 501   else {
 502     bdescr *next_bd;
 503
 504     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 505                blocks);
 506
 507     bd = stp->blocks;
 508     while (nursery_blocks > blocks) {
 509         next_bd = bd->link;
 510         next_bd->u.back = NULL;
 511         nursery_blocks -= bd->blocks; // might be a large block
 512         freeGroup(bd);
 513         bd = next_bd;
 514     }
 515     stp->blocks = bd;
 516     // might have gone just under, by freeing a large block, so make
 517     // up the difference.
 518     if (nursery_blocks < blocks) {
 519         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 520     }
 521   }
 522
 523   stp->n_blocks = blocks;
 524   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 525 }
 526
 527 //
 528 // Resize each of the nurseries to the specified size.
 529 //
 530 void
 531 resizeNurseriesFixed (nat blocks)
 532 {
 533     nat i;
 534     for (i = 0; i < n_nurseries; i++) {
 535         resizeNursery(&nurseries[i], blocks);
 536     }
 537 }
 538
 539 //
 540 // Resize the nurseries to the total specified size.
 541 //
 542 void
 543 resizeNurseries (nat blocks)
 544 {
 545     // If there are multiple nurseries, then we just divide the number
 546     // of available blocks between them.
 547     resizeNurseriesFixed(blocks / n_nurseries);
 548 }
 549
 550
 551 /* -----------------------------------------------------------------------------
 552    move_TSO is called to update the TSO structure after it has been
 553    moved from one place to another.
 554    -------------------------------------------------------------------------- */
 555
 556 void
 557 move_TSO (StgTSO *src, StgTSO *dest)
 558 {
 559     ptrdiff_t diff;
 560
 561     // relocate the stack pointer...
 562     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 563     dest->sp = (StgPtr)dest->sp + diff;
 564 }
 565
 566 /* -----------------------------------------------------------------------------
 567    The allocate() interface
 568
 569    allocateInGen() function allocates memory directly into a specific
 570    generation.  It always succeeds, and returns a chunk of memory n
 571    words long.  n can be larger than the size of a block if necessary,
 572    in which case a contiguous block group will be allocated.
 573
 574    allocate(n) is equivalent to allocateInGen(g0).
 575    -------------------------------------------------------------------------- */
 576
 577 StgPtr
 578 allocateInGen (generation *g, lnat n)
 579 {
 580     step *stp;
 581     bdescr *bd;
 582     StgPtr ret;
 583
 584     ACQUIRE_SM_LOCK;
 585
 586     TICK_ALLOC_HEAP_NOCTR(n);
 587     CCS_ALLOC(CCCS,n);
 588
 589     stp = &g->steps[0];
 590
 591     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 592     {
 593         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 594
 595         // Attempting to allocate an object larger than maxHeapSize
 596         // should definitely be disallowed.  (bug #1791)
 597         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 598             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 599             heapOverflow();
 600             // heapOverflow() doesn't exit (see #2592), but we aren't
 601             // in a position to do a clean shutdown here: we
 602             // either have to allocate the memory or exit now.
 603             // Allocating the memory would be bad, because the user
 604             // has requested that we not exceed maxHeapSize, so we
 605             // just exit.
 606             stg_exit(EXIT_HEAPOVERFLOW);
 607         }
 608
 609         bd = allocGroup(req_blocks);
 610         dbl_link_onto(bd, &stp->large_objects);
 611         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 612         alloc_blocks += bd->blocks;
 613         bd->gen_no  = g->no;
 614         bd->step = stp;
 615         bd->flags = BF_LARGE;
 616         bd->free = bd->start + n;
 617         ret = bd->start;
 618     }
 619     else
 620     {
 621         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 622         bd = stp->blocks;
 623         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 624             bd = allocBlock();
 625             bd->gen_no = g->no;
 626             bd->step = stp;
 627             bd->flags = 0;
 628             bd->link = stp->blocks;
 629             stp->blocks = bd;
 630             stp->n_blocks++;
 631             alloc_blocks++;
 632         }
 633         ret = bd->free;
 634         bd->free += n;
 635     }
 636
 637     RELEASE_SM_LOCK;
 638
 639     return ret;
 640 }
 641
 642 StgPtr
 643 allocate (lnat n)
 644 {
 645     return allocateInGen(g0,n);
 646 }
 647
 648 lnat
 649 allocatedBytes( void )
 650 {
 651     lnat allocated;
 652
 653     allocated = alloc_blocks * BLOCK_SIZE_W;
 654     if (pinned_object_block != NULL) {
 655         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 656             pinned_object_block->free;
 657     }
 658
 659     return allocated;
 660 }
 661
 662 // split N blocks off the front of the given bdescr, returning the
 663 // new block group.  We treat the remainder as if it
 664 // had been freshly allocated in generation 0.
 665 bdescr *
 666 splitLargeBlock (bdescr *bd, nat blocks)
 667 {
 668     bdescr *new_bd;
 669
 670     // subtract the original number of blocks from the counter first
 671     bd->step->n_large_blocks -= bd->blocks;
 672
 673     new_bd = splitBlockGroup (bd, blocks);
 674
 675     dbl_link_onto(new_bd, &g0s0->large_objects);
 676     g0s0->n_large_blocks += new_bd->blocks;
 677     new_bd->gen_no  = g0s0->no;
 678     new_bd->step    = g0s0;
 679     new_bd->flags   = BF_LARGE;
 680     new_bd->free    = bd->free;
 681     ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
 682
 683     // add the new number of blocks to the counter.  Due to the gaps
 684     // for block descriptor, new_bd->blocks + bd->blocks might not be
 685     // equal to the original bd->blocks, which is why we do it this way.
 686     bd->step->n_large_blocks += bd->blocks;
 687
 688     return new_bd;
 689 }
 690
 691 /* -----------------------------------------------------------------------------
 692    allocateLocal()
 693
 694    This allocates memory in the current thread - it is intended for
 695    use primarily from STG-land where we have a Capability.  It is
 696    better than allocate() because it doesn't require taking the
 697    sm_mutex lock in the common case.
 698
 699    Memory is allocated directly from the nursery if possible (but not
 700    from the current nursery block, so as not to interfere with
 701    Hp/HpLim).
 702    -------------------------------------------------------------------------- */
 703
 704 StgPtr
 705 allocateLocal (Capability *cap, lnat n)
 706 {
 707     bdescr *bd;
 708     StgPtr p;
 709
 710     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 711         return allocateInGen(g0,n);
 712     }
 713
 714     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 715
 716     TICK_ALLOC_HEAP_NOCTR(n);
 717     CCS_ALLOC(CCCS,n);
 718
 719     bd = cap->r.rCurrentAlloc;
 720     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 721
 722         // The CurrentAlloc block is full, we need to find another
 723         // one.  First, we try taking the next block from the
 724         // nursery:
 725         bd = cap->r.rCurrentNursery->link;
 726
 727         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 728             // The nursery is empty, or the next block is already
 729             // full: allocate a fresh block (we can't fail here).
 730             ACQUIRE_SM_LOCK;
 731             bd = allocBlock();
 732             cap->r.rNursery->n_blocks++;
 733             RELEASE_SM_LOCK;
 734             bd->gen_no = 0;
 735             bd->step = cap->r.rNursery;
 736             bd->flags = 0;
 737             // NO: alloc_blocks++;
 738             // calcAllocated() uses the size of the nursery, and we've
 739             // already bumpted nursery->n_blocks above.  We'll GC
 740             // pretty quickly now anyway, because MAYBE_GC() will
 741             // notice that CurrentNursery->link is NULL.
 742         } else {
 743             // we have a block in the nursery: take it and put
 744             // it at the *front* of the nursery list, and use it
 745             // to allocate() from.
 746             cap->r.rCurrentNursery->link = bd->link;
 747             if (bd->link != NULL) {
 748                 bd->link->u.back = cap->r.rCurrentNursery;
 749             }
 750         }
 751         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 752         cap->r.rCurrentAlloc = bd;
 753         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 754     }
 755     p = bd->free;
 756     bd->free += n;
 757     return p;
 758 }
 759
 760 /* ---------------------------------------------------------------------------
 761    Allocate a fixed/pinned object.
 762
 763    We allocate small pinned objects into a single block, allocating a
 764    new block when the current one overflows.  The block is chained
 765    onto the large_object_list of generation 0 step 0.
 766
 767    NOTE: The GC can't in general handle pinned objects.  This
 768    interface is only safe to use for ByteArrays, which have no
 769    pointers and don't require scavenging.  It works because the
 770    block's descriptor has the BF_LARGE flag set, so the block is
 771    treated as a large object and chained onto various lists, rather
 772    than the individual objects being copied.  However, when it comes
 773    to scavenge the block, the GC will only scavenge the first object.
 774    The reason is that the GC can't linearly scan a block of pinned
 775    objects at the moment (doing so would require using the
 776    mostly-copying techniques).  But since we're restricting ourselves
 777    to pinned ByteArrays, not scavenging is ok.
 778
 779    This function is called by newPinnedByteArray# which immediately
 780    fills the allocated memory with a MutableByteArray#.
 781    ------------------------------------------------------------------------- */
 782
 783 StgPtr
 784 allocatePinned( lnat n )
 785 {
 786     StgPtr p;
 787     bdescr *bd = pinned_object_block;
 788
 789     // If the request is for a large object, then allocate()
 790     // will give us a pinned object anyway.
 791     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 792         p = allocate(n);
 793         Bdescr(p)->flags |= BF_PINNED;
 794         return p;
 795     }
 796
 797     ACQUIRE_SM_LOCK;
 798
 799     TICK_ALLOC_HEAP_NOCTR(n);
 800     CCS_ALLOC(CCCS,n);
 801
 802     // If we don't have a block of pinned objects yet, or the current
 803     // one isn't large enough to hold the new object, allocate a new one.
 804     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 805         pinned_object_block = bd = allocBlock();
 806         dbl_link_onto(bd, &g0s0->large_objects);
 807         g0s0->n_large_blocks++;
 808         bd->gen_no = 0;
 809         bd->step   = g0s0;
 810         bd->flags  = BF_PINNED | BF_LARGE;
 811         bd->free   = bd->start;
 812         alloc_blocks++;
 813     }
 814
 815     p = bd->free;
 816     bd->free += n;
 817     RELEASE_SM_LOCK;
 818     return p;
 819 }
 820
 821 /* -----------------------------------------------------------------------------
 822    Write Barriers
 823    -------------------------------------------------------------------------- */
 824
 825 /*
 826    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 827    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 828    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 829    and is put on the mutable list.
 830 */
 831 void
 832 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 833 {
 834     Capability *cap = regTableToCapability(reg);
 835     bdescr *bd;
 836     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 837         p->header.info = &stg_MUT_VAR_DIRTY_info;
 838         bd = Bdescr((StgPtr)p);
 839         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 840     }
 841 }
 842
 843 // Setting a TSO's link field with a write barrier.
 844 // It is *not* necessary to call this function when
 845 //    * setting the link field to END_TSO_QUEUE
 846 //    * putting a TSO on the blackhole_queue
 847 //    * setting the link field of the currently running TSO, as it
 848 //      will already be dirty.
 849 void
 850 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 851 {
 852     bdescr *bd;
 853     if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
 854         tso->flags |= TSO_LINK_DIRTY;
 855         bd = Bdescr((StgPtr)tso);
 856         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 857     }
 858     tso->_link = target;
 859 }
 860
 861 void
 862 dirty_TSO (Capability *cap, StgTSO *tso)
 863 {
 864     bdescr *bd;
 865     if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
 866         bd = Bdescr((StgPtr)tso);
 867         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 868     }
 869     tso->dirty = 1;
 870 }
 871
 872 /*
 873    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 874    on the mutable list; a MVAR_DIRTY is.  When written to, a
 875    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 876    The check for MVAR_CLEAN is inlined at the call site for speed,
 877    this really does make a difference on concurrency-heavy benchmarks
 878    such as Chaneneos and cheap-concurrency.
 879 */
 880 void
 881 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 882 {
 883     Capability *cap = regTableToCapability(reg);
 884     bdescr *bd;
 885     bd = Bdescr((StgPtr)p);
 886     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 887 }
 888
 889 /* -----------------------------------------------------------------------------
 890  * Stats and stuff
 891  * -------------------------------------------------------------------------- */
 892
 893 /* -----------------------------------------------------------------------------
 894  * calcAllocated()
 895  *
 896  * Approximate how much we've allocated: number of blocks in the
 897  * nursery + blocks allocated via allocate() - unused nusery blocks.
 898  * This leaves a little slop at the end of each block, and doesn't
 899  * take into account large objects (ToDo).
 900  * -------------------------------------------------------------------------- */
 901
 902 lnat
 903 calcAllocated( void )
 904 {
 905   nat allocated;
 906   bdescr *bd;
 907
 908   allocated = allocatedBytes();
 909   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 910
 911   {
 912 #ifdef THREADED_RTS
 913   nat i;
 914   for (i = 0; i < n_nurseries; i++) {
 915       Capability *cap;
 916       for ( bd = capabilities[i].r.rCurrentNursery->link;
 917             bd != NULL; bd = bd->link ) {
 918           allocated -= BLOCK_SIZE_W;
 919       }
 920       cap = &capabilities[i];
 921       if (cap->r.rCurrentNursery->free <
 922           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 923           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 924               - cap->r.rCurrentNursery->free;
 925       }
 926   }
 927 #else
 928   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 929
 930   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 931       allocated -= BLOCK_SIZE_W;
 932   }
 933   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 934       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 935           - current_nursery->free;
 936   }
 937 #endif
 938   }
 939
 940   total_allocated += allocated;
 941   return allocated;
 942 }
 943
 944 /* Approximate the amount of live data in the heap.  To be called just
 945  * after garbage collection (see GarbageCollect()).
 946  */
 947 lnat
 948 calcLiveBlocks(void)
 949 {
 950   nat g, s;
 951   lnat live = 0;
 952   step *stp;
 953
 954   if (RtsFlags.GcFlags.generations == 1) {
 955       return g0s0->n_large_blocks + g0s0->n_blocks;
 956   }
 957
 958   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 959     for (s = 0; s < generations[g].n_steps; s++) {
 960       /* approximate amount of live data (doesn't take into account slop
 961        * at end of each block).
 962        */
 963       if (g == 0 && s == 0) {
 964           continue;
 965       }
 966       stp = &generations[g].steps[s];
 967       live += stp->n_large_blocks + stp->n_blocks;
 968     }
 969   }
 970   return live;
 971 }
 972
 973 lnat
 974 countOccupied(bdescr *bd)
 975 {
 976     lnat words;
 977
 978     words = 0;
 979     for (; bd != NULL; bd = bd->link) {
 980         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
 981         words += bd->free - bd->start;
 982     }
 983     return words;
 984 }
 985
 986 // Return an accurate count of the live data in the heap, excluding
 987 // generation 0.
 988 lnat
 989 calcLiveWords(void)
 990 {
 991     nat g, s;
 992     lnat live;
 993     step *stp;
 994
 995     if (RtsFlags.GcFlags.generations == 1) {
 996         return g0s0->n_words + countOccupied(g0s0->large_objects);
 997     }
 998
 999     live = 0;
1000     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1001         for (s = 0; s < generations[g].n_steps; s++) {
1002             if (g == 0 && s == 0) continue;
1003             stp = &generations[g].steps[s];
1004             live += stp->n_words + countOccupied(stp->large_objects);
1005         }
1006     }
1007     return live;
1008 }
1009
1010 /* Approximate the number of blocks that will be needed at the next
1011  * garbage collection.
1012  *
1013  * Assume: all data currently live will remain live.  Steps that will
1014  * be collected next time will therefore need twice as many blocks
1015  * since all the data will be copied.
1016  */
1017 extern lnat
1018 calcNeeded(void)
1019 {
1020     lnat needed = 0;
1021     nat g, s;
1022     step *stp;
1023
1024     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1025         for (s = 0; s < generations[g].n_steps; s++) {
1026             if (g == 0 && s == 0) { continue; }
1027             stp = &generations[g].steps[s];
1028
1029             // we need at least this much space
1030             needed += stp->n_blocks + stp->n_large_blocks;
1031
1032             // any additional space needed to collect this gen next time?
1033             if (g == 0 || // always collect gen 0
1034                 (generations[g].steps[0].n_blocks +
1035                  generations[g].steps[0].n_large_blocks
1036                  > generations[g].max_blocks)) {
1037                 // we will collect this gen next time
1038                 if (stp->mark) {
1039                     //  bitmap:
1040                     needed += stp->n_blocks / BITS_IN(W_);
1041                     //  mark stack:
1042                     needed += stp->n_blocks / 100;
1043                 }
1044                 if (stp->compact) {
1045                     continue; // no additional space needed for compaction
1046                 } else {
1047                     needed += stp->n_blocks;
1048                 }
1049             }
1050         }
1051     }
1052     return needed;
1053 }
1054
1055 /* ----------------------------------------------------------------------------
1056    Executable memory
1057
1058    Executable memory must be managed separately from non-executable
1059    memory.  Most OSs these days require you to jump through hoops to
1060    dynamically allocate executable memory, due to various security
1061    measures.
1062
1063    Here we provide a small memory allocator for executable memory.
1064    Memory is managed with a page granularity; we allocate linearly
1065    in the page, and when the page is emptied (all objects on the page
1066    are free) we free the page again, not forgetting to make it
1067    non-executable.
1068
1069    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1070          the linker cannot use allocateExec for loading object code files
1071          on Windows. Once allocateExec can handle larger objects, the linker
1072          should be modified to use allocateExec instead of VirtualAlloc.
1073    ------------------------------------------------------------------------- */
1074
1075 #if defined(linux_HOST_OS)
1076
1077 // On Linux we need to use libffi for allocating executable memory,
1078 // because it knows how to work around the restrictions put in place
1079 // by SELinux.
1080
1081 void *allocateExec (nat bytes, void **exec_ret)
1082 {
1083     void **ret, **exec;
1084     ACQUIRE_SM_LOCK;
1085     ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1086     RELEASE_SM_LOCK;
1087     if (ret == NULL) return ret;
1088     *ret = ret; // save the address of the writable mapping, for freeExec().
1089     *exec_ret = exec + 1;
1090     return (ret + 1);
1091 }
1092
1093 // freeExec gets passed the executable address, not the writable address.
1094 void freeExec (void *addr)
1095 {
1096     void *writable;
1097     writable = *((void**)addr - 1);
1098     ACQUIRE_SM_LOCK;
1099     ffi_closure_free (writable);
1100     RELEASE_SM_LOCK
1101 }
1102
1103 #else
1104
1105 void *allocateExec (nat bytes, void **exec_ret)
1106 {
1107     void *ret;
1108     nat n;
1109
1110     ACQUIRE_SM_LOCK;
1111
1112     // round up to words.
1113     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1114
1115     if (n+1 > BLOCK_SIZE_W) {
1116         barf("allocateExec: can't handle large objects");
1117     }
1118
1119     if (exec_block == NULL ||
1120         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1121         bdescr *bd;
1122         lnat pagesize = getPageSize();
1123         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1124         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1125         bd->gen_no = 0;
1126         bd->flags = BF_EXEC;
1127         bd->link = exec_block;
1128         if (exec_block != NULL) {
1129             exec_block->u.back = bd;
1130         }
1131         bd->u.back = NULL;
1132         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1133         exec_block = bd;
1134     }
1135     *(exec_block->free) = n;  // store the size of this chunk
1136     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1137     ret = exec_block->free + 1;
1138     exec_block->free += n + 1;
1139
1140     RELEASE_SM_LOCK
1141     *exec_ret = ret;
1142     return ret;
1143 }
1144
1145 void freeExec (void *addr)
1146 {
1147     StgPtr p = (StgPtr)addr - 1;
1148     bdescr *bd = Bdescr((StgPtr)p);
1149
1150     if ((bd->flags & BF_EXEC) == 0) {
1151         barf("freeExec: not executable");
1152     }
1153
1154     if (*(StgPtr)p == 0) {
1155         barf("freeExec: already free?");
1156     }
1157
1158     ACQUIRE_SM_LOCK;
1159
1160     bd->gen_no -= *(StgPtr)p;
1161     *(StgPtr)p = 0;
1162
1163     if (bd->gen_no == 0) {
1164         // Free the block if it is empty, but not if it is the block at
1165         // the head of the queue.
1166         if (bd != exec_block) {
1167             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1168             dbl_link_remove(bd, &exec_block);
1169             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1170             freeGroup(bd);
1171         } else {
1172             bd->free = bd->start;
1173         }
1174     }
1175
1176     RELEASE_SM_LOCK
1177 }
1178
1179 #endif /* mingw32_HOST_OS */
1180
1181 /* -----------------------------------------------------------------------------
1182    Debugging
1183
1184    memInventory() checks for memory leaks by counting up all the
1185    blocks we know about and comparing that to the number of blocks
1186    allegedly floating around in the system.
1187    -------------------------------------------------------------------------- */
1188
1189 #ifdef DEBUG
1190
1191 // Useful for finding partially full blocks in gdb
1192 void findSlop(bdescr *bd);
1193 void findSlop(bdescr *bd)
1194 {
1195     lnat slop;
1196
1197     for (; bd != NULL; bd = bd->link) {
1198         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1199         if (slop > (1024/sizeof(W_))) {
1200             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1201                        bd->start, bd, slop / (1024/sizeof(W_)));
1202         }
1203     }
1204 }
1205
1206 nat
1207 countBlocks(bdescr *bd)
1208 {
1209     nat n;
1210     for (n=0; bd != NULL; bd=bd->link) {
1211         n += bd->blocks;
1212     }
1213     return n;
1214 }
1215
1216 // (*1) Just like countBlocks, except that we adjust the count for a
1217 // megablock group so that it doesn't include the extra few blocks
1218 // that would be taken up by block descriptors in the second and
1219 // subsequent megablock.  This is so we can tally the count with the
1220 // number of blocks allocated in the system, for memInventory().
1221 static nat
1222 countAllocdBlocks(bdescr *bd)
1223 {
1224     nat n;
1225     for (n=0; bd != NULL; bd=bd->link) {
1226         n += bd->blocks;
1227         // hack for megablock groups: see (*1) above
1228         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1229             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1230                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1231         }
1232     }
1233     return n;
1234 }
1235
1236 static lnat
1237 stepBlocks (step *stp)
1238 {
1239     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1240     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1241     return stp->n_blocks + stp->n_old_blocks +
1242             countAllocdBlocks(stp->large_objects);
1243 }
1244
1245 // If memInventory() calculates that we have a memory leak, this
1246 // function will try to find the block(s) that are leaking by marking
1247 // all the ones that we know about, and search through memory to find
1248 // blocks that are not marked.  In the debugger this can help to give
1249 // us a clue about what kind of block leaked.  In the future we might
1250 // annotate blocks with their allocation site to give more helpful
1251 // info.
1252 static void
1253 findMemoryLeak (void)
1254 {
1255   nat g, s, i;
1256   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1257       for (i = 0; i < n_capabilities; i++) {
1258           markBlocks(capabilities[i].mut_lists[g]);
1259       }
1260       markBlocks(generations[g].mut_list);
1261       for (s = 0; s < generations[g].n_steps; s++) {
1262           markBlocks(generations[g].steps[s].blocks);
1263           markBlocks(generations[g].steps[s].large_objects);
1264       }
1265   }
1266
1267   for (i = 0; i < n_nurseries; i++) {
1268       markBlocks(nurseries[i].blocks);
1269       markBlocks(nurseries[i].large_objects);
1270   }
1271
1272 #ifdef PROFILING
1273   // TODO:
1274   // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1275   //    markRetainerBlocks();
1276   // }
1277 #endif
1278
1279   // count the blocks allocated by the arena allocator
1280   // TODO:
1281   // markArenaBlocks();
1282
1283   // count the blocks containing executable memory
1284   markBlocks(exec_block);
1285
1286   reportUnmarkedBlocks();
1287 }
1288
1289
1290 void
1291 memInventory (rtsBool show)
1292 {
1293   nat g, s, i;
1294   step *stp;
1295   lnat gen_blocks[RtsFlags.GcFlags.generations];
1296   lnat nursery_blocks, retainer_blocks,
1297        arena_blocks, exec_blocks;
1298   lnat live_blocks = 0, free_blocks = 0;
1299   rtsBool leak;
1300
1301   // count the blocks we current have
1302
1303   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1304       gen_blocks[g] = 0;
1305       for (i = 0; i < n_capabilities; i++) {
1306           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1307       }
1308       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1309       for (s = 0; s < generations[g].n_steps; s++) {
1310           stp = &generations[g].steps[s];
1311           gen_blocks[g] += stepBlocks(stp);
1312       }
1313   }
1314
1315   nursery_blocks = 0;
1316   for (i = 0; i < n_nurseries; i++) {
1317       nursery_blocks += stepBlocks(&nurseries[i]);
1318   }
1319
1320   retainer_blocks = 0;
1321 #ifdef PROFILING
1322   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1323       retainer_blocks = retainerStackBlocks();
1324   }
1325 #endif
1326
1327   // count the blocks allocated by the arena allocator
1328   arena_blocks = arenaBlocks();
1329
1330   // count the blocks containing executable memory
1331   exec_blocks = countAllocdBlocks(exec_block);
1332
1333   /* count the blocks on the free list */
1334   free_blocks = countFreeList();
1335
1336   live_blocks = 0;
1337   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1338       live_blocks += gen_blocks[g];
1339   }
1340   live_blocks += nursery_blocks +
1341                + retainer_blocks + arena_blocks + exec_blocks;
1342
1343 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1344
1345   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1346
1347   if (show || leak)
1348   {
1349       if (leak) {
1350           debugBelch("Memory leak detected:\n");
1351       } else {
1352           debugBelch("Memory inventory:\n");
1353       }
1354       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1355           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1356                      gen_blocks[g], MB(gen_blocks[g]));
1357       }
1358       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1359                  nursery_blocks, MB(nursery_blocks));
1360       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1361                  retainer_blocks, MB(retainer_blocks));
1362       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1363                  arena_blocks, MB(arena_blocks));
1364       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1365                  exec_blocks, MB(exec_blocks));
1366       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1367                  free_blocks, MB(free_blocks));
1368       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1369                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1370       if (leak) {
1371           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1372                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1373       }
1374   }
1375
1376   if (leak) {
1377       debugBelch("\n");
1378       findMemoryLeak();
1379   }
1380   ASSERT(n_alloc_blocks == live_blocks);
1381   ASSERT(!leak);
1382 }
1383
1384
1385 /* Full heap sanity check. */
1386 void
1387 checkSanity( void )
1388 {
1389     nat g, s;
1390
1391     if (RtsFlags.GcFlags.generations == 1) {
1392         checkHeap(g0s0->blocks);
1393         checkLargeObjects(g0s0->large_objects);
1394     } else {
1395
1396         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1397             for (s = 0; s < generations[g].n_steps; s++) {
1398                 if (g == 0 && s == 0) { continue; }
1399                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1400                        == generations[g].steps[s].n_blocks);
1401                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1402                        == generations[g].steps[s].n_large_blocks);
1403                 checkHeap(generations[g].steps[s].blocks);
1404                 checkLargeObjects(generations[g].steps[s].large_objects);
1405             }
1406         }
1407
1408         for (s = 0; s < n_nurseries; s++) {
1409             ASSERT(countBlocks(nurseries[s].blocks)
1410                    == nurseries[s].n_blocks);
1411             ASSERT(countBlocks(nurseries[s].large_objects)
1412                    == nurseries[s].n_large_blocks);
1413         }
1414
1415         checkFreeListSanity();
1416     }
1417
1418 #if defined(THREADED_RTS)
1419     // check the stacks too in threaded mode, because we don't do a
1420     // full heap sanity check in this case (see checkHeap())
1421     checkMutableLists(rtsTrue);
1422 #else
1423     checkMutableLists(rtsFalse);
1424 #endif
1425 }
1426
1427 /* Nursery sanity check */
1428 void
1429 checkNurserySanity( step *stp )
1430 {
1431     bdescr *bd, *prev;
1432     nat blocks = 0;
1433
1434     prev = NULL;
1435     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1436         ASSERT(bd->u.back == prev);
1437         prev = bd;
1438         blocks += bd->blocks;
1439     }
1440     ASSERT(blocks == stp->n_blocks);
1441 }
1442
1443 // handy function for use in gdb, because Bdescr() is inlined.
1444 extern bdescr *_bdescr( StgPtr p );
1445
1446 bdescr *
1447 _bdescr( StgPtr p )
1448 {
1449     return Bdescr(p);
1450 }
1451
1452 #endif