rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16
  17 #include "Storage.h"
  18 #include "RtsUtils.h"
  19 #include "Stats.h"
  20 #include "BlockAlloc.h"
  21 #include "Weak.h"
  22 #include "Sanity.h"
  23 #include "Arena.h"
  24 #include "Capability.h"
  25 #include "Schedule.h"
  26 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  27 #include "OSMem.h"
  28 #include "Trace.h"
  29 #include "GC.h"
  30 #include "Evac.h"
  31
  32 #include <string.h>
  33
  34 #include "ffi.h"
  35
  36 /*
  37  * All these globals require sm_mutex to access in THREADED_RTS mode.
  38  */
  39 StgClosure    *caf_list         = NULL;
  40 StgClosure    *revertible_caf_list = NULL;
  41 rtsBool       keepCAFs;
  42
  43 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  44 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  45 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  46
  47 static bdescr *exec_block;
  48
  49 generation *generations = NULL; /* all the generations */
  50 generation *g0          = NULL; /* generation 0, for convenience */
  51 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  52 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  53
  54 nat total_steps         = 0;
  55 step *all_steps         = NULL; /* single array of steps */
  56
  57 ullong total_allocated = 0;     /* total memory allocated during run */
  58
  59 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  60 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  61
  62 #ifdef THREADED_RTS
  63 /*
  64  * Storage manager mutex:  protects all the above state from
  65  * simultaneous access by two STG threads.
  66  */
  67 Mutex sm_mutex;
  68 #endif
  69
  70 static void allocNurseries ( void );
  71
  72 static void
  73 initStep (step *stp, int g, int s)
  74 {
  75     stp->no = s;
  76     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  77     stp->blocks = NULL;
  78     stp->n_blocks = 0;
  79     stp->n_words = 0;
  80     stp->live_estimate = 0;
  81     stp->old_blocks = NULL;
  82     stp->n_old_blocks = 0;
  83     stp->gen = &generations[g];
  84     stp->gen_no = g;
  85     stp->large_objects = NULL;
  86     stp->n_large_blocks = 0;
  87     stp->scavenged_large_objects = NULL;
  88     stp->n_scavenged_large_blocks = 0;
  89     stp->mark = 0;
  90     stp->compact = 0;
  91     stp->bitmap = NULL;
  92 #ifdef THREADED_RTS
  93     initSpinLock(&stp->sync_large_objects);
  94 #endif
  95     stp->threads = END_TSO_QUEUE;
  96     stp->old_threads = END_TSO_QUEUE;
  97 }
  98
  99 void
 100 initStorage( void )
 101 {
 102   nat g, s;
 103   generation *gen;
 104
 105   if (generations != NULL) {
 106       // multi-init protection
 107       return;
 108   }
 109
 110   initMBlocks();
 111
 112   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 113    * doing something reasonable.
 114    */
 115   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 116   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
 117   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 118   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 119
 120   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 121       RtsFlags.GcFlags.heapSizeSuggestion >
 122       RtsFlags.GcFlags.maxHeapSize) {
 123     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 124   }
 125
 126   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 127       RtsFlags.GcFlags.minAllocAreaSize >
 128       RtsFlags.GcFlags.maxHeapSize) {
 129       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 130       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 131   }
 132
 133   initBlockAllocator();
 134
 135 #if defined(THREADED_RTS)
 136   initMutex(&sm_mutex);
 137 #endif
 138
 139   ACQUIRE_SM_LOCK;
 140
 141   /* allocate generation info array */
 142   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 143                                              * sizeof(struct generation_),
 144                                              "initStorage: gens");
 145
 146   /* allocate all the steps into an array.  It is important that we do
 147      it this way, because we need the invariant that two step pointers
 148      can be directly compared to see which is the oldest.
 149      Remember that the last generation has only one step. */
 150   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 151   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 152                                "initStorage: steps");
 153
 154   /* Initialise all generations */
 155   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 156     gen = &generations[g];
 157     gen->no = g;
 158     gen->mut_list = allocBlock();
 159     gen->collections = 0;
 160     gen->par_collections = 0;
 161     gen->failed_promotions = 0;
 162     gen->max_blocks = 0;
 163   }
 164
 165   /* A couple of convenience pointers */
 166   g0 = &generations[0];
 167   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 168
 169   /* Allocate step structures in each generation */
 170   if (RtsFlags.GcFlags.generations > 1) {
 171     /* Only for multiple-generations */
 172
 173     /* Oldest generation: one step */
 174     oldest_gen->n_steps = 1;
 175     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 176                                       * RtsFlags.GcFlags.steps;
 177
 178     /* set up all except the oldest generation with 2 steps */
 179     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 180       generations[g].n_steps = RtsFlags.GcFlags.steps;
 181       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 182     }
 183
 184   } else {
 185     /* single generation, i.e. a two-space collector */
 186     g0->n_steps = 1;
 187     g0->steps   = all_steps;
 188   }
 189
 190 #ifdef THREADED_RTS
 191   n_nurseries = n_capabilities;
 192 #else
 193   n_nurseries = 1;
 194 #endif
 195   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 196                               "initStorage: nurseries");
 197
 198   /* Initialise all steps */
 199   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 200     for (s = 0; s < generations[g].n_steps; s++) {
 201         initStep(&generations[g].steps[s], g, s);
 202     }
 203   }
 204
 205   for (s = 0; s < n_nurseries; s++) {
 206       initStep(&nurseries[s], 0, s);
 207   }
 208
 209   /* Set up the destination pointers in each younger gen. step */
 210   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 211     for (s = 0; s < generations[g].n_steps-1; s++) {
 212       generations[g].steps[s].to = &generations[g].steps[s+1];
 213     }
 214     generations[g].steps[s].to = &generations[g+1].steps[0];
 215   }
 216   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 217
 218   for (s = 0; s < n_nurseries; s++) {
 219       nurseries[s].to = generations[0].steps[0].to;
 220   }
 221
 222   /* The oldest generation has one step. */
 223   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 224       if (RtsFlags.GcFlags.generations == 1) {
 225           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 226       } else {
 227           oldest_gen->steps[0].mark = 1;
 228           if (RtsFlags.GcFlags.compact)
 229               oldest_gen->steps[0].compact = 1;
 230       }
 231   }
 232
 233   generations[0].max_blocks = 0;
 234   g0s0 = &generations[0].steps[0];
 235
 236   /* The allocation area.  Policy: keep the allocation area
 237    * small to begin with, even if we have a large suggested heap
 238    * size.  Reason: we're going to do a major collection first, and we
 239    * don't want it to be a big one.  This vague idea is borne out by
 240    * rigorous experimental evidence.
 241    */
 242   allocNurseries();
 243
 244   weak_ptr_list = NULL;
 245   caf_list = NULL;
 246   revertible_caf_list = NULL;
 247
 248   /* initialise the allocate() interface */
 249   alloc_blocks = 0;
 250   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 251
 252   exec_block = NULL;
 253
 254 #ifdef THREADED_RTS
 255   initSpinLock(&gc_alloc_block_sync);
 256   whitehole_spin = 0;
 257 #endif
 258
 259   N = 0;
 260
 261   initGcThreads();
 262
 263   IF_DEBUG(gc, statDescribeGens());
 264
 265   RELEASE_SM_LOCK;
 266 }
 267
 268 void
 269 exitStorage (void)
 270 {
 271     stat_exit(calcAllocated());
 272 }
 273
 274 void
 275 freeStorage (void)
 276 {
 277     stgFree(g0s0); // frees all the steps
 278     stgFree(generations);
 279     freeAllMBlocks();
 280 #if defined(THREADED_RTS)
 281     closeMutex(&sm_mutex);
 282 #endif
 283     stgFree(nurseries);
 284     freeGcThreads();
 285 }
 286
 287 /* -----------------------------------------------------------------------------
 288    CAF management.
 289
 290    The entry code for every CAF does the following:
 291
 292       - builds a CAF_BLACKHOLE in the heap
 293       - pushes an update frame pointing to the CAF_BLACKHOLE
 294       - invokes UPD_CAF(), which:
 295           - calls newCaf, below
 296           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 297
 298    Why do we build a BLACKHOLE in the heap rather than just updating
 299    the thunk directly?  It's so that we only need one kind of update
 300    frame - otherwise we'd need a static version of the update frame too.
 301
 302    newCaf() does the following:
 303
 304       - it puts the CAF on the oldest generation's mut-once list.
 305         This is so that we can treat the CAF as a root when collecting
 306         younger generations.
 307
 308    For GHCI, we have additional requirements when dealing with CAFs:
 309
 310       - we must *retain* all dynamically-loaded CAFs ever entered,
 311         just in case we need them again.
 312       - we must be able to *revert* CAFs that have been evaluated, to
 313         their pre-evaluated form.
 314
 315       To do this, we use an additional CAF list.  When newCaf() is
 316       called on a dynamically-loaded CAF, we add it to the CAF list
 317       instead of the old-generation mutable list, and save away its
 318       old info pointer (in caf->saved_info) for later reversion.
 319
 320       To revert all the CAFs, we traverse the CAF list and reset the
 321       info pointer to caf->saved_info, then throw away the CAF list.
 322       (see GC.c:revertCAFs()).
 323
 324       -- SDM 29/1/01
 325
 326    -------------------------------------------------------------------------- */
 327
 328 void
 329 newCAF(StgClosure* caf)
 330 {
 331   ACQUIRE_SM_LOCK;
 332
 333 #ifdef DYNAMIC
 334   if(keepCAFs)
 335   {
 336     // HACK:
 337     // If we are in GHCi _and_ we are using dynamic libraries,
 338     // then we can't redirect newCAF calls to newDynCAF (see below),
 339     // so we make newCAF behave almost like newDynCAF.
 340     // The dynamic libraries might be used by both the interpreted
 341     // program and GHCi itself, so they must not be reverted.
 342     // This also means that in GHCi with dynamic libraries, CAFs are not
 343     // garbage collected. If this turns out to be a problem, we could
 344     // do another hack here and do an address range test on caf to figure
 345     // out whether it is from a dynamic library.
 346     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 347     ((StgIndStatic *)caf)->static_link = caf_list;
 348     caf_list = caf;
 349   }
 350   else
 351 #endif
 352   {
 353     /* Put this CAF on the mutable list for the old generation.
 354     * This is a HACK - the IND_STATIC closure doesn't really have
 355     * a mut_link field, but we pretend it has - in fact we re-use
 356     * the STATIC_LINK field for the time being, because when we
 357     * come to do a major GC we won't need the mut_link field
 358     * any more and can use it as a STATIC_LINK.
 359     */
 360     ((StgIndStatic *)caf)->saved_info = NULL;
 361     recordMutableGen(caf, oldest_gen->no);
 362   }
 363
 364   RELEASE_SM_LOCK;
 365 }
 366
 367 // An alternate version of newCaf which is used for dynamically loaded
 368 // object code in GHCi.  In this case we want to retain *all* CAFs in
 369 // the object code, because they might be demanded at any time from an
 370 // expression evaluated on the command line.
 371 // Also, GHCi might want to revert CAFs, so we add these to the
 372 // revertible_caf_list.
 373 //
 374 // The linker hackily arranges that references to newCaf from dynamic
 375 // code end up pointing to newDynCAF.
 376 void
 377 newDynCAF(StgClosure *caf)
 378 {
 379     ACQUIRE_SM_LOCK;
 380
 381     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 382     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 383     revertible_caf_list = caf;
 384
 385     RELEASE_SM_LOCK;
 386 }
 387
 388 /* -----------------------------------------------------------------------------
 389    Nursery management.
 390    -------------------------------------------------------------------------- */
 391
 392 static bdescr *
 393 allocNursery (step *stp, bdescr *tail, nat blocks)
 394 {
 395     bdescr *bd;
 396     nat i;
 397
 398     // Allocate a nursery: we allocate fresh blocks one at a time and
 399     // cons them on to the front of the list, not forgetting to update
 400     // the back pointer on the tail of the list to point to the new block.
 401     for (i=0; i < blocks; i++) {
 402         // @LDV profiling
 403         /*
 404           processNursery() in LdvProfile.c assumes that every block group in
 405           the nursery contains only a single block. So, if a block group is
 406           given multiple blocks, change processNursery() accordingly.
 407         */
 408         bd = allocBlock();
 409         bd->link = tail;
 410         // double-link the nursery: we might need to insert blocks
 411         if (tail != NULL) {
 412             tail->u.back = bd;
 413         }
 414         initBdescr(bd, stp);
 415         bd->flags = 0;
 416         bd->free = bd->start;
 417         tail = bd;
 418     }
 419     tail->u.back = NULL;
 420     return tail;
 421 }
 422
 423 static void
 424 assignNurseriesToCapabilities (void)
 425 {
 426 #ifdef THREADED_RTS
 427     nat i;
 428
 429     for (i = 0; i < n_nurseries; i++) {
 430         capabilities[i].r.rNursery        = &nurseries[i];
 431         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 432         capabilities[i].r.rCurrentAlloc   = NULL;
 433     }
 434 #else /* THREADED_RTS */
 435     MainCapability.r.rNursery        = &nurseries[0];
 436     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 437     MainCapability.r.rCurrentAlloc   = NULL;
 438 #endif
 439 }
 440
 441 static void
 442 allocNurseries( void )
 443 {
 444     nat i;
 445
 446     for (i = 0; i < n_nurseries; i++) {
 447         nurseries[i].blocks =
 448             allocNursery(&nurseries[i], NULL,
 449                          RtsFlags.GcFlags.minAllocAreaSize);
 450         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 451         nurseries[i].old_blocks   = NULL;
 452         nurseries[i].n_old_blocks = 0;
 453     }
 454     assignNurseriesToCapabilities();
 455 }
 456
 457 void
 458 resetNurseries( void )
 459 {
 460     nat i;
 461     bdescr *bd;
 462     step *stp;
 463
 464     for (i = 0; i < n_nurseries; i++) {
 465         stp = &nurseries[i];
 466         for (bd = stp->blocks; bd; bd = bd->link) {
 467             bd->free = bd->start;
 468             ASSERT(bd->gen_no == 0);
 469             ASSERT(bd->step == stp);
 470             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 471         }
 472     }
 473     assignNurseriesToCapabilities();
 474 }
 475
 476 lnat
 477 countNurseryBlocks (void)
 478 {
 479     nat i;
 480     lnat blocks = 0;
 481
 482     for (i = 0; i < n_nurseries; i++) {
 483         blocks += nurseries[i].n_blocks;
 484     }
 485     return blocks;
 486 }
 487
 488 static void
 489 resizeNursery ( step *stp, nat blocks )
 490 {
 491   bdescr *bd;
 492   nat nursery_blocks;
 493
 494   nursery_blocks = stp->n_blocks;
 495   if (nursery_blocks == blocks) return;
 496
 497   if (nursery_blocks < blocks) {
 498       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 499                  blocks);
 500     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 501   }
 502   else {
 503     bdescr *next_bd;
 504
 505     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 506                blocks);
 507
 508     bd = stp->blocks;
 509     while (nursery_blocks > blocks) {
 510         next_bd = bd->link;
 511         next_bd->u.back = NULL;
 512         nursery_blocks -= bd->blocks; // might be a large block
 513         freeGroup(bd);
 514         bd = next_bd;
 515     }
 516     stp->blocks = bd;
 517     // might have gone just under, by freeing a large block, so make
 518     // up the difference.
 519     if (nursery_blocks < blocks) {
 520         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 521     }
 522   }
 523
 524   stp->n_blocks = blocks;
 525   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 526 }
 527
 528 //
 529 // Resize each of the nurseries to the specified size.
 530 //
 531 void
 532 resizeNurseriesFixed (nat blocks)
 533 {
 534     nat i;
 535     for (i = 0; i < n_nurseries; i++) {
 536         resizeNursery(&nurseries[i], blocks);
 537     }
 538 }
 539
 540 //
 541 // Resize the nurseries to the total specified size.
 542 //
 543 void
 544 resizeNurseries (nat blocks)
 545 {
 546     // If there are multiple nurseries, then we just divide the number
 547     // of available blocks between them.
 548     resizeNurseriesFixed(blocks / n_nurseries);
 549 }
 550
 551
 552 /* -----------------------------------------------------------------------------
 553    move_TSO is called to update the TSO structure after it has been
 554    moved from one place to another.
 555    -------------------------------------------------------------------------- */
 556
 557 void
 558 move_TSO (StgTSO *src, StgTSO *dest)
 559 {
 560     ptrdiff_t diff;
 561
 562     // relocate the stack pointer...
 563     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 564     dest->sp = (StgPtr)dest->sp + diff;
 565 }
 566
 567 /* -----------------------------------------------------------------------------
 568    The allocate() interface
 569
 570    allocateInGen() function allocates memory directly into a specific
 571    generation.  It always succeeds, and returns a chunk of memory n
 572    words long.  n can be larger than the size of a block if necessary,
 573    in which case a contiguous block group will be allocated.
 574
 575    allocate(n) is equivalent to allocateInGen(g0).
 576    -------------------------------------------------------------------------- */
 577
 578 StgPtr
 579 allocateInGen (generation *g, lnat n)
 580 {
 581     step *stp;
 582     bdescr *bd;
 583     StgPtr ret;
 584
 585     ACQUIRE_SM_LOCK;
 586
 587     TICK_ALLOC_HEAP_NOCTR(n);
 588     CCS_ALLOC(CCCS,n);
 589
 590     stp = &g->steps[0];
 591
 592     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 593     {
 594         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 595
 596         // Attempting to allocate an object larger than maxHeapSize
 597         // should definitely be disallowed.  (bug #1791)
 598         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 599             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 600             heapOverflow();
 601             // heapOverflow() doesn't exit (see #2592), but we aren't
 602             // in a position to do a clean shutdown here: we
 603             // either have to allocate the memory or exit now.
 604             // Allocating the memory would be bad, because the user
 605             // has requested that we not exceed maxHeapSize, so we
 606             // just exit.
 607             stg_exit(EXIT_HEAPOVERFLOW);
 608         }
 609
 610         bd = allocGroup(req_blocks);
 611         dbl_link_onto(bd, &stp->large_objects);
 612         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 613         alloc_blocks += bd->blocks;
 614         initBdescr(bd, stp);
 615         bd->flags = BF_LARGE;
 616         bd->free = bd->start + n;
 617         ret = bd->start;
 618     }
 619     else
 620     {
 621         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 622         bd = stp->blocks;
 623         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 624             bd = allocBlock();
 625             initBdescr(bd, stp);
 626             bd->flags = 0;
 627             bd->link = stp->blocks;
 628             stp->blocks = bd;
 629             stp->n_blocks++;
 630             alloc_blocks++;
 631         }
 632         ret = bd->free;
 633         bd->free += n;
 634     }
 635
 636     RELEASE_SM_LOCK;
 637
 638     return ret;
 639 }
 640
 641 StgPtr
 642 allocate (lnat n)
 643 {
 644     return allocateInGen(g0,n);
 645 }
 646
 647 lnat
 648 allocatedBytes( void )
 649 {
 650     lnat allocated;
 651
 652     allocated = alloc_blocks * BLOCK_SIZE_W;
 653     if (pinned_object_block != NULL) {
 654         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 655             pinned_object_block->free;
 656     }
 657
 658     return allocated;
 659 }
 660
 661 // split N blocks off the front of the given bdescr, returning the
 662 // new block group.  We treat the remainder as if it
 663 // had been freshly allocated in generation 0.
 664 bdescr *
 665 splitLargeBlock (bdescr *bd, nat blocks)
 666 {
 667     bdescr *new_bd;
 668
 669     // subtract the original number of blocks from the counter first
 670     bd->step->n_large_blocks -= bd->blocks;
 671
 672     new_bd = splitBlockGroup (bd, blocks);
 673
 674     dbl_link_onto(new_bd, &g0s0->large_objects);
 675     g0s0->n_large_blocks += new_bd->blocks;
 676     initBdescr(new_bd, g0s0);
 677     new_bd->flags   = BF_LARGE;
 678     new_bd->free    = bd->free;
 679     ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
 680
 681     // add the new number of blocks to the counter.  Due to the gaps
 682     // for block descriptor, new_bd->blocks + bd->blocks might not be
 683     // equal to the original bd->blocks, which is why we do it this way.
 684     bd->step->n_large_blocks += bd->blocks;
 685
 686     return new_bd;
 687 }
 688
 689 /* -----------------------------------------------------------------------------
 690    allocateLocal()
 691
 692    This allocates memory in the current thread - it is intended for
 693    use primarily from STG-land where we have a Capability.  It is
 694    better than allocate() because it doesn't require taking the
 695    sm_mutex lock in the common case.
 696
 697    Memory is allocated directly from the nursery if possible (but not
 698    from the current nursery block, so as not to interfere with
 699    Hp/HpLim).
 700    -------------------------------------------------------------------------- */
 701
 702 StgPtr
 703 allocateLocal (Capability *cap, lnat n)
 704 {
 705     bdescr *bd;
 706     StgPtr p;
 707
 708     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 709         return allocateInGen(g0,n);
 710     }
 711
 712     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 713
 714     TICK_ALLOC_HEAP_NOCTR(n);
 715     CCS_ALLOC(CCCS,n);
 716
 717     bd = cap->r.rCurrentAlloc;
 718     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 719
 720         // The CurrentAlloc block is full, we need to find another
 721         // one.  First, we try taking the next block from the
 722         // nursery:
 723         bd = cap->r.rCurrentNursery->link;
 724
 725         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 726             // The nursery is empty, or the next block is already
 727             // full: allocate a fresh block (we can't fail here).
 728             ACQUIRE_SM_LOCK;
 729             bd = allocBlock();
 730             cap->r.rNursery->n_blocks++;
 731             RELEASE_SM_LOCK;
 732             initBdescr(bd, cap->r.rNursery);
 733             bd->flags = 0;
 734             // NO: alloc_blocks++;
 735             // calcAllocated() uses the size of the nursery, and we've
 736             // already bumpted nursery->n_blocks above.  We'll GC
 737             // pretty quickly now anyway, because MAYBE_GC() will
 738             // notice that CurrentNursery->link is NULL.
 739         } else {
 740             // we have a block in the nursery: take it and put
 741             // it at the *front* of the nursery list, and use it
 742             // to allocate() from.
 743             cap->r.rCurrentNursery->link = bd->link;
 744             if (bd->link != NULL) {
 745                 bd->link->u.back = cap->r.rCurrentNursery;
 746             }
 747         }
 748         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 749         cap->r.rCurrentAlloc = bd;
 750         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 751     }
 752     p = bd->free;
 753     bd->free += n;
 754     return p;
 755 }
 756
 757 /* ---------------------------------------------------------------------------
 758    Allocate a fixed/pinned object.
 759
 760    We allocate small pinned objects into a single block, allocating a
 761    new block when the current one overflows.  The block is chained
 762    onto the large_object_list of generation 0 step 0.
 763
 764    NOTE: The GC can't in general handle pinned objects.  This
 765    interface is only safe to use for ByteArrays, which have no
 766    pointers and don't require scavenging.  It works because the
 767    block's descriptor has the BF_LARGE flag set, so the block is
 768    treated as a large object and chained onto various lists, rather
 769    than the individual objects being copied.  However, when it comes
 770    to scavenge the block, the GC will only scavenge the first object.
 771    The reason is that the GC can't linearly scan a block of pinned
 772    objects at the moment (doing so would require using the
 773    mostly-copying techniques).  But since we're restricting ourselves
 774    to pinned ByteArrays, not scavenging is ok.
 775
 776    This function is called by newPinnedByteArray# which immediately
 777    fills the allocated memory with a MutableByteArray#.
 778    ------------------------------------------------------------------------- */
 779
 780 StgPtr
 781 allocatePinned( lnat n )
 782 {
 783     StgPtr p;
 784     bdescr *bd = pinned_object_block;
 785
 786     // If the request is for a large object, then allocate()
 787     // will give us a pinned object anyway.
 788     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 789         p = allocate(n);
 790         Bdescr(p)->flags |= BF_PINNED;
 791         return p;
 792     }
 793
 794     ACQUIRE_SM_LOCK;
 795
 796     TICK_ALLOC_HEAP_NOCTR(n);
 797     CCS_ALLOC(CCCS,n);
 798
 799     // If we don't have a block of pinned objects yet, or the current
 800     // one isn't large enough to hold the new object, allocate a new one.
 801     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 802         pinned_object_block = bd = allocBlock();
 803         dbl_link_onto(bd, &g0s0->large_objects);
 804         g0s0->n_large_blocks++;
 805         initBdescr(bd, g0s0);
 806         bd->flags  = BF_PINNED | BF_LARGE;
 807         bd->free   = bd->start;
 808         alloc_blocks++;
 809     }
 810
 811     p = bd->free;
 812     bd->free += n;
 813     RELEASE_SM_LOCK;
 814     return p;
 815 }
 816
 817 /* -----------------------------------------------------------------------------
 818    Write Barriers
 819    -------------------------------------------------------------------------- */
 820
 821 /*
 822    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 823    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 824    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 825    and is put on the mutable list.
 826 */
 827 void
 828 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 829 {
 830     Capability *cap = regTableToCapability(reg);
 831     bdescr *bd;
 832     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 833         p->header.info = &stg_MUT_VAR_DIRTY_info;
 834         bd = Bdescr((StgPtr)p);
 835         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 836     }
 837 }
 838
 839 // Setting a TSO's link field with a write barrier.
 840 // It is *not* necessary to call this function when
 841 //    * setting the link field to END_TSO_QUEUE
 842 //    * putting a TSO on the blackhole_queue
 843 //    * setting the link field of the currently running TSO, as it
 844 //      will already be dirty.
 845 void
 846 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 847 {
 848     bdescr *bd;
 849     if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
 850         tso->flags |= TSO_LINK_DIRTY;
 851         bd = Bdescr((StgPtr)tso);
 852         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 853     }
 854     tso->_link = target;
 855 }
 856
 857 void
 858 dirty_TSO (Capability *cap, StgTSO *tso)
 859 {
 860     bdescr *bd;
 861     if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
 862         bd = Bdescr((StgPtr)tso);
 863         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 864     }
 865     tso->dirty = 1;
 866 }
 867
 868 /*
 869    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 870    on the mutable list; a MVAR_DIRTY is.  When written to, a
 871    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 872    The check for MVAR_CLEAN is inlined at the call site for speed,
 873    this really does make a difference on concurrency-heavy benchmarks
 874    such as Chaneneos and cheap-concurrency.
 875 */
 876 void
 877 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 878 {
 879     Capability *cap = regTableToCapability(reg);
 880     bdescr *bd;
 881     bd = Bdescr((StgPtr)p);
 882     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 883 }
 884
 885 /* -----------------------------------------------------------------------------
 886  * Stats and stuff
 887  * -------------------------------------------------------------------------- */
 888
 889 /* -----------------------------------------------------------------------------
 890  * calcAllocated()
 891  *
 892  * Approximate how much we've allocated: number of blocks in the
 893  * nursery + blocks allocated via allocate() - unused nusery blocks.
 894  * This leaves a little slop at the end of each block, and doesn't
 895  * take into account large objects (ToDo).
 896  * -------------------------------------------------------------------------- */
 897
 898 lnat
 899 calcAllocated( void )
 900 {
 901   nat allocated;
 902   bdescr *bd;
 903
 904   allocated = allocatedBytes();
 905   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 906
 907   {
 908 #ifdef THREADED_RTS
 909   nat i;
 910   for (i = 0; i < n_nurseries; i++) {
 911       Capability *cap;
 912       for ( bd = capabilities[i].r.rCurrentNursery->link;
 913             bd != NULL; bd = bd->link ) {
 914           allocated -= BLOCK_SIZE_W;
 915       }
 916       cap = &capabilities[i];
 917       if (cap->r.rCurrentNursery->free <
 918           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 919           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 920               - cap->r.rCurrentNursery->free;
 921       }
 922   }
 923 #else
 924   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 925
 926   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 927       allocated -= BLOCK_SIZE_W;
 928   }
 929   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 930       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 931           - current_nursery->free;
 932   }
 933 #endif
 934   }
 935
 936   total_allocated += allocated;
 937   return allocated;
 938 }
 939
 940 /* Approximate the amount of live data in the heap.  To be called just
 941  * after garbage collection (see GarbageCollect()).
 942  */
 943 lnat
 944 calcLiveBlocks(void)
 945 {
 946   nat g, s;
 947   lnat live = 0;
 948   step *stp;
 949
 950   if (RtsFlags.GcFlags.generations == 1) {
 951       return g0s0->n_large_blocks + g0s0->n_blocks;
 952   }
 953
 954   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 955     for (s = 0; s < generations[g].n_steps; s++) {
 956       /* approximate amount of live data (doesn't take into account slop
 957        * at end of each block).
 958        */
 959       if (g == 0 && s == 0) {
 960           continue;
 961       }
 962       stp = &generations[g].steps[s];
 963       live += stp->n_large_blocks + stp->n_blocks;
 964     }
 965   }
 966   return live;
 967 }
 968
 969 lnat
 970 countOccupied(bdescr *bd)
 971 {
 972     lnat words;
 973
 974     words = 0;
 975     for (; bd != NULL; bd = bd->link) {
 976         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
 977         words += bd->free - bd->start;
 978     }
 979     return words;
 980 }
 981
 982 // Return an accurate count of the live data in the heap, excluding
 983 // generation 0.
 984 lnat
 985 calcLiveWords(void)
 986 {
 987     nat g, s;
 988     lnat live;
 989     step *stp;
 990
 991     if (RtsFlags.GcFlags.generations == 1) {
 992         return g0s0->n_words + countOccupied(g0s0->large_objects);
 993     }
 994
 995     live = 0;
 996     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 997         for (s = 0; s < generations[g].n_steps; s++) {
 998             if (g == 0 && s == 0) continue;
 999             stp = &generations[g].steps[s];
1000             live += stp->n_words + countOccupied(stp->large_objects);
1001         }
1002     }
1003     return live;
1004 }
1005
1006 /* Approximate the number of blocks that will be needed at the next
1007  * garbage collection.
1008  *
1009  * Assume: all data currently live will remain live.  Steps that will
1010  * be collected next time will therefore need twice as many blocks
1011  * since all the data will be copied.
1012  */
1013 extern lnat
1014 calcNeeded(void)
1015 {
1016     lnat needed = 0;
1017     nat g, s;
1018     step *stp;
1019
1020     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1021         for (s = 0; s < generations[g].n_steps; s++) {
1022             if (g == 0 && s == 0) { continue; }
1023             stp = &generations[g].steps[s];
1024
1025             // we need at least this much space
1026             needed += stp->n_blocks + stp->n_large_blocks;
1027
1028             // any additional space needed to collect this gen next time?
1029             if (g == 0 || // always collect gen 0
1030                 (generations[g].steps[0].n_blocks +
1031                  generations[g].steps[0].n_large_blocks
1032                  > generations[g].max_blocks)) {
1033                 // we will collect this gen next time
1034                 if (stp->mark) {
1035                     //  bitmap:
1036                     needed += stp->n_blocks / BITS_IN(W_);
1037                     //  mark stack:
1038                     needed += stp->n_blocks / 100;
1039                 }
1040                 if (stp->compact) {
1041                     continue; // no additional space needed for compaction
1042                 } else {
1043                     needed += stp->n_blocks;
1044                 }
1045             }
1046         }
1047     }
1048     return needed;
1049 }
1050
1051 /* ----------------------------------------------------------------------------
1052    Executable memory
1053
1054    Executable memory must be managed separately from non-executable
1055    memory.  Most OSs these days require you to jump through hoops to
1056    dynamically allocate executable memory, due to various security
1057    measures.
1058
1059    Here we provide a small memory allocator for executable memory.
1060    Memory is managed with a page granularity; we allocate linearly
1061    in the page, and when the page is emptied (all objects on the page
1062    are free) we free the page again, not forgetting to make it
1063    non-executable.
1064
1065    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1066          the linker cannot use allocateExec for loading object code files
1067          on Windows. Once allocateExec can handle larger objects, the linker
1068          should be modified to use allocateExec instead of VirtualAlloc.
1069    ------------------------------------------------------------------------- */
1070
1071 #if defined(linux_HOST_OS)
1072
1073 // On Linux we need to use libffi for allocating executable memory,
1074 // because it knows how to work around the restrictions put in place
1075 // by SELinux.
1076
1077 void *allocateExec (nat bytes, void **exec_ret)
1078 {
1079     void **ret, **exec;
1080     ACQUIRE_SM_LOCK;
1081     ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1082     RELEASE_SM_LOCK;
1083     if (ret == NULL) return ret;
1084     *ret = ret; // save the address of the writable mapping, for freeExec().
1085     *exec_ret = exec + 1;
1086     return (ret + 1);
1087 }
1088
1089 // freeExec gets passed the executable address, not the writable address.
1090 void freeExec (void *addr)
1091 {
1092     void *writable;
1093     writable = *((void**)addr - 1);
1094     ACQUIRE_SM_LOCK;
1095     ffi_closure_free (writable);
1096     RELEASE_SM_LOCK
1097 }
1098
1099 #else
1100
1101 void *allocateExec (nat bytes, void **exec_ret)
1102 {
1103     void *ret;
1104     nat n;
1105
1106     ACQUIRE_SM_LOCK;
1107
1108     // round up to words.
1109     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1110
1111     if (n+1 > BLOCK_SIZE_W) {
1112         barf("allocateExec: can't handle large objects");
1113     }
1114
1115     if (exec_block == NULL ||
1116         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1117         bdescr *bd;
1118         lnat pagesize = getPageSize();
1119         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1120         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1121         bd->gen_no = 0;
1122         bd->flags = BF_EXEC;
1123         bd->link = exec_block;
1124         if (exec_block != NULL) {
1125             exec_block->u.back = bd;
1126         }
1127         bd->u.back = NULL;
1128         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1129         exec_block = bd;
1130     }
1131     *(exec_block->free) = n;  // store the size of this chunk
1132     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1133     ret = exec_block->free + 1;
1134     exec_block->free += n + 1;
1135
1136     RELEASE_SM_LOCK
1137     *exec_ret = ret;
1138     return ret;
1139 }
1140
1141 void freeExec (void *addr)
1142 {
1143     StgPtr p = (StgPtr)addr - 1;
1144     bdescr *bd = Bdescr((StgPtr)p);
1145
1146     if ((bd->flags & BF_EXEC) == 0) {
1147         barf("freeExec: not executable");
1148     }
1149
1150     if (*(StgPtr)p == 0) {
1151         barf("freeExec: already free?");
1152     }
1153
1154     ACQUIRE_SM_LOCK;
1155
1156     bd->gen_no -= *(StgPtr)p;
1157     *(StgPtr)p = 0;
1158
1159     if (bd->gen_no == 0) {
1160         // Free the block if it is empty, but not if it is the block at
1161         // the head of the queue.
1162         if (bd != exec_block) {
1163             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1164             dbl_link_remove(bd, &exec_block);
1165             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1166             freeGroup(bd);
1167         } else {
1168             bd->free = bd->start;
1169         }
1170     }
1171
1172     RELEASE_SM_LOCK
1173 }
1174
1175 #endif /* mingw32_HOST_OS */
1176
1177 /* -----------------------------------------------------------------------------
1178    Debugging
1179
1180    memInventory() checks for memory leaks by counting up all the
1181    blocks we know about and comparing that to the number of blocks
1182    allegedly floating around in the system.
1183    -------------------------------------------------------------------------- */
1184
1185 #ifdef DEBUG
1186
1187 // Useful for finding partially full blocks in gdb
1188 void findSlop(bdescr *bd);
1189 void findSlop(bdescr *bd)
1190 {
1191     lnat slop;
1192
1193     for (; bd != NULL; bd = bd->link) {
1194         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1195         if (slop > (1024/sizeof(W_))) {
1196             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1197                        bd->start, bd, slop / (1024/sizeof(W_)));
1198         }
1199     }
1200 }
1201
1202 nat
1203 countBlocks(bdescr *bd)
1204 {
1205     nat n;
1206     for (n=0; bd != NULL; bd=bd->link) {
1207         n += bd->blocks;
1208     }
1209     return n;
1210 }
1211
1212 // (*1) Just like countBlocks, except that we adjust the count for a
1213 // megablock group so that it doesn't include the extra few blocks
1214 // that would be taken up by block descriptors in the second and
1215 // subsequent megablock.  This is so we can tally the count with the
1216 // number of blocks allocated in the system, for memInventory().
1217 static nat
1218 countAllocdBlocks(bdescr *bd)
1219 {
1220     nat n;
1221     for (n=0; bd != NULL; bd=bd->link) {
1222         n += bd->blocks;
1223         // hack for megablock groups: see (*1) above
1224         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1225             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1226                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1227         }
1228     }
1229     return n;
1230 }
1231
1232 static lnat
1233 stepBlocks (step *stp)
1234 {
1235     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1236     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1237     return stp->n_blocks + stp->n_old_blocks +
1238             countAllocdBlocks(stp->large_objects);
1239 }
1240
1241 // If memInventory() calculates that we have a memory leak, this
1242 // function will try to find the block(s) that are leaking by marking
1243 // all the ones that we know about, and search through memory to find
1244 // blocks that are not marked.  In the debugger this can help to give
1245 // us a clue about what kind of block leaked.  In the future we might
1246 // annotate blocks with their allocation site to give more helpful
1247 // info.
1248 static void
1249 findMemoryLeak (void)
1250 {
1251   nat g, s, i;
1252   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1253       for (i = 0; i < n_capabilities; i++) {
1254           markBlocks(capabilities[i].mut_lists[g]);
1255       }
1256       markBlocks(generations[g].mut_list);
1257       for (s = 0; s < generations[g].n_steps; s++) {
1258           markBlocks(generations[g].steps[s].blocks);
1259           markBlocks(generations[g].steps[s].large_objects);
1260       }
1261   }
1262
1263   for (i = 0; i < n_nurseries; i++) {
1264       markBlocks(nurseries[i].blocks);
1265       markBlocks(nurseries[i].large_objects);
1266   }
1267
1268 #ifdef PROFILING
1269   // TODO:
1270   // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1271   //    markRetainerBlocks();
1272   // }
1273 #endif
1274
1275   // count the blocks allocated by the arena allocator
1276   // TODO:
1277   // markArenaBlocks();
1278
1279   // count the blocks containing executable memory
1280   markBlocks(exec_block);
1281
1282   reportUnmarkedBlocks();
1283 }
1284
1285
1286 void
1287 memInventory (rtsBool show)
1288 {
1289   nat g, s, i;
1290   step *stp;
1291   lnat gen_blocks[RtsFlags.GcFlags.generations];
1292   lnat nursery_blocks, retainer_blocks,
1293        arena_blocks, exec_blocks;
1294   lnat live_blocks = 0, free_blocks = 0;
1295   rtsBool leak;
1296
1297   // count the blocks we current have
1298
1299   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1300       gen_blocks[g] = 0;
1301       for (i = 0; i < n_capabilities; i++) {
1302           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1303       }
1304       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1305       for (s = 0; s < generations[g].n_steps; s++) {
1306           stp = &generations[g].steps[s];
1307           gen_blocks[g] += stepBlocks(stp);
1308       }
1309   }
1310
1311   nursery_blocks = 0;
1312   for (i = 0; i < n_nurseries; i++) {
1313       nursery_blocks += stepBlocks(&nurseries[i]);
1314   }
1315
1316   retainer_blocks = 0;
1317 #ifdef PROFILING
1318   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1319       retainer_blocks = retainerStackBlocks();
1320   }
1321 #endif
1322
1323   // count the blocks allocated by the arena allocator
1324   arena_blocks = arenaBlocks();
1325
1326   // count the blocks containing executable memory
1327   exec_blocks = countAllocdBlocks(exec_block);
1328
1329   /* count the blocks on the free list */
1330   free_blocks = countFreeList();
1331
1332   live_blocks = 0;
1333   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1334       live_blocks += gen_blocks[g];
1335   }
1336   live_blocks += nursery_blocks +
1337                + retainer_blocks + arena_blocks + exec_blocks;
1338
1339 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1340
1341   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1342
1343   if (show || leak)
1344   {
1345       if (leak) {
1346           debugBelch("Memory leak detected:\n");
1347       } else {
1348           debugBelch("Memory inventory:\n");
1349       }
1350       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1351           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1352                      gen_blocks[g], MB(gen_blocks[g]));
1353       }
1354       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1355                  nursery_blocks, MB(nursery_blocks));
1356       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1357                  retainer_blocks, MB(retainer_blocks));
1358       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1359                  arena_blocks, MB(arena_blocks));
1360       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1361                  exec_blocks, MB(exec_blocks));
1362       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1363                  free_blocks, MB(free_blocks));
1364       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1365                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1366       if (leak) {
1367           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1368                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1369       }
1370   }
1371
1372   if (leak) {
1373       debugBelch("\n");
1374       findMemoryLeak();
1375   }
1376   ASSERT(n_alloc_blocks == live_blocks);
1377   ASSERT(!leak);
1378 }
1379
1380
1381 /* Full heap sanity check. */
1382 void
1383 checkSanity( void )
1384 {
1385     nat g, s;
1386
1387     if (RtsFlags.GcFlags.generations == 1) {
1388         checkHeap(g0s0->blocks);
1389         checkLargeObjects(g0s0->large_objects);
1390     } else {
1391
1392         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1393             for (s = 0; s < generations[g].n_steps; s++) {
1394                 if (g == 0 && s == 0) { continue; }
1395                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1396                        == generations[g].steps[s].n_blocks);
1397                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1398                        == generations[g].steps[s].n_large_blocks);
1399                 checkHeap(generations[g].steps[s].blocks);
1400                 checkLargeObjects(generations[g].steps[s].large_objects);
1401             }
1402         }
1403
1404         for (s = 0; s < n_nurseries; s++) {
1405             ASSERT(countBlocks(nurseries[s].blocks)
1406                    == nurseries[s].n_blocks);
1407             ASSERT(countBlocks(nurseries[s].large_objects)
1408                    == nurseries[s].n_large_blocks);
1409         }
1410
1411         checkFreeListSanity();
1412     }
1413
1414 #if defined(THREADED_RTS)
1415     // check the stacks too in threaded mode, because we don't do a
1416     // full heap sanity check in this case (see checkHeap())
1417     checkMutableLists(rtsTrue);
1418 #else
1419     checkMutableLists(rtsFalse);
1420 #endif
1421 }
1422
1423 /* Nursery sanity check */
1424 void
1425 checkNurserySanity( step *stp )
1426 {
1427     bdescr *bd, *prev;
1428     nat blocks = 0;
1429
1430     prev = NULL;
1431     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1432         ASSERT(bd->u.back == prev);
1433         prev = bd;
1434         blocks += bd->blocks;
1435     }
1436     ASSERT(blocks == stp->n_blocks);
1437 }
1438
1439 // handy function for use in gdb, because Bdescr() is inlined.
1440 extern bdescr *_bdescr( StgPtr p );
1441
1442 bdescr *
1443 _bdescr( StgPtr p )
1444 {
1445     return Bdescr(p);
1446 }
1447
1448 #endif