rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsUtils.h"
  17 #include "RtsFlags.h"
  18 #include "Stats.h"
  19 #include "Hooks.h"
  20 #include "BlockAlloc.h"
  21 #include "MBlock.h"
  22 #include "Weak.h"
  23 #include "Sanity.h"
  24 #include "Arena.h"
  25 #include "OSThreads.h"
  26 #include "Capability.h"
  27 #include "Storage.h"
  28 #include "Schedule.h"
  29 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  30 #include "OSMem.h"
  31 #include "Trace.h"
  32 #include "GC.h"
  33 #include "Evac.h"
  34
  35 #include <stdlib.h>
  36 #include <string.h>
  37
  38 #include "ffi.h"
  39
  40 /*
  41  * All these globals require sm_mutex to access in THREADED_RTS mode.
  42  */
  43 StgClosure    *caf_list         = NULL;
  44 StgClosure    *revertible_caf_list = NULL;
  45 rtsBool       keepCAFs;
  46
  47 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  48 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  49 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  50
  51 static bdescr *exec_block;
  52
  53 generation *generations = NULL; /* all the generations */
  54 generation *g0          = NULL; /* generation 0, for convenience */
  55 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  56 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  57
  58 nat total_steps         = 0;
  59 step *all_steps         = NULL; /* single array of steps */
  60
  61 ullong total_allocated = 0;     /* total memory allocated during run */
  62
  63 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  64 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  65
  66 #ifdef THREADED_RTS
  67 /*
  68  * Storage manager mutex:  protects all the above state from
  69  * simultaneous access by two STG threads.
  70  */
  71 Mutex sm_mutex;
  72 #endif
  73
  74
  75 /*
  76  * Forward references
  77  */
  78 static void *stgAllocForGMP   (size_t size_in_bytes);
  79 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  80 static void  stgDeallocForGMP (void *ptr, size_t size);
  81
  82 static void
  83 initStep (step *stp, int g, int s)
  84 {
  85     stp->no = s;
  86     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  87     stp->blocks = NULL;
  88     stp->n_blocks = 0;
  89     stp->n_words = 0;
  90     stp->live_estimate = 0;
  91     stp->old_blocks = NULL;
  92     stp->n_old_blocks = 0;
  93     stp->gen = &generations[g];
  94     stp->gen_no = g;
  95     stp->large_objects = NULL;
  96     stp->n_large_blocks = 0;
  97     stp->scavenged_large_objects = NULL;
  98     stp->n_scavenged_large_blocks = 0;
  99     stp->mark = 0;
 100     stp->compact = 0;
 101     stp->bitmap = NULL;
 102 #ifdef THREADED_RTS
 103     initSpinLock(&stp->sync_large_objects);
 104 #endif
 105     stp->threads = END_TSO_QUEUE;
 106     stp->old_threads = END_TSO_QUEUE;
 107 }
 108
 109 void
 110 initStorage( void )
 111 {
 112   nat g, s;
 113   generation *gen;
 114
 115   if (generations != NULL) {
 116       // multi-init protection
 117       return;
 118   }
 119
 120   initMBlocks();
 121
 122   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 123    * doing something reasonable.
 124    */
 125   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 126   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
 127   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 128   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 129
 130   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 131       RtsFlags.GcFlags.heapSizeSuggestion >
 132       RtsFlags.GcFlags.maxHeapSize) {
 133     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 134   }
 135
 136   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 137       RtsFlags.GcFlags.minAllocAreaSize >
 138       RtsFlags.GcFlags.maxHeapSize) {
 139       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 140       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 141   }
 142
 143   initBlockAllocator();
 144
 145 #if defined(THREADED_RTS)
 146   initMutex(&sm_mutex);
 147 #endif
 148
 149   ACQUIRE_SM_LOCK;
 150
 151   /* allocate generation info array */
 152   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 153                                              * sizeof(struct generation_),
 154                                              "initStorage: gens");
 155
 156   /* allocate all the steps into an array.  It is important that we do
 157      it this way, because we need the invariant that two step pointers
 158      can be directly compared to see which is the oldest.
 159      Remember that the last generation has only one step. */
 160   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 161   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 162                                "initStorage: steps");
 163
 164   /* Initialise all generations */
 165   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 166     gen = &generations[g];
 167     gen->no = g;
 168     gen->mut_list = allocBlock();
 169     gen->collections = 0;
 170     gen->par_collections = 0;
 171     gen->failed_promotions = 0;
 172     gen->max_blocks = 0;
 173   }
 174
 175   /* A couple of convenience pointers */
 176   g0 = &generations[0];
 177   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 178
 179   /* Allocate step structures in each generation */
 180   if (RtsFlags.GcFlags.generations > 1) {
 181     /* Only for multiple-generations */
 182
 183     /* Oldest generation: one step */
 184     oldest_gen->n_steps = 1;
 185     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 186                                       * RtsFlags.GcFlags.steps;
 187
 188     /* set up all except the oldest generation with 2 steps */
 189     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 190       generations[g].n_steps = RtsFlags.GcFlags.steps;
 191       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 192     }
 193
 194   } else {
 195     /* single generation, i.e. a two-space collector */
 196     g0->n_steps = 1;
 197     g0->steps   = all_steps;
 198   }
 199
 200 #ifdef THREADED_RTS
 201   n_nurseries = n_capabilities;
 202 #else
 203   n_nurseries = 1;
 204 #endif
 205   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 206                               "initStorage: nurseries");
 207
 208   /* Initialise all steps */
 209   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 210     for (s = 0; s < generations[g].n_steps; s++) {
 211         initStep(&generations[g].steps[s], g, s);
 212     }
 213   }
 214
 215   for (s = 0; s < n_nurseries; s++) {
 216       initStep(&nurseries[s], 0, s);
 217   }
 218
 219   /* Set up the destination pointers in each younger gen. step */
 220   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 221     for (s = 0; s < generations[g].n_steps-1; s++) {
 222       generations[g].steps[s].to = &generations[g].steps[s+1];
 223     }
 224     generations[g].steps[s].to = &generations[g+1].steps[0];
 225   }
 226   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 227
 228   for (s = 0; s < n_nurseries; s++) {
 229       nurseries[s].to = generations[0].steps[0].to;
 230   }
 231
 232   /* The oldest generation has one step. */
 233   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 234       if (RtsFlags.GcFlags.generations == 1) {
 235           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 236       } else {
 237           oldest_gen->steps[0].mark = 1;
 238           if (RtsFlags.GcFlags.compact)
 239               oldest_gen->steps[0].compact = 1;
 240       }
 241   }
 242
 243   generations[0].max_blocks = 0;
 244   g0s0 = &generations[0].steps[0];
 245
 246   /* The allocation area.  Policy: keep the allocation area
 247    * small to begin with, even if we have a large suggested heap
 248    * size.  Reason: we're going to do a major collection first, and we
 249    * don't want it to be a big one.  This vague idea is borne out by
 250    * rigorous experimental evidence.
 251    */
 252   allocNurseries();
 253
 254   weak_ptr_list = NULL;
 255   caf_list = NULL;
 256   revertible_caf_list = NULL;
 257
 258   /* initialise the allocate() interface */
 259   alloc_blocks = 0;
 260   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 261
 262   exec_block = NULL;
 263
 264   /* Tell GNU multi-precision pkg about our custom alloc functions */
 265   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 266
 267 #ifdef THREADED_RTS
 268   initSpinLock(&gc_alloc_block_sync);
 269   whitehole_spin = 0;
 270 #endif
 271
 272   N = 0;
 273
 274   initGcThreads();
 275
 276   IF_DEBUG(gc, statDescribeGens());
 277
 278   RELEASE_SM_LOCK;
 279 }
 280
 281 void
 282 exitStorage (void)
 283 {
 284     stat_exit(calcAllocated());
 285 }
 286
 287 void
 288 freeStorage (void)
 289 {
 290     stgFree(g0s0); // frees all the steps
 291     stgFree(generations);
 292     freeAllMBlocks();
 293 #if defined(THREADED_RTS)
 294     closeMutex(&sm_mutex);
 295 #endif
 296     stgFree(nurseries);
 297 }
 298
 299 /* -----------------------------------------------------------------------------
 300    CAF management.
 301
 302    The entry code for every CAF does the following:
 303
 304       - builds a CAF_BLACKHOLE in the heap
 305       - pushes an update frame pointing to the CAF_BLACKHOLE
 306       - invokes UPD_CAF(), which:
 307           - calls newCaf, below
 308           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 309
 310    Why do we build a BLACKHOLE in the heap rather than just updating
 311    the thunk directly?  It's so that we only need one kind of update
 312    frame - otherwise we'd need a static version of the update frame too.
 313
 314    newCaf() does the following:
 315
 316       - it puts the CAF on the oldest generation's mut-once list.
 317         This is so that we can treat the CAF as a root when collecting
 318         younger generations.
 319
 320    For GHCI, we have additional requirements when dealing with CAFs:
 321
 322       - we must *retain* all dynamically-loaded CAFs ever entered,
 323         just in case we need them again.
 324       - we must be able to *revert* CAFs that have been evaluated, to
 325         their pre-evaluated form.
 326
 327       To do this, we use an additional CAF list.  When newCaf() is
 328       called on a dynamically-loaded CAF, we add it to the CAF list
 329       instead of the old-generation mutable list, and save away its
 330       old info pointer (in caf->saved_info) for later reversion.
 331
 332       To revert all the CAFs, we traverse the CAF list and reset the
 333       info pointer to caf->saved_info, then throw away the CAF list.
 334       (see GC.c:revertCAFs()).
 335
 336       -- SDM 29/1/01
 337
 338    -------------------------------------------------------------------------- */
 339
 340 void
 341 newCAF(StgClosure* caf)
 342 {
 343   ACQUIRE_SM_LOCK;
 344
 345   if(keepCAFs)
 346   {
 347     // HACK:
 348     // If we are in GHCi _and_ we are using dynamic libraries,
 349     // then we can't redirect newCAF calls to newDynCAF (see below),
 350     // so we make newCAF behave almost like newDynCAF.
 351     // The dynamic libraries might be used by both the interpreted
 352     // program and GHCi itself, so they must not be reverted.
 353     // This also means that in GHCi with dynamic libraries, CAFs are not
 354     // garbage collected. If this turns out to be a problem, we could
 355     // do another hack here and do an address range test on caf to figure
 356     // out whether it is from a dynamic library.
 357     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 358     ((StgIndStatic *)caf)->static_link = caf_list;
 359     caf_list = caf;
 360   }
 361   else
 362   {
 363     /* Put this CAF on the mutable list for the old generation.
 364     * This is a HACK - the IND_STATIC closure doesn't really have
 365     * a mut_link field, but we pretend it has - in fact we re-use
 366     * the STATIC_LINK field for the time being, because when we
 367     * come to do a major GC we won't need the mut_link field
 368     * any more and can use it as a STATIC_LINK.
 369     */
 370     ((StgIndStatic *)caf)->saved_info = NULL;
 371     recordMutableGen(caf, oldest_gen->no);
 372   }
 373
 374   RELEASE_SM_LOCK;
 375 }
 376
 377 // An alternate version of newCaf which is used for dynamically loaded
 378 // object code in GHCi.  In this case we want to retain *all* CAFs in
 379 // the object code, because they might be demanded at any time from an
 380 // expression evaluated on the command line.
 381 // Also, GHCi might want to revert CAFs, so we add these to the
 382 // revertible_caf_list.
 383 //
 384 // The linker hackily arranges that references to newCaf from dynamic
 385 // code end up pointing to newDynCAF.
 386 void
 387 newDynCAF(StgClosure *caf)
 388 {
 389     ACQUIRE_SM_LOCK;
 390
 391     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 392     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 393     revertible_caf_list = caf;
 394
 395     RELEASE_SM_LOCK;
 396 }
 397
 398 /* -----------------------------------------------------------------------------
 399    Nursery management.
 400    -------------------------------------------------------------------------- */
 401
 402 static bdescr *
 403 allocNursery (step *stp, bdescr *tail, nat blocks)
 404 {
 405     bdescr *bd;
 406     nat i;
 407
 408     // Allocate a nursery: we allocate fresh blocks one at a time and
 409     // cons them on to the front of the list, not forgetting to update
 410     // the back pointer on the tail of the list to point to the new block.
 411     for (i=0; i < blocks; i++) {
 412         // @LDV profiling
 413         /*
 414           processNursery() in LdvProfile.c assumes that every block group in
 415           the nursery contains only a single block. So, if a block group is
 416           given multiple blocks, change processNursery() accordingly.
 417         */
 418         bd = allocBlock();
 419         bd->link = tail;
 420         // double-link the nursery: we might need to insert blocks
 421         if (tail != NULL) {
 422             tail->u.back = bd;
 423         }
 424         bd->step = stp;
 425         bd->gen_no = 0;
 426         bd->flags = 0;
 427         bd->free = bd->start;
 428         tail = bd;
 429     }
 430     tail->u.back = NULL;
 431     return tail;
 432 }
 433
 434 static void
 435 assignNurseriesToCapabilities (void)
 436 {
 437 #ifdef THREADED_RTS
 438     nat i;
 439
 440     for (i = 0; i < n_nurseries; i++) {
 441         capabilities[i].r.rNursery        = &nurseries[i];
 442         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 443         capabilities[i].r.rCurrentAlloc   = NULL;
 444     }
 445 #else /* THREADED_RTS */
 446     MainCapability.r.rNursery        = &nurseries[0];
 447     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 448     MainCapability.r.rCurrentAlloc   = NULL;
 449 #endif
 450 }
 451
 452 void
 453 allocNurseries( void )
 454 {
 455     nat i;
 456
 457     for (i = 0; i < n_nurseries; i++) {
 458         nurseries[i].blocks =
 459             allocNursery(&nurseries[i], NULL,
 460                          RtsFlags.GcFlags.minAllocAreaSize);
 461         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 462         nurseries[i].old_blocks   = NULL;
 463         nurseries[i].n_old_blocks = 0;
 464     }
 465     assignNurseriesToCapabilities();
 466 }
 467
 468 void
 469 resetNurseries( void )
 470 {
 471     nat i;
 472     bdescr *bd;
 473     step *stp;
 474
 475     for (i = 0; i < n_nurseries; i++) {
 476         stp = &nurseries[i];
 477         for (bd = stp->blocks; bd; bd = bd->link) {
 478             bd->free = bd->start;
 479             ASSERT(bd->gen_no == 0);
 480             ASSERT(bd->step == stp);
 481             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 482         }
 483     }
 484     assignNurseriesToCapabilities();
 485 }
 486
 487 lnat
 488 countNurseryBlocks (void)
 489 {
 490     nat i;
 491     lnat blocks = 0;
 492
 493     for (i = 0; i < n_nurseries; i++) {
 494         blocks += nurseries[i].n_blocks;
 495     }
 496     return blocks;
 497 }
 498
 499 static void
 500 resizeNursery ( step *stp, nat blocks )
 501 {
 502   bdescr *bd;
 503   nat nursery_blocks;
 504
 505   nursery_blocks = stp->n_blocks;
 506   if (nursery_blocks == blocks) return;
 507
 508   if (nursery_blocks < blocks) {
 509       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 510                  blocks);
 511     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 512   }
 513   else {
 514     bdescr *next_bd;
 515
 516     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 517                blocks);
 518
 519     bd = stp->blocks;
 520     while (nursery_blocks > blocks) {
 521         next_bd = bd->link;
 522         next_bd->u.back = NULL;
 523         nursery_blocks -= bd->blocks; // might be a large block
 524         freeGroup(bd);
 525         bd = next_bd;
 526     }
 527     stp->blocks = bd;
 528     // might have gone just under, by freeing a large block, so make
 529     // up the difference.
 530     if (nursery_blocks < blocks) {
 531         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 532     }
 533   }
 534
 535   stp->n_blocks = blocks;
 536   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 537 }
 538
 539 //
 540 // Resize each of the nurseries to the specified size.
 541 //
 542 void
 543 resizeNurseriesFixed (nat blocks)
 544 {
 545     nat i;
 546     for (i = 0; i < n_nurseries; i++) {
 547         resizeNursery(&nurseries[i], blocks);
 548     }
 549 }
 550
 551 //
 552 // Resize the nurseries to the total specified size.
 553 //
 554 void
 555 resizeNurseries (nat blocks)
 556 {
 557     // If there are multiple nurseries, then we just divide the number
 558     // of available blocks between them.
 559     resizeNurseriesFixed(blocks / n_nurseries);
 560 }
 561
 562
 563 /* -----------------------------------------------------------------------------
 564    move_TSO is called to update the TSO structure after it has been
 565    moved from one place to another.
 566    -------------------------------------------------------------------------- */
 567
 568 void
 569 move_TSO (StgTSO *src, StgTSO *dest)
 570 {
 571     ptrdiff_t diff;
 572
 573     // relocate the stack pointer...
 574     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 575     dest->sp = (StgPtr)dest->sp + diff;
 576 }
 577
 578 /* -----------------------------------------------------------------------------
 579    The allocate() interface
 580
 581    allocateInGen() function allocates memory directly into a specific
 582    generation.  It always succeeds, and returns a chunk of memory n
 583    words long.  n can be larger than the size of a block if necessary,
 584    in which case a contiguous block group will be allocated.
 585
 586    allocate(n) is equivalent to allocateInGen(g0).
 587    -------------------------------------------------------------------------- */
 588
 589 StgPtr
 590 allocateInGen (generation *g, lnat n)
 591 {
 592     step *stp;
 593     bdescr *bd;
 594     StgPtr ret;
 595
 596     ACQUIRE_SM_LOCK;
 597
 598     TICK_ALLOC_HEAP_NOCTR(n);
 599     CCS_ALLOC(CCCS,n);
 600
 601     stp = &g->steps[0];
 602
 603     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 604     {
 605         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 606
 607         // Attempting to allocate an object larger than maxHeapSize
 608         // should definitely be disallowed.  (bug #1791)
 609         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 610             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 611             heapOverflow();
 612             // heapOverflow() doesn't exit (see #2592), but we aren't
 613             // in a position to do a clean shutdown here: we
 614             // either have to allocate the memory or exit now.
 615             // Allocating the memory would be bad, because the user
 616             // has requested that we not exceed maxHeapSize, so we
 617             // just exit.
 618             stg_exit(EXIT_HEAPOVERFLOW);
 619         }
 620
 621         bd = allocGroup(req_blocks);
 622         dbl_link_onto(bd, &stp->large_objects);
 623         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 624         alloc_blocks += bd->blocks;
 625         bd->gen_no  = g->no;
 626         bd->step = stp;
 627         bd->flags = BF_LARGE;
 628         bd->free = bd->start + n;
 629         ret = bd->start;
 630     }
 631     else
 632     {
 633         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 634         bd = stp->blocks;
 635         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 636             bd = allocBlock();
 637             bd->gen_no = g->no;
 638             bd->step = stp;
 639             bd->flags = 0;
 640             bd->link = stp->blocks;
 641             stp->blocks = bd;
 642             stp->n_blocks++;
 643             alloc_blocks++;
 644         }
 645         ret = bd->free;
 646         bd->free += n;
 647     }
 648
 649     RELEASE_SM_LOCK;
 650
 651     return ret;
 652 }
 653
 654 StgPtr
 655 allocate (lnat n)
 656 {
 657     return allocateInGen(g0,n);
 658 }
 659
 660 lnat
 661 allocatedBytes( void )
 662 {
 663     lnat allocated;
 664
 665     allocated = alloc_blocks * BLOCK_SIZE_W;
 666     if (pinned_object_block != NULL) {
 667         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 668             pinned_object_block->free;
 669     }
 670
 671     return allocated;
 672 }
 673
 674 // split N blocks off the front of the given bdescr, returning the
 675 // new block group.  We treat the remainder as if it
 676 // had been freshly allocated in generation 0.
 677 bdescr *
 678 splitLargeBlock (bdescr *bd, nat blocks)
 679 {
 680     bdescr *new_bd;
 681
 682     // subtract the original number of blocks from the counter first
 683     bd->step->n_large_blocks -= bd->blocks;
 684
 685     new_bd = splitBlockGroup (bd, blocks);
 686
 687     dbl_link_onto(new_bd, &g0s0->large_objects);
 688     g0s0->n_large_blocks += new_bd->blocks;
 689     new_bd->gen_no  = g0s0->no;
 690     new_bd->step    = g0s0;
 691     new_bd->flags   = BF_LARGE;
 692     new_bd->free    = bd->free;
 693     ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
 694
 695     // add the new number of blocks to the counter.  Due to the gaps
 696     // for block descriptor, new_bd->blocks + bd->blocks might not be
 697     // equal to the original bd->blocks, which is why we do it this way.
 698     bd->step->n_large_blocks += bd->blocks;
 699
 700     return new_bd;
 701 }
 702
 703 /* -----------------------------------------------------------------------------
 704    allocateLocal()
 705
 706    This allocates memory in the current thread - it is intended for
 707    use primarily from STG-land where we have a Capability.  It is
 708    better than allocate() because it doesn't require taking the
 709    sm_mutex lock in the common case.
 710
 711    Memory is allocated directly from the nursery if possible (but not
 712    from the current nursery block, so as not to interfere with
 713    Hp/HpLim).
 714    -------------------------------------------------------------------------- */
 715
 716 StgPtr
 717 allocateLocal (Capability *cap, lnat n)
 718 {
 719     bdescr *bd;
 720     StgPtr p;
 721
 722     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 723         return allocateInGen(g0,n);
 724     }
 725
 726     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 727
 728     TICK_ALLOC_HEAP_NOCTR(n);
 729     CCS_ALLOC(CCCS,n);
 730
 731     bd = cap->r.rCurrentAlloc;
 732     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 733
 734         // The CurrentAlloc block is full, we need to find another
 735         // one.  First, we try taking the next block from the
 736         // nursery:
 737         bd = cap->r.rCurrentNursery->link;
 738
 739         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 740             // The nursery is empty, or the next block is already
 741             // full: allocate a fresh block (we can't fail here).
 742             ACQUIRE_SM_LOCK;
 743             bd = allocBlock();
 744             cap->r.rNursery->n_blocks++;
 745             RELEASE_SM_LOCK;
 746             bd->gen_no = 0;
 747             bd->step = cap->r.rNursery;
 748             bd->flags = 0;
 749             // NO: alloc_blocks++;
 750             // calcAllocated() uses the size of the nursery, and we've
 751             // already bumpted nursery->n_blocks above.  We'll GC
 752             // pretty quickly now anyway, because MAYBE_GC() will
 753             // notice that CurrentNursery->link is NULL.
 754         } else {
 755             // we have a block in the nursery: take it and put
 756             // it at the *front* of the nursery list, and use it
 757             // to allocate() from.
 758             cap->r.rCurrentNursery->link = bd->link;
 759             if (bd->link != NULL) {
 760                 bd->link->u.back = cap->r.rCurrentNursery;
 761             }
 762         }
 763         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 764         cap->r.rCurrentAlloc = bd;
 765         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 766     }
 767     p = bd->free;
 768     bd->free += n;
 769     return p;
 770 }
 771
 772 /* ---------------------------------------------------------------------------
 773    Allocate a fixed/pinned object.
 774
 775    We allocate small pinned objects into a single block, allocating a
 776    new block when the current one overflows.  The block is chained
 777    onto the large_object_list of generation 0 step 0.
 778
 779    NOTE: The GC can't in general handle pinned objects.  This
 780    interface is only safe to use for ByteArrays, which have no
 781    pointers and don't require scavenging.  It works because the
 782    block's descriptor has the BF_LARGE flag set, so the block is
 783    treated as a large object and chained onto various lists, rather
 784    than the individual objects being copied.  However, when it comes
 785    to scavenge the block, the GC will only scavenge the first object.
 786    The reason is that the GC can't linearly scan a block of pinned
 787    objects at the moment (doing so would require using the
 788    mostly-copying techniques).  But since we're restricting ourselves
 789    to pinned ByteArrays, not scavenging is ok.
 790
 791    This function is called by newPinnedByteArray# which immediately
 792    fills the allocated memory with a MutableByteArray#.
 793    ------------------------------------------------------------------------- */
 794
 795 StgPtr
 796 allocatePinned( lnat n )
 797 {
 798     StgPtr p;
 799     bdescr *bd = pinned_object_block;
 800
 801     // If the request is for a large object, then allocate()
 802     // will give us a pinned object anyway.
 803     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 804         p = allocate(n);
 805         Bdescr(p)->flags |= BF_PINNED;
 806         return p;
 807     }
 808
 809     ACQUIRE_SM_LOCK;
 810
 811     TICK_ALLOC_HEAP_NOCTR(n);
 812     CCS_ALLOC(CCCS,n);
 813
 814     // If we don't have a block of pinned objects yet, or the current
 815     // one isn't large enough to hold the new object, allocate a new one.
 816     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 817         pinned_object_block = bd = allocBlock();
 818         dbl_link_onto(bd, &g0s0->large_objects);
 819         g0s0->n_large_blocks++;
 820         bd->gen_no = 0;
 821         bd->step   = g0s0;
 822         bd->flags  = BF_PINNED | BF_LARGE;
 823         bd->free   = bd->start;
 824         alloc_blocks++;
 825     }
 826
 827     p = bd->free;
 828     bd->free += n;
 829     RELEASE_SM_LOCK;
 830     return p;
 831 }
 832
 833 /* -----------------------------------------------------------------------------
 834    Write Barriers
 835    -------------------------------------------------------------------------- */
 836
 837 /*
 838    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 839    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 840    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 841    and is put on the mutable list.
 842 */
 843 void
 844 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 845 {
 846     Capability *cap = regTableToCapability(reg);
 847     bdescr *bd;
 848     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 849         p->header.info = &stg_MUT_VAR_DIRTY_info;
 850         bd = Bdescr((StgPtr)p);
 851         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 852     }
 853 }
 854
 855 // Setting a TSO's link field with a write barrier.
 856 // It is *not* necessary to call this function when
 857 //    * setting the link field to END_TSO_QUEUE
 858 //    * putting a TSO on the blackhole_queue
 859 //    * setting the link field of the currently running TSO, as it
 860 //      will already be dirty.
 861 void
 862 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 863 {
 864     bdescr *bd;
 865     if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
 866         tso->flags |= TSO_LINK_DIRTY;
 867         bd = Bdescr((StgPtr)tso);
 868         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 869     }
 870     tso->_link = target;
 871 }
 872
 873 void
 874 dirty_TSO (Capability *cap, StgTSO *tso)
 875 {
 876     bdescr *bd;
 877     if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
 878         bd = Bdescr((StgPtr)tso);
 879         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 880     }
 881     tso->flags |= TSO_DIRTY;
 882 }
 883
 884 /*
 885    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 886    on the mutable list; a MVAR_DIRTY is.  When written to, a
 887    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 888    The check for MVAR_CLEAN is inlined at the call site for speed,
 889    this really does make a difference on concurrency-heavy benchmarks
 890    such as Chaneneos and cheap-concurrency.
 891 */
 892 void
 893 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 894 {
 895     Capability *cap = regTableToCapability(reg);
 896     bdescr *bd;
 897     bd = Bdescr((StgPtr)p);
 898     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 899 }
 900
 901 /* -----------------------------------------------------------------------------
 902    Allocation functions for GMP.
 903
 904    These all use the allocate() interface - we can't have any garbage
 905    collection going on during a gmp operation, so we use allocate()
 906    which always succeeds.  The gmp operations which might need to
 907    allocate will ask the storage manager (via doYouWantToGC()) whether
 908    a garbage collection is required, in case we get into a loop doing
 909    only allocate() style allocation.
 910    -------------------------------------------------------------------------- */
 911
 912 static void *
 913 stgAllocForGMP (size_t size_in_bytes)
 914 {
 915   StgArrWords* arr;
 916   nat data_size_in_words, total_size_in_words;
 917
 918   /* round up to a whole number of words */
 919   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 920   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 921
 922   /* allocate and fill it in. */
 923 #if defined(THREADED_RTS)
 924   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 925 #else
 926   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 927 #endif
 928   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 929
 930   /* and return a ptr to the goods inside the array */
 931   return arr->payload;
 932 }
 933
 934 static void *
 935 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 936 {
 937     size_t min_size;
 938     void *new_stuff_ptr = stgAllocForGMP(new_size);
 939     nat i = 0;
 940     char *p = (char *) ptr;
 941     char *q = (char *) new_stuff_ptr;
 942
 943     min_size = old_size < new_size ? old_size : new_size;
 944     for (; i < min_size; i++, p++, q++) {
 945         *q = *p;
 946     }
 947
 948     return(new_stuff_ptr);
 949 }
 950
 951 static void
 952 stgDeallocForGMP (void *ptr STG_UNUSED,
 953                   size_t size STG_UNUSED)
 954 {
 955     /* easy for us: the garbage collector does the dealloc'n */
 956 }
 957
 958 /* -----------------------------------------------------------------------------
 959  * Stats and stuff
 960  * -------------------------------------------------------------------------- */
 961
 962 /* -----------------------------------------------------------------------------
 963  * calcAllocated()
 964  *
 965  * Approximate how much we've allocated: number of blocks in the
 966  * nursery + blocks allocated via allocate() - unused nusery blocks.
 967  * This leaves a little slop at the end of each block, and doesn't
 968  * take into account large objects (ToDo).
 969  * -------------------------------------------------------------------------- */
 970
 971 lnat
 972 calcAllocated( void )
 973 {
 974   nat allocated;
 975   bdescr *bd;
 976
 977   allocated = allocatedBytes();
 978   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 979
 980   {
 981 #ifdef THREADED_RTS
 982   nat i;
 983   for (i = 0; i < n_nurseries; i++) {
 984       Capability *cap;
 985       for ( bd = capabilities[i].r.rCurrentNursery->link;
 986             bd != NULL; bd = bd->link ) {
 987           allocated -= BLOCK_SIZE_W;
 988       }
 989       cap = &capabilities[i];
 990       if (cap->r.rCurrentNursery->free <
 991           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 992           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 993               - cap->r.rCurrentNursery->free;
 994       }
 995   }
 996 #else
 997   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 998
 999   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
1000       allocated -= BLOCK_SIZE_W;
1001   }
1002   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
1003       allocated -= (current_nursery->start + BLOCK_SIZE_W)
1004           - current_nursery->free;
1005   }
1006 #endif
1007   }
1008
1009   total_allocated += allocated;
1010   return allocated;
1011 }
1012
1013 /* Approximate the amount of live data in the heap.  To be called just
1014  * after garbage collection (see GarbageCollect()).
1015  */
1016 lnat
1017 calcLiveBlocks(void)
1018 {
1019   nat g, s;
1020   lnat live = 0;
1021   step *stp;
1022
1023   if (RtsFlags.GcFlags.generations == 1) {
1024       return g0s0->n_large_blocks + g0s0->n_blocks;
1025   }
1026
1027   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1028     for (s = 0; s < generations[g].n_steps; s++) {
1029       /* approximate amount of live data (doesn't take into account slop
1030        * at end of each block).
1031        */
1032       if (g == 0 && s == 0) {
1033           continue;
1034       }
1035       stp = &generations[g].steps[s];
1036       live += stp->n_large_blocks + stp->n_blocks;
1037     }
1038   }
1039   return live;
1040 }
1041
1042 lnat
1043 countOccupied(bdescr *bd)
1044 {
1045     lnat words;
1046
1047     words = 0;
1048     for (; bd != NULL; bd = bd->link) {
1049         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1050         words += bd->free - bd->start;
1051     }
1052     return words;
1053 }
1054
1055 // Return an accurate count of the live data in the heap, excluding
1056 // generation 0.
1057 lnat
1058 calcLiveWords(void)
1059 {
1060     nat g, s;
1061     lnat live;
1062     step *stp;
1063
1064     if (RtsFlags.GcFlags.generations == 1) {
1065         return g0s0->n_words + countOccupied(g0s0->large_objects);
1066     }
1067
1068     live = 0;
1069     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1070         for (s = 0; s < generations[g].n_steps; s++) {
1071             if (g == 0 && s == 0) continue;
1072             stp = &generations[g].steps[s];
1073             live += stp->n_words + countOccupied(stp->large_objects);
1074         }
1075     }
1076     return live;
1077 }
1078
1079 /* Approximate the number of blocks that will be needed at the next
1080  * garbage collection.
1081  *
1082  * Assume: all data currently live will remain live.  Steps that will
1083  * be collected next time will therefore need twice as many blocks
1084  * since all the data will be copied.
1085  */
1086 extern lnat
1087 calcNeeded(void)
1088 {
1089     lnat needed = 0;
1090     nat g, s;
1091     step *stp;
1092
1093     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1094         for (s = 0; s < generations[g].n_steps; s++) {
1095             if (g == 0 && s == 0) { continue; }
1096             stp = &generations[g].steps[s];
1097
1098             // we need at least this much space
1099             needed += stp->n_blocks + stp->n_large_blocks;
1100
1101             // any additional space needed to collect this gen next time?
1102             if (g == 0 || // always collect gen 0
1103                 (generations[g].steps[0].n_blocks +
1104                  generations[g].steps[0].n_large_blocks
1105                  > generations[g].max_blocks)) {
1106                 // we will collect this gen next time
1107                 if (stp->mark) {
1108                     //  bitmap:
1109                     needed += stp->n_blocks / BITS_IN(W_);
1110                     //  mark stack:
1111                     needed += stp->n_blocks / 100;
1112                 }
1113                 if (stp->compact) {
1114                     continue; // no additional space needed for compaction
1115                 } else {
1116                     needed += stp->n_blocks;
1117                 }
1118             }
1119         }
1120     }
1121     return needed;
1122 }
1123
1124 /* ----------------------------------------------------------------------------
1125    Executable memory
1126
1127    Executable memory must be managed separately from non-executable
1128    memory.  Most OSs these days require you to jump through hoops to
1129    dynamically allocate executable memory, due to various security
1130    measures.
1131
1132    Here we provide a small memory allocator for executable memory.
1133    Memory is managed with a page granularity; we allocate linearly
1134    in the page, and when the page is emptied (all objects on the page
1135    are free) we free the page again, not forgetting to make it
1136    non-executable.
1137
1138    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1139          the linker cannot use allocateExec for loading object code files
1140          on Windows. Once allocateExec can handle larger objects, the linker
1141          should be modified to use allocateExec instead of VirtualAlloc.
1142    ------------------------------------------------------------------------- */
1143
1144 #if defined(linux_HOST_OS)
1145
1146 // On Linux we need to use libffi for allocating executable memory,
1147 // because it knows how to work around the restrictions put in place
1148 // by SELinux.
1149
1150 void *allocateExec (nat bytes, void **exec_ret)
1151 {
1152     void **ret, **exec;
1153     ACQUIRE_SM_LOCK;
1154     ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1155     RELEASE_SM_LOCK;
1156     if (ret == NULL) return ret;
1157     *ret = ret; // save the address of the writable mapping, for freeExec().
1158     *exec_ret = exec + 1;
1159     return (ret + 1);
1160 }
1161
1162 // freeExec gets passed the executable address, not the writable address.
1163 void freeExec (void *addr)
1164 {
1165     void *writable;
1166     writable = *((void**)addr - 1);
1167     ACQUIRE_SM_LOCK;
1168     ffi_closure_free (writable);
1169     RELEASE_SM_LOCK
1170 }
1171
1172 #else
1173
1174 void *allocateExec (nat bytes, void **exec_ret)
1175 {
1176     void *ret;
1177     nat n;
1178
1179     ACQUIRE_SM_LOCK;
1180
1181     // round up to words.
1182     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1183
1184     if (n+1 > BLOCK_SIZE_W) {
1185         barf("allocateExec: can't handle large objects");
1186     }
1187
1188     if (exec_block == NULL ||
1189         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1190         bdescr *bd;
1191         lnat pagesize = getPageSize();
1192         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1193         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1194         bd->gen_no = 0;
1195         bd->flags = BF_EXEC;
1196         bd->link = exec_block;
1197         if (exec_block != NULL) {
1198             exec_block->u.back = bd;
1199         }
1200         bd->u.back = NULL;
1201         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1202         exec_block = bd;
1203     }
1204     *(exec_block->free) = n;  // store the size of this chunk
1205     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1206     ret = exec_block->free + 1;
1207     exec_block->free += n + 1;
1208
1209     RELEASE_SM_LOCK
1210     *exec_ret = ret;
1211     return ret;
1212 }
1213
1214 void freeExec (void *addr)
1215 {
1216     StgPtr p = (StgPtr)addr - 1;
1217     bdescr *bd = Bdescr((StgPtr)p);
1218
1219     if ((bd->flags & BF_EXEC) == 0) {
1220         barf("freeExec: not executable");
1221     }
1222
1223     if (*(StgPtr)p == 0) {
1224         barf("freeExec: already free?");
1225     }
1226
1227     ACQUIRE_SM_LOCK;
1228
1229     bd->gen_no -= *(StgPtr)p;
1230     *(StgPtr)p = 0;
1231
1232     if (bd->gen_no == 0) {
1233         // Free the block if it is empty, but not if it is the block at
1234         // the head of the queue.
1235         if (bd != exec_block) {
1236             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1237             dbl_link_remove(bd, &exec_block);
1238             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1239             freeGroup(bd);
1240         } else {
1241             bd->free = bd->start;
1242         }
1243     }
1244
1245     RELEASE_SM_LOCK
1246 }
1247
1248 #endif /* mingw32_HOST_OS */
1249
1250 /* -----------------------------------------------------------------------------
1251    Debugging
1252
1253    memInventory() checks for memory leaks by counting up all the
1254    blocks we know about and comparing that to the number of blocks
1255    allegedly floating around in the system.
1256    -------------------------------------------------------------------------- */
1257
1258 #ifdef DEBUG
1259
1260 // Useful for finding partially full blocks in gdb
1261 void findSlop(bdescr *bd);
1262 void findSlop(bdescr *bd)
1263 {
1264     lnat slop;
1265
1266     for (; bd != NULL; bd = bd->link) {
1267         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1268         if (slop > (1024/sizeof(W_))) {
1269             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1270                        bd->start, bd, slop / (1024/sizeof(W_)));
1271         }
1272     }
1273 }
1274
1275 nat
1276 countBlocks(bdescr *bd)
1277 {
1278     nat n;
1279     for (n=0; bd != NULL; bd=bd->link) {
1280         n += bd->blocks;
1281     }
1282     return n;
1283 }
1284
1285 // (*1) Just like countBlocks, except that we adjust the count for a
1286 // megablock group so that it doesn't include the extra few blocks
1287 // that would be taken up by block descriptors in the second and
1288 // subsequent megablock.  This is so we can tally the count with the
1289 // number of blocks allocated in the system, for memInventory().
1290 static nat
1291 countAllocdBlocks(bdescr *bd)
1292 {
1293     nat n;
1294     for (n=0; bd != NULL; bd=bd->link) {
1295         n += bd->blocks;
1296         // hack for megablock groups: see (*1) above
1297         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1298             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1299                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1300         }
1301     }
1302     return n;
1303 }
1304
1305 static lnat
1306 stepBlocks (step *stp)
1307 {
1308     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1309     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1310     return stp->n_blocks + stp->n_old_blocks +
1311             countAllocdBlocks(stp->large_objects);
1312 }
1313
1314 // If memInventory() calculates that we have a memory leak, this
1315 // function will try to find the block(s) that are leaking by marking
1316 // all the ones that we know about, and search through memory to find
1317 // blocks that are not marked.  In the debugger this can help to give
1318 // us a clue about what kind of block leaked.  In the future we might
1319 // annotate blocks with their allocation site to give more helpful
1320 // info.
1321 static void
1322 findMemoryLeak (void)
1323 {
1324   nat g, s, i;
1325   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1326       for (i = 0; i < n_capabilities; i++) {
1327           markBlocks(capabilities[i].mut_lists[g]);
1328       }
1329       markBlocks(generations[g].mut_list);
1330       for (s = 0; s < generations[g].n_steps; s++) {
1331           markBlocks(generations[g].steps[s].blocks);
1332           markBlocks(generations[g].steps[s].large_objects);
1333       }
1334   }
1335
1336   for (i = 0; i < n_nurseries; i++) {
1337       markBlocks(nurseries[i].blocks);
1338       markBlocks(nurseries[i].large_objects);
1339   }
1340
1341 #ifdef PROFILING
1342   // TODO:
1343   // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1344   //    markRetainerBlocks();
1345   // }
1346 #endif
1347
1348   // count the blocks allocated by the arena allocator
1349   // TODO:
1350   // markArenaBlocks();
1351
1352   // count the blocks containing executable memory
1353   markBlocks(exec_block);
1354
1355   reportUnmarkedBlocks();
1356 }
1357
1358
1359 void
1360 memInventory (rtsBool show)
1361 {
1362   nat g, s, i;
1363   step *stp;
1364   lnat gen_blocks[RtsFlags.GcFlags.generations];
1365   lnat nursery_blocks, retainer_blocks,
1366        arena_blocks, exec_blocks;
1367   lnat live_blocks = 0, free_blocks = 0;
1368   rtsBool leak;
1369
1370   // count the blocks we current have
1371
1372   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1373       gen_blocks[g] = 0;
1374       for (i = 0; i < n_capabilities; i++) {
1375           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1376       }
1377       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1378       for (s = 0; s < generations[g].n_steps; s++) {
1379           stp = &generations[g].steps[s];
1380           gen_blocks[g] += stepBlocks(stp);
1381       }
1382   }
1383
1384   nursery_blocks = 0;
1385   for (i = 0; i < n_nurseries; i++) {
1386       nursery_blocks += stepBlocks(&nurseries[i]);
1387   }
1388
1389   retainer_blocks = 0;
1390 #ifdef PROFILING
1391   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1392       retainer_blocks = retainerStackBlocks();
1393   }
1394 #endif
1395
1396   // count the blocks allocated by the arena allocator
1397   arena_blocks = arenaBlocks();
1398
1399   // count the blocks containing executable memory
1400   exec_blocks = countAllocdBlocks(exec_block);
1401
1402   /* count the blocks on the free list */
1403   free_blocks = countFreeList();
1404
1405   live_blocks = 0;
1406   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1407       live_blocks += gen_blocks[g];
1408   }
1409   live_blocks += nursery_blocks +
1410                + retainer_blocks + arena_blocks + exec_blocks;
1411
1412 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1413
1414   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1415
1416   if (show || leak)
1417   {
1418       if (leak) {
1419           debugBelch("Memory leak detected:\n");
1420       } else {
1421           debugBelch("Memory inventory:\n");
1422       }
1423       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1424           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1425                      gen_blocks[g], MB(gen_blocks[g]));
1426       }
1427       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1428                  nursery_blocks, MB(nursery_blocks));
1429       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1430                  retainer_blocks, MB(retainer_blocks));
1431       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1432                  arena_blocks, MB(arena_blocks));
1433       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1434                  exec_blocks, MB(exec_blocks));
1435       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1436                  free_blocks, MB(free_blocks));
1437       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1438                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1439       if (leak) {
1440           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1441                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1442       }
1443   }
1444
1445   if (leak) {
1446       debugBelch("\n");
1447       findMemoryLeak();
1448   }
1449   ASSERT(n_alloc_blocks == live_blocks);
1450   ASSERT(!leak);
1451 }
1452
1453
1454 /* Full heap sanity check. */
1455 void
1456 checkSanity( void )
1457 {
1458     nat g, s;
1459
1460     if (RtsFlags.GcFlags.generations == 1) {
1461         checkHeap(g0s0->blocks);
1462         checkLargeObjects(g0s0->large_objects);
1463     } else {
1464
1465         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1466             for (s = 0; s < generations[g].n_steps; s++) {
1467                 if (g == 0 && s == 0) { continue; }
1468                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1469                        == generations[g].steps[s].n_blocks);
1470                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1471                        == generations[g].steps[s].n_large_blocks);
1472                 checkHeap(generations[g].steps[s].blocks);
1473                 checkLargeObjects(generations[g].steps[s].large_objects);
1474             }
1475         }
1476
1477         for (s = 0; s < n_nurseries; s++) {
1478             ASSERT(countBlocks(nurseries[s].blocks)
1479                    == nurseries[s].n_blocks);
1480             ASSERT(countBlocks(nurseries[s].large_objects)
1481                    == nurseries[s].n_large_blocks);
1482         }
1483
1484         checkFreeListSanity();
1485     }
1486
1487 #if defined(THREADED_RTS)
1488     // check the stacks too in threaded mode, because we don't do a
1489     // full heap sanity check in this case (see checkHeap())
1490     checkMutableLists(rtsTrue);
1491 #else
1492     checkMutableLists(rtsFalse);
1493 #endif
1494 }
1495
1496 /* Nursery sanity check */
1497 void
1498 checkNurserySanity( step *stp )
1499 {
1500     bdescr *bd, *prev;
1501     nat blocks = 0;
1502
1503     prev = NULL;
1504     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1505         ASSERT(bd->u.back == prev);
1506         prev = bd;
1507         blocks += bd->blocks;
1508     }
1509     ASSERT(blocks == stp->n_blocks);
1510 }
1511
1512 // handy function for use in gdb, because Bdescr() is inlined.
1513 extern bdescr *_bdescr( StgPtr p );
1514
1515 bdescr *
1516 _bdescr( StgPtr p )
1517 {
1518     return Bdescr(p);
1519 }
1520
1521 #endif