rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsUtils.h"
  17 #include "RtsFlags.h"
  18 #include "Stats.h"
  19 #include "Hooks.h"
  20 #include "BlockAlloc.h"
  21 #include "MBlock.h"
  22 #include "Weak.h"
  23 #include "Sanity.h"
  24 #include "Arena.h"
  25 #include "OSThreads.h"
  26 #include "Capability.h"
  27 #include "Storage.h"
  28 #include "Schedule.h"
  29 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  30 #include "OSMem.h"
  31 #include "Trace.h"
  32 #include "GC.h"
  33 #include "Evac.h"
  34
  35 #include <stdlib.h>
  36 #include <string.h>
  37
  38 /*
  39  * All these globals require sm_mutex to access in THREADED_RTS mode.
  40  */
  41 StgClosure    *caf_list         = NULL;
  42 StgClosure    *revertible_caf_list = NULL;
  43 rtsBool       keepCAFs;
  44
  45 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  46 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  47 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  48
  49 generation *generations = NULL; /* all the generations */
  50 generation *g0          = NULL; /* generation 0, for convenience */
  51 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  52 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  53
  54 nat total_steps         = 0;
  55 step *all_steps         = NULL; /* single array of steps */
  56
  57 ullong total_allocated = 0;     /* total memory allocated during run */
  58
  59 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  60 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  61
  62 #ifdef THREADED_RTS
  63 /*
  64  * Storage manager mutex:  protects all the above state from
  65  * simultaneous access by two STG threads.
  66  */
  67 Mutex sm_mutex;
  68 /*
  69  * This mutex is used by atomicModifyMutVar# only
  70  */
  71 Mutex atomic_modify_mutvar_mutex;
  72 #endif
  73
  74
  75 /*
  76  * Forward references
  77  */
  78 static void *stgAllocForGMP   (size_t size_in_bytes);
  79 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  80 static void  stgDeallocForGMP (void *ptr, size_t size);
  81
  82 static void
  83 initStep (step *stp, int g, int s)
  84 {
  85     stp->no = s;
  86     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  87     stp->blocks = NULL;
  88     stp->n_blocks = 0;
  89     stp->n_words = 0;
  90     stp->live_estimate = 0;
  91     stp->old_blocks = NULL;
  92     stp->n_old_blocks = 0;
  93     stp->gen = &generations[g];
  94     stp->gen_no = g;
  95     stp->large_objects = NULL;
  96     stp->n_large_blocks = 0;
  97     stp->scavenged_large_objects = NULL;
  98     stp->n_scavenged_large_blocks = 0;
  99     stp->mark = 0;
 100     stp->compact = 0;
 101     stp->bitmap = NULL;
 102 #ifdef THREADED_RTS
 103     initSpinLock(&stp->sync_todo);
 104     initSpinLock(&stp->sync_large_objects);
 105 #endif
 106     stp->threads = END_TSO_QUEUE;
 107     stp->old_threads = END_TSO_QUEUE;
 108 }
 109
 110 void
 111 initStorage( void )
 112 {
 113   nat g, s;
 114   generation *gen;
 115
 116   if (generations != NULL) {
 117       // multi-init protection
 118       return;
 119   }
 120
 121   initMBlocks();
 122
 123   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 124    * doing something reasonable.
 125    */
 126   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 127   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
 128   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 129   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 130
 131   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 132       RtsFlags.GcFlags.heapSizeSuggestion >
 133       RtsFlags.GcFlags.maxHeapSize) {
 134     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 135   }
 136
 137   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 138       RtsFlags.GcFlags.minAllocAreaSize >
 139       RtsFlags.GcFlags.maxHeapSize) {
 140       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 141       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 142   }
 143
 144   initBlockAllocator();
 145
 146 #if defined(THREADED_RTS)
 147   initMutex(&sm_mutex);
 148   initMutex(&atomic_modify_mutvar_mutex);
 149 #endif
 150
 151   ACQUIRE_SM_LOCK;
 152
 153   /* allocate generation info array */
 154   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 155                                              * sizeof(struct generation_),
 156                                              "initStorage: gens");
 157
 158   /* allocate all the steps into an array.  It is important that we do
 159      it this way, because we need the invariant that two step pointers
 160      can be directly compared to see which is the oldest.
 161      Remember that the last generation has only one step. */
 162   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 163   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 164                                "initStorage: steps");
 165
 166   /* Initialise all generations */
 167   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 168     gen = &generations[g];
 169     gen->no = g;
 170     gen->mut_list = allocBlock();
 171     gen->collections = 0;
 172     gen->par_collections = 0;
 173     gen->failed_promotions = 0;
 174     gen->max_blocks = 0;
 175   }
 176
 177   /* A couple of convenience pointers */
 178   g0 = &generations[0];
 179   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 180
 181   /* Allocate step structures in each generation */
 182   if (RtsFlags.GcFlags.generations > 1) {
 183     /* Only for multiple-generations */
 184
 185     /* Oldest generation: one step */
 186     oldest_gen->n_steps = 1;
 187     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 188                                       * RtsFlags.GcFlags.steps;
 189
 190     /* set up all except the oldest generation with 2 steps */
 191     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 192       generations[g].n_steps = RtsFlags.GcFlags.steps;
 193       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 194     }
 195
 196   } else {
 197     /* single generation, i.e. a two-space collector */
 198     g0->n_steps = 1;
 199     g0->steps   = all_steps;
 200   }
 201
 202 #ifdef THREADED_RTS
 203   n_nurseries = n_capabilities;
 204 #else
 205   n_nurseries = 1;
 206 #endif
 207   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 208                               "initStorage: nurseries");
 209
 210   /* Initialise all steps */
 211   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 212     for (s = 0; s < generations[g].n_steps; s++) {
 213         initStep(&generations[g].steps[s], g, s);
 214     }
 215   }
 216
 217   for (s = 0; s < n_nurseries; s++) {
 218       initStep(&nurseries[s], 0, s);
 219   }
 220
 221   /* Set up the destination pointers in each younger gen. step */
 222   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 223     for (s = 0; s < generations[g].n_steps-1; s++) {
 224       generations[g].steps[s].to = &generations[g].steps[s+1];
 225     }
 226     generations[g].steps[s].to = &generations[g+1].steps[0];
 227   }
 228   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 229
 230   for (s = 0; s < n_nurseries; s++) {
 231       nurseries[s].to = generations[0].steps[0].to;
 232   }
 233
 234   /* The oldest generation has one step. */
 235   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 236       if (RtsFlags.GcFlags.generations == 1) {
 237           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 238       } else {
 239           oldest_gen->steps[0].mark = 1;
 240           if (RtsFlags.GcFlags.compact)
 241               oldest_gen->steps[0].compact = 1;
 242       }
 243   }
 244
 245   generations[0].max_blocks = 0;
 246   g0s0 = &generations[0].steps[0];
 247
 248   /* The allocation area.  Policy: keep the allocation area
 249    * small to begin with, even if we have a large suggested heap
 250    * size.  Reason: we're going to do a major collection first, and we
 251    * don't want it to be a big one.  This vague idea is borne out by
 252    * rigorous experimental evidence.
 253    */
 254   allocNurseries();
 255
 256   weak_ptr_list = NULL;
 257   caf_list = NULL;
 258   revertible_caf_list = NULL;
 259
 260   /* initialise the allocate() interface */
 261   alloc_blocks = 0;
 262   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 263
 264   /* Tell GNU multi-precision pkg about our custom alloc functions */
 265   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 266
 267 #ifdef THREADED_RTS
 268   initSpinLock(&gc_alloc_block_sync);
 269   initSpinLock(&recordMutableGen_sync);
 270   whitehole_spin = 0;
 271 #endif
 272
 273   IF_DEBUG(gc, statDescribeGens());
 274
 275   RELEASE_SM_LOCK;
 276 }
 277
 278 void
 279 exitStorage (void)
 280 {
 281     stat_exit(calcAllocated());
 282 }
 283
 284 void
 285 freeStorage (void)
 286 {
 287     stgFree(g0s0); // frees all the steps
 288     stgFree(generations);
 289     freeAllMBlocks();
 290 #if defined(THREADED_RTS)
 291     closeMutex(&sm_mutex);
 292     closeMutex(&atomic_modify_mutvar_mutex);
 293 #endif
 294     stgFree(nurseries);
 295 }
 296
 297 /* -----------------------------------------------------------------------------
 298    CAF management.
 299
 300    The entry code for every CAF does the following:
 301
 302       - builds a CAF_BLACKHOLE in the heap
 303       - pushes an update frame pointing to the CAF_BLACKHOLE
 304       - invokes UPD_CAF(), which:
 305           - calls newCaf, below
 306           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 307
 308    Why do we build a BLACKHOLE in the heap rather than just updating
 309    the thunk directly?  It's so that we only need one kind of update
 310    frame - otherwise we'd need a static version of the update frame too.
 311
 312    newCaf() does the following:
 313
 314       - it puts the CAF on the oldest generation's mut-once list.
 315         This is so that we can treat the CAF as a root when collecting
 316         younger generations.
 317
 318    For GHCI, we have additional requirements when dealing with CAFs:
 319
 320       - we must *retain* all dynamically-loaded CAFs ever entered,
 321         just in case we need them again.
 322       - we must be able to *revert* CAFs that have been evaluated, to
 323         their pre-evaluated form.
 324
 325       To do this, we use an additional CAF list.  When newCaf() is
 326       called on a dynamically-loaded CAF, we add it to the CAF list
 327       instead of the old-generation mutable list, and save away its
 328       old info pointer (in caf->saved_info) for later reversion.
 329
 330       To revert all the CAFs, we traverse the CAF list and reset the
 331       info pointer to caf->saved_info, then throw away the CAF list.
 332       (see GC.c:revertCAFs()).
 333
 334       -- SDM 29/1/01
 335
 336    -------------------------------------------------------------------------- */
 337
 338 void
 339 newCAF(StgClosure* caf)
 340 {
 341   ACQUIRE_SM_LOCK;
 342
 343   if(keepCAFs)
 344   {
 345     // HACK:
 346     // If we are in GHCi _and_ we are using dynamic libraries,
 347     // then we can't redirect newCAF calls to newDynCAF (see below),
 348     // so we make newCAF behave almost like newDynCAF.
 349     // The dynamic libraries might be used by both the interpreted
 350     // program and GHCi itself, so they must not be reverted.
 351     // This also means that in GHCi with dynamic libraries, CAFs are not
 352     // garbage collected. If this turns out to be a problem, we could
 353     // do another hack here and do an address range test on caf to figure
 354     // out whether it is from a dynamic library.
 355     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 356     ((StgIndStatic *)caf)->static_link = caf_list;
 357     caf_list = caf;
 358   }
 359   else
 360   {
 361     /* Put this CAF on the mutable list for the old generation.
 362     * This is a HACK - the IND_STATIC closure doesn't really have
 363     * a mut_link field, but we pretend it has - in fact we re-use
 364     * the STATIC_LINK field for the time being, because when we
 365     * come to do a major GC we won't need the mut_link field
 366     * any more and can use it as a STATIC_LINK.
 367     */
 368     ((StgIndStatic *)caf)->saved_info = NULL;
 369     recordMutableGen(caf, oldest_gen);
 370   }
 371
 372   RELEASE_SM_LOCK;
 373 }
 374
 375 // An alternate version of newCaf which is used for dynamically loaded
 376 // object code in GHCi.  In this case we want to retain *all* CAFs in
 377 // the object code, because they might be demanded at any time from an
 378 // expression evaluated on the command line.
 379 // Also, GHCi might want to revert CAFs, so we add these to the
 380 // revertible_caf_list.
 381 //
 382 // The linker hackily arranges that references to newCaf from dynamic
 383 // code end up pointing to newDynCAF.
 384 void
 385 newDynCAF(StgClosure *caf)
 386 {
 387     ACQUIRE_SM_LOCK;
 388
 389     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 390     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 391     revertible_caf_list = caf;
 392
 393     RELEASE_SM_LOCK;
 394 }
 395
 396 /* -----------------------------------------------------------------------------
 397    Nursery management.
 398    -------------------------------------------------------------------------- */
 399
 400 static bdescr *
 401 allocNursery (step *stp, bdescr *tail, nat blocks)
 402 {
 403     bdescr *bd;
 404     nat i;
 405
 406     // Allocate a nursery: we allocate fresh blocks one at a time and
 407     // cons them on to the front of the list, not forgetting to update
 408     // the back pointer on the tail of the list to point to the new block.
 409     for (i=0; i < blocks; i++) {
 410         // @LDV profiling
 411         /*
 412           processNursery() in LdvProfile.c assumes that every block group in
 413           the nursery contains only a single block. So, if a block group is
 414           given multiple blocks, change processNursery() accordingly.
 415         */
 416         bd = allocBlock();
 417         bd->link = tail;
 418         // double-link the nursery: we might need to insert blocks
 419         if (tail != NULL) {
 420             tail->u.back = bd;
 421         }
 422         bd->step = stp;
 423         bd->gen_no = 0;
 424         bd->flags = 0;
 425         bd->free = bd->start;
 426         tail = bd;
 427     }
 428     tail->u.back = NULL;
 429     return tail;
 430 }
 431
 432 static void
 433 assignNurseriesToCapabilities (void)
 434 {
 435 #ifdef THREADED_RTS
 436     nat i;
 437
 438     for (i = 0; i < n_nurseries; i++) {
 439         capabilities[i].r.rNursery        = &nurseries[i];
 440         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 441         capabilities[i].r.rCurrentAlloc   = NULL;
 442     }
 443 #else /* THREADED_RTS */
 444     MainCapability.r.rNursery        = &nurseries[0];
 445     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 446     MainCapability.r.rCurrentAlloc   = NULL;
 447 #endif
 448 }
 449
 450 void
 451 allocNurseries( void )
 452 {
 453     nat i;
 454
 455     for (i = 0; i < n_nurseries; i++) {
 456         nurseries[i].blocks =
 457             allocNursery(&nurseries[i], NULL,
 458                          RtsFlags.GcFlags.minAllocAreaSize);
 459         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 460         nurseries[i].old_blocks   = NULL;
 461         nurseries[i].n_old_blocks = 0;
 462     }
 463     assignNurseriesToCapabilities();
 464 }
 465
 466 void
 467 resetNurseries( void )
 468 {
 469     nat i;
 470     bdescr *bd;
 471     step *stp;
 472
 473     for (i = 0; i < n_nurseries; i++) {
 474         stp = &nurseries[i];
 475         for (bd = stp->blocks; bd; bd = bd->link) {
 476             bd->free = bd->start;
 477             ASSERT(bd->gen_no == 0);
 478             ASSERT(bd->step == stp);
 479             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 480         }
 481     }
 482     assignNurseriesToCapabilities();
 483 }
 484
 485 lnat
 486 countNurseryBlocks (void)
 487 {
 488     nat i;
 489     lnat blocks = 0;
 490
 491     for (i = 0; i < n_nurseries; i++) {
 492         blocks += nurseries[i].n_blocks;
 493     }
 494     return blocks;
 495 }
 496
 497 static void
 498 resizeNursery ( step *stp, nat blocks )
 499 {
 500   bdescr *bd;
 501   nat nursery_blocks;
 502
 503   nursery_blocks = stp->n_blocks;
 504   if (nursery_blocks == blocks) return;
 505
 506   if (nursery_blocks < blocks) {
 507       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 508                  blocks);
 509     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 510   }
 511   else {
 512     bdescr *next_bd;
 513
 514     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 515                blocks);
 516
 517     bd = stp->blocks;
 518     while (nursery_blocks > blocks) {
 519         next_bd = bd->link;
 520         next_bd->u.back = NULL;
 521         nursery_blocks -= bd->blocks; // might be a large block
 522         freeGroup(bd);
 523         bd = next_bd;
 524     }
 525     stp->blocks = bd;
 526     // might have gone just under, by freeing a large block, so make
 527     // up the difference.
 528     if (nursery_blocks < blocks) {
 529         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 530     }
 531   }
 532
 533   stp->n_blocks = blocks;
 534   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 535 }
 536
 537 //
 538 // Resize each of the nurseries to the specified size.
 539 //
 540 void
 541 resizeNurseriesFixed (nat blocks)
 542 {
 543     nat i;
 544     for (i = 0; i < n_nurseries; i++) {
 545         resizeNursery(&nurseries[i], blocks);
 546     }
 547 }
 548
 549 //
 550 // Resize the nurseries to the total specified size.
 551 //
 552 void
 553 resizeNurseries (nat blocks)
 554 {
 555     // If there are multiple nurseries, then we just divide the number
 556     // of available blocks between them.
 557     resizeNurseriesFixed(blocks / n_nurseries);
 558 }
 559
 560
 561 /* -----------------------------------------------------------------------------
 562    move_TSO is called to update the TSO structure after it has been
 563    moved from one place to another.
 564    -------------------------------------------------------------------------- */
 565
 566 void
 567 move_TSO (StgTSO *src, StgTSO *dest)
 568 {
 569     ptrdiff_t diff;
 570
 571     // relocate the stack pointer...
 572     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 573     dest->sp = (StgPtr)dest->sp + diff;
 574 }
 575
 576 /* -----------------------------------------------------------------------------
 577    The allocate() interface
 578
 579    allocateInGen() function allocates memory directly into a specific
 580    generation.  It always succeeds, and returns a chunk of memory n
 581    words long.  n can be larger than the size of a block if necessary,
 582    in which case a contiguous block group will be allocated.
 583
 584    allocate(n) is equivalent to allocateInGen(g0).
 585    -------------------------------------------------------------------------- */
 586
 587 StgPtr
 588 allocateInGen (generation *g, lnat n)
 589 {
 590     step *stp;
 591     bdescr *bd;
 592     StgPtr ret;
 593
 594     ACQUIRE_SM_LOCK;
 595
 596     TICK_ALLOC_HEAP_NOCTR(n);
 597     CCS_ALLOC(CCCS,n);
 598
 599     stp = &g->steps[0];
 600
 601     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
 602     {
 603         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 604
 605         // Attempting to allocate an object larger than maxHeapSize
 606         // should definitely be disallowed.  (bug #1791)
 607         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 608             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 609             heapOverflow();
 610         }
 611
 612         bd = allocGroup(req_blocks);
 613         dbl_link_onto(bd, &stp->large_objects);
 614         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 615         bd->gen_no  = g->no;
 616         bd->step = stp;
 617         bd->flags = BF_LARGE;
 618         bd->free = bd->start + n;
 619         ret = bd->start;
 620     }
 621     else
 622     {
 623         // small allocation (<LARGE_OBJECT_THRESHOLD) */
 624         bd = stp->blocks;
 625         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 626             bd = allocBlock();
 627             bd->gen_no = g->no;
 628             bd->step = stp;
 629             bd->flags = 0;
 630             bd->link = stp->blocks;
 631             stp->blocks = bd;
 632             stp->n_blocks++;
 633             alloc_blocks++;
 634         }
 635         ret = bd->free;
 636         bd->free += n;
 637     }
 638
 639     RELEASE_SM_LOCK;
 640
 641     return ret;
 642 }
 643
 644 StgPtr
 645 allocate (lnat n)
 646 {
 647     return allocateInGen(g0,n);
 648 }
 649
 650 lnat
 651 allocatedBytes( void )
 652 {
 653     lnat allocated;
 654
 655     allocated = alloc_blocks * BLOCK_SIZE_W;
 656     if (pinned_object_block != NULL) {
 657         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 658             pinned_object_block->free;
 659     }
 660
 661     return allocated;
 662 }
 663
 664 // split N blocks off the front of the given bdescr, returning the
 665 // new block group.  We treat the remainder as if it
 666 // had been freshly allocated in generation 0.
 667 bdescr *
 668 splitLargeBlock (bdescr *bd, nat blocks)
 669 {
 670     bdescr *new_bd;
 671
 672     // subtract the original number of blocks from the counter first
 673     bd->step->n_large_blocks -= bd->blocks;
 674
 675     new_bd = splitBlockGroup (bd, blocks);
 676
 677     dbl_link_onto(new_bd, &g0s0->large_objects);
 678     g0s0->n_large_blocks += new_bd->blocks;
 679     new_bd->gen_no  = g0s0->no;
 680     new_bd->step    = g0s0;
 681     new_bd->flags   = BF_LARGE;
 682     new_bd->free    = bd->free;
 683     ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
 684
 685     // add the new number of blocks to the counter.  Due to the gaps
 686     // for block descriptor, new_bd->blocks + bd->blocks might not be
 687     // equal to the original bd->blocks, which is why we do it this way.
 688     bd->step->n_large_blocks += bd->blocks;
 689
 690     return new_bd;
 691 }
 692
 693 /* -----------------------------------------------------------------------------
 694    allocateLocal()
 695
 696    This allocates memory in the current thread - it is intended for
 697    use primarily from STG-land where we have a Capability.  It is
 698    better than allocate() because it doesn't require taking the
 699    sm_mutex lock in the common case.
 700
 701    Memory is allocated directly from the nursery if possible (but not
 702    from the current nursery block, so as not to interfere with
 703    Hp/HpLim).
 704    -------------------------------------------------------------------------- */
 705
 706 StgPtr
 707 allocateLocal (Capability *cap, lnat n)
 708 {
 709     bdescr *bd;
 710     StgPtr p;
 711
 712     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 713         return allocateInGen(g0,n);
 714     }
 715
 716     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 717
 718     TICK_ALLOC_HEAP_NOCTR(n);
 719     CCS_ALLOC(CCCS,n);
 720
 721     bd = cap->r.rCurrentAlloc;
 722     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 723
 724         // The CurrentAlloc block is full, we need to find another
 725         // one.  First, we try taking the next block from the
 726         // nursery:
 727         bd = cap->r.rCurrentNursery->link;
 728
 729         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 730             // The nursery is empty, or the next block is already
 731             // full: allocate a fresh block (we can't fail here).
 732             ACQUIRE_SM_LOCK;
 733             bd = allocBlock();
 734             cap->r.rNursery->n_blocks++;
 735             RELEASE_SM_LOCK;
 736             bd->gen_no = 0;
 737             bd->step = cap->r.rNursery;
 738             bd->flags = 0;
 739             // NO: alloc_blocks++;
 740             // calcAllocated() uses the size of the nursery, and we've
 741             // already bumpted nursery->n_blocks above.
 742         } else {
 743             // we have a block in the nursery: take it and put
 744             // it at the *front* of the nursery list, and use it
 745             // to allocate() from.
 746             cap->r.rCurrentNursery->link = bd->link;
 747             if (bd->link != NULL) {
 748                 bd->link->u.back = cap->r.rCurrentNursery;
 749             }
 750         }
 751         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 752         cap->r.rCurrentAlloc = bd;
 753         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 754     }
 755     p = bd->free;
 756     bd->free += n;
 757     return p;
 758 }
 759
 760 /* ---------------------------------------------------------------------------
 761    Allocate a fixed/pinned object.
 762
 763    We allocate small pinned objects into a single block, allocating a
 764    new block when the current one overflows.  The block is chained
 765    onto the large_object_list of generation 0 step 0.
 766
 767    NOTE: The GC can't in general handle pinned objects.  This
 768    interface is only safe to use for ByteArrays, which have no
 769    pointers and don't require scavenging.  It works because the
 770    block's descriptor has the BF_LARGE flag set, so the block is
 771    treated as a large object and chained onto various lists, rather
 772    than the individual objects being copied.  However, when it comes
 773    to scavenge the block, the GC will only scavenge the first object.
 774    The reason is that the GC can't linearly scan a block of pinned
 775    objects at the moment (doing so would require using the
 776    mostly-copying techniques).  But since we're restricting ourselves
 777    to pinned ByteArrays, not scavenging is ok.
 778
 779    This function is called by newPinnedByteArray# which immediately
 780    fills the allocated memory with a MutableByteArray#.
 781    ------------------------------------------------------------------------- */
 782
 783 StgPtr
 784 allocatePinned( lnat n )
 785 {
 786     StgPtr p;
 787     bdescr *bd = pinned_object_block;
 788
 789     // If the request is for a large object, then allocate()
 790     // will give us a pinned object anyway.
 791     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 792         return allocate(n);
 793     }
 794
 795     ACQUIRE_SM_LOCK;
 796
 797     TICK_ALLOC_HEAP_NOCTR(n);
 798     CCS_ALLOC(CCCS,n);
 799
 800     // we always return 8-byte aligned memory.  bd->free must be
 801     // 8-byte aligned to begin with, so we just round up n to
 802     // the nearest multiple of 8 bytes.
 803     if (sizeof(StgWord) == 4) {
 804         n = (n+1) & ~1;
 805     }
 806
 807     // If we don't have a block of pinned objects yet, or the current
 808     // one isn't large enough to hold the new object, allocate a new one.
 809     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 810         pinned_object_block = bd = allocBlock();
 811         dbl_link_onto(bd, &g0s0->large_objects);
 812         g0s0->n_large_blocks++;
 813         bd->gen_no = 0;
 814         bd->step   = g0s0;
 815         bd->flags  = BF_PINNED | BF_LARGE;
 816         bd->free   = bd->start;
 817         alloc_blocks++;
 818     }
 819
 820     p = bd->free;
 821     bd->free += n;
 822     RELEASE_SM_LOCK;
 823     return p;
 824 }
 825
 826 /* -----------------------------------------------------------------------------
 827    Write Barriers
 828    -------------------------------------------------------------------------- */
 829
 830 /*
 831    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 832    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 833    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 834    and is put on the mutable list.
 835 */
 836 void
 837 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 838 {
 839     Capability *cap = regTableToCapability(reg);
 840     bdescr *bd;
 841     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 842         p->header.info = &stg_MUT_VAR_DIRTY_info;
 843         bd = Bdescr((StgPtr)p);
 844         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 845     }
 846 }
 847
 848 // Setting a TSO's link field with a write barrier.
 849 // It is *not* necessary to call this function when
 850 //    * setting the link field to END_TSO_QUEUE
 851 //    * putting a TSO on the blackhole_queue
 852 //    * setting the link field of the currently running TSO, as it
 853 //      will already be dirty.
 854 void
 855 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 856 {
 857     bdescr *bd;
 858     if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
 859         tso->flags |= TSO_LINK_DIRTY;
 860         bd = Bdescr((StgPtr)tso);
 861         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 862     }
 863     tso->_link = target;
 864 }
 865
 866 void
 867 dirty_TSO (Capability *cap, StgTSO *tso)
 868 {
 869     bdescr *bd;
 870     if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) {
 871         bd = Bdescr((StgPtr)tso);
 872         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 873     }
 874     tso->flags |= TSO_DIRTY;
 875 }
 876
 877 /*
 878    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 879    on the mutable list; a MVAR_DIRTY is.  When written to, a
 880    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 881    The check for MVAR_CLEAN is inlined at the call site for speed,
 882    this really does make a difference on concurrency-heavy benchmarks
 883    such as Chaneneos and cheap-concurrency.
 884 */
 885 void
 886 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 887 {
 888     Capability *cap = regTableToCapability(reg);
 889     bdescr *bd;
 890     bd = Bdescr((StgPtr)p);
 891     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 892 }
 893
 894 /* -----------------------------------------------------------------------------
 895    Allocation functions for GMP.
 896
 897    These all use the allocate() interface - we can't have any garbage
 898    collection going on during a gmp operation, so we use allocate()
 899    which always succeeds.  The gmp operations which might need to
 900    allocate will ask the storage manager (via doYouWantToGC()) whether
 901    a garbage collection is required, in case we get into a loop doing
 902    only allocate() style allocation.
 903    -------------------------------------------------------------------------- */
 904
 905 static void *
 906 stgAllocForGMP (size_t size_in_bytes)
 907 {
 908   StgArrWords* arr;
 909   nat data_size_in_words, total_size_in_words;
 910
 911   /* round up to a whole number of words */
 912   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 913   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 914
 915   /* allocate and fill it in. */
 916 #if defined(THREADED_RTS)
 917   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 918 #else
 919   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 920 #endif
 921   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 922
 923   /* and return a ptr to the goods inside the array */
 924   return arr->payload;
 925 }
 926
 927 static void *
 928 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 929 {
 930     size_t min_size;
 931     void *new_stuff_ptr = stgAllocForGMP(new_size);
 932     nat i = 0;
 933     char *p = (char *) ptr;
 934     char *q = (char *) new_stuff_ptr;
 935
 936     min_size = old_size < new_size ? old_size : new_size;
 937     for (; i < min_size; i++, p++, q++) {
 938         *q = *p;
 939     }
 940
 941     return(new_stuff_ptr);
 942 }
 943
 944 static void
 945 stgDeallocForGMP (void *ptr STG_UNUSED,
 946                   size_t size STG_UNUSED)
 947 {
 948     /* easy for us: the garbage collector does the dealloc'n */
 949 }
 950
 951 /* -----------------------------------------------------------------------------
 952  * Stats and stuff
 953  * -------------------------------------------------------------------------- */
 954
 955 /* -----------------------------------------------------------------------------
 956  * calcAllocated()
 957  *
 958  * Approximate how much we've allocated: number of blocks in the
 959  * nursery + blocks allocated via allocate() - unused nusery blocks.
 960  * This leaves a little slop at the end of each block, and doesn't
 961  * take into account large objects (ToDo).
 962  * -------------------------------------------------------------------------- */
 963
 964 lnat
 965 calcAllocated( void )
 966 {
 967   nat allocated;
 968   bdescr *bd;
 969
 970   allocated = allocatedBytes();
 971   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 972
 973   {
 974 #ifdef THREADED_RTS
 975   nat i;
 976   for (i = 0; i < n_nurseries; i++) {
 977       Capability *cap;
 978       for ( bd = capabilities[i].r.rCurrentNursery->link;
 979             bd != NULL; bd = bd->link ) {
 980           allocated -= BLOCK_SIZE_W;
 981       }
 982       cap = &capabilities[i];
 983       if (cap->r.rCurrentNursery->free <
 984           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 985           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 986               - cap->r.rCurrentNursery->free;
 987       }
 988   }
 989 #else
 990   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 991
 992   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 993       allocated -= BLOCK_SIZE_W;
 994   }
 995   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 996       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 997           - current_nursery->free;
 998   }
 999 #endif
1000   }
1001
1002   total_allocated += allocated;
1003   return allocated;
1004 }
1005
1006 /* Approximate the amount of live data in the heap.  To be called just
1007  * after garbage collection (see GarbageCollect()).
1008  */
1009 lnat
1010 calcLiveBlocks(void)
1011 {
1012   nat g, s;
1013   lnat live = 0;
1014   step *stp;
1015
1016   if (RtsFlags.GcFlags.generations == 1) {
1017       return g0s0->n_large_blocks + g0s0->n_blocks;
1018   }
1019
1020   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1021     for (s = 0; s < generations[g].n_steps; s++) {
1022       /* approximate amount of live data (doesn't take into account slop
1023        * at end of each block).
1024        */
1025       if (g == 0 && s == 0) {
1026           continue;
1027       }
1028       stp = &generations[g].steps[s];
1029       live += stp->n_large_blocks + stp->n_blocks;
1030     }
1031   }
1032   return live;
1033 }
1034
1035 lnat
1036 countOccupied(bdescr *bd)
1037 {
1038     lnat words;
1039
1040     words = 0;
1041     for (; bd != NULL; bd = bd->link) {
1042         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1043         words += bd->free - bd->start;
1044     }
1045     return words;
1046 }
1047
1048 // Return an accurate count of the live data in the heap, excluding
1049 // generation 0.
1050 lnat
1051 calcLiveWords(void)
1052 {
1053     nat g, s;
1054     lnat live;
1055     step *stp;
1056
1057     if (RtsFlags.GcFlags.generations == 1) {
1058         return g0s0->n_words + countOccupied(g0s0->large_objects);
1059     }
1060
1061     live = 0;
1062     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1063         for (s = 0; s < generations[g].n_steps; s++) {
1064             if (g == 0 && s == 0) continue;
1065             stp = &generations[g].steps[s];
1066             live += stp->n_words + countOccupied(stp->large_objects);
1067         }
1068     }
1069     return live;
1070 }
1071
1072 /* Approximate the number of blocks that will be needed at the next
1073  * garbage collection.
1074  *
1075  * Assume: all data currently live will remain live.  Steps that will
1076  * be collected next time will therefore need twice as many blocks
1077  * since all the data will be copied.
1078  */
1079 extern lnat
1080 calcNeeded(void)
1081 {
1082     lnat needed = 0;
1083     nat g, s;
1084     step *stp;
1085
1086     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1087         for (s = 0; s < generations[g].n_steps; s++) {
1088             if (g == 0 && s == 0) { continue; }
1089             stp = &generations[g].steps[s];
1090
1091             // we need at least this much space
1092             needed += stp->n_blocks + stp->n_large_blocks;
1093
1094             // any additional space needed to collect this gen next time?
1095             if (g == 0 || // always collect gen 0
1096                 (generations[g].steps[0].n_blocks +
1097                  generations[g].steps[0].n_large_blocks
1098                  > generations[g].max_blocks)) {
1099                 // we will collect this gen next time
1100                 if (stp->mark) {
1101                     //  bitmap:
1102                     needed += stp->n_blocks / BITS_IN(W_);
1103                     //  mark stack:
1104                     needed += stp->n_blocks / 100;
1105                 }
1106                 if (stp->compact) {
1107                     continue; // no additional space needed for compaction
1108                 } else {
1109                     needed += stp->n_blocks;
1110                 }
1111             }
1112         }
1113     }
1114     return needed;
1115 }
1116
1117 /* ----------------------------------------------------------------------------
1118    Executable memory
1119
1120    Executable memory must be managed separately from non-executable
1121    memory.  Most OSs these days require you to jump through hoops to
1122    dynamically allocate executable memory, due to various security
1123    measures.
1124
1125    Here we provide a small memory allocator for executable memory.
1126    Memory is managed with a page granularity; we allocate linearly
1127    in the page, and when the page is emptied (all objects on the page
1128    are free) we free the page again, not forgetting to make it
1129    non-executable.
1130
1131    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1132          the linker cannot use allocateExec for loading object code files
1133          on Windows. Once allocateExec can handle larger objects, the linker
1134          should be modified to use allocateExec instead of VirtualAlloc.
1135    ------------------------------------------------------------------------- */
1136
1137 static bdescr *exec_block;
1138
1139 void *allocateExec (nat bytes)
1140 {
1141     void *ret;
1142     nat n;
1143
1144     ACQUIRE_SM_LOCK;
1145
1146     // round up to words.
1147     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1148
1149     if (n+1 > BLOCK_SIZE_W) {
1150         barf("allocateExec: can't handle large objects");
1151     }
1152
1153     if (exec_block == NULL ||
1154         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1155         bdescr *bd;
1156         lnat pagesize = getPageSize();
1157         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1158         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1159         bd->gen_no = 0;
1160         bd->flags = BF_EXEC;
1161         bd->link = exec_block;
1162         if (exec_block != NULL) {
1163             exec_block->u.back = bd;
1164         }
1165         bd->u.back = NULL;
1166         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1167         exec_block = bd;
1168     }
1169     *(exec_block->free) = n;  // store the size of this chunk
1170     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1171     ret = exec_block->free + 1;
1172     exec_block->free += n + 1;
1173
1174     RELEASE_SM_LOCK
1175     return ret;
1176 }
1177
1178 void freeExec (void *addr)
1179 {
1180     StgPtr p = (StgPtr)addr - 1;
1181     bdescr *bd = Bdescr((StgPtr)p);
1182
1183     if ((bd->flags & BF_EXEC) == 0) {
1184         barf("freeExec: not executable");
1185     }
1186
1187     if (*(StgPtr)p == 0) {
1188         barf("freeExec: already free?");
1189     }
1190
1191     ACQUIRE_SM_LOCK;
1192
1193     bd->gen_no -= *(StgPtr)p;
1194     *(StgPtr)p = 0;
1195
1196     if (bd->gen_no == 0) {
1197         // Free the block if it is empty, but not if it is the block at
1198         // the head of the queue.
1199         if (bd != exec_block) {
1200             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1201             dbl_link_remove(bd, &exec_block);
1202             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1203             freeGroup(bd);
1204         } else {
1205             bd->free = bd->start;
1206         }
1207     }
1208
1209     RELEASE_SM_LOCK
1210 }
1211
1212 /* -----------------------------------------------------------------------------
1213    Debugging
1214
1215    memInventory() checks for memory leaks by counting up all the
1216    blocks we know about and comparing that to the number of blocks
1217    allegedly floating around in the system.
1218    -------------------------------------------------------------------------- */
1219
1220 #ifdef DEBUG
1221
1222 // Useful for finding partially full blocks in gdb
1223 void findSlop(bdescr *bd);
1224 void findSlop(bdescr *bd)
1225 {
1226     lnat slop;
1227
1228     for (; bd != NULL; bd = bd->link) {
1229         slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start);
1230         if (slop > (1024/sizeof(W_))) {
1231             debugBelch("block at %p (bdescr %p) has %ldKB slop\n",
1232                        bd->start, bd, slop / (1024/sizeof(W_)));
1233         }
1234     }
1235 }
1236
1237 nat
1238 countBlocks(bdescr *bd)
1239 {
1240     nat n;
1241     for (n=0; bd != NULL; bd=bd->link) {
1242         n += bd->blocks;
1243     }
1244     return n;
1245 }
1246
1247 // (*1) Just like countBlocks, except that we adjust the count for a
1248 // megablock group so that it doesn't include the extra few blocks
1249 // that would be taken up by block descriptors in the second and
1250 // subsequent megablock.  This is so we can tally the count with the
1251 // number of blocks allocated in the system, for memInventory().
1252 static nat
1253 countAllocdBlocks(bdescr *bd)
1254 {
1255     nat n;
1256     for (n=0; bd != NULL; bd=bd->link) {
1257         n += bd->blocks;
1258         // hack for megablock groups: see (*1) above
1259         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1260             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1261                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1262         }
1263     }
1264     return n;
1265 }
1266
1267 static lnat
1268 stepBlocks (step *stp)
1269 {
1270     ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
1271     ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
1272     return stp->n_blocks + stp->n_old_blocks +
1273             countAllocdBlocks(stp->large_objects);
1274 }
1275
1276 // If memInventory() calculates that we have a memory leak, this
1277 // function will try to find the block(s) that are leaking by marking
1278 // all the ones that we know about, and search through memory to find
1279 // blocks that are not marked.  In the debugger this can help to give
1280 // us a clue about what kind of block leaked.  In the future we might
1281 // annotate blocks with their allocation site to give more helpful
1282 // info.
1283 static void
1284 findMemoryLeak (void)
1285 {
1286   nat g, s, i;
1287   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1288       for (i = 0; i < n_capabilities; i++) {
1289           markBlocks(capabilities[i].mut_lists[g]);
1290       }
1291       markBlocks(generations[g].mut_list);
1292       for (s = 0; s < generations[g].n_steps; s++) {
1293           markBlocks(generations[g].steps[s].blocks);
1294           markBlocks(generations[g].steps[s].large_objects);
1295       }
1296   }
1297
1298   for (i = 0; i < n_nurseries; i++) {
1299       markBlocks(nurseries[i].blocks);
1300       markBlocks(nurseries[i].large_objects);
1301   }
1302
1303 #ifdef PROFILING
1304   // TODO:
1305   // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1306   //    markRetainerBlocks();
1307   // }
1308 #endif
1309
1310   // count the blocks allocated by the arena allocator
1311   // TODO:
1312   // markArenaBlocks();
1313
1314   // count the blocks containing executable memory
1315   markBlocks(exec_block);
1316
1317   reportUnmarkedBlocks();
1318 }
1319
1320
1321 void
1322 memInventory (rtsBool show)
1323 {
1324   nat g, s, i;
1325   step *stp;
1326   lnat gen_blocks[RtsFlags.GcFlags.generations];
1327   lnat nursery_blocks, retainer_blocks,
1328        arena_blocks, exec_blocks;
1329   lnat live_blocks = 0, free_blocks = 0;
1330   rtsBool leak;
1331
1332   // count the blocks we current have
1333
1334   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1335       gen_blocks[g] = 0;
1336       for (i = 0; i < n_capabilities; i++) {
1337           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
1338       }
1339       gen_blocks[g] += countAllocdBlocks(generations[g].mut_list);
1340       for (s = 0; s < generations[g].n_steps; s++) {
1341           stp = &generations[g].steps[s];
1342           gen_blocks[g] += stepBlocks(stp);
1343       }
1344   }
1345
1346   nursery_blocks = 0;
1347   for (i = 0; i < n_nurseries; i++) {
1348       nursery_blocks += stepBlocks(&nurseries[i]);
1349   }
1350
1351   retainer_blocks = 0;
1352 #ifdef PROFILING
1353   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1354       retainer_blocks = retainerStackBlocks();
1355   }
1356 #endif
1357
1358   // count the blocks allocated by the arena allocator
1359   arena_blocks = arenaBlocks();
1360
1361   // count the blocks containing executable memory
1362   exec_blocks = countAllocdBlocks(exec_block);
1363
1364   /* count the blocks on the free list */
1365   free_blocks = countFreeList();
1366
1367   live_blocks = 0;
1368   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1369       live_blocks += gen_blocks[g];
1370   }
1371   live_blocks += nursery_blocks +
1372                + retainer_blocks + arena_blocks + exec_blocks;
1373
1374 #define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_)))
1375
1376   leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK;
1377
1378   if (show || leak)
1379   {
1380       if (leak) {
1381           debugBelch("Memory leak detected:\n");
1382       } else {
1383           debugBelch("Memory inventory:\n");
1384       }
1385       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1386           debugBelch("  gen %d blocks : %5lu blocks (%lu MB)\n", g,
1387                      gen_blocks[g], MB(gen_blocks[g]));
1388       }
1389       debugBelch("  nursery      : %5lu blocks (%lu MB)\n",
1390                  nursery_blocks, MB(nursery_blocks));
1391       debugBelch("  retainer     : %5lu blocks (%lu MB)\n",
1392                  retainer_blocks, MB(retainer_blocks));
1393       debugBelch("  arena blocks : %5lu blocks (%lu MB)\n",
1394                  arena_blocks, MB(arena_blocks));
1395       debugBelch("  exec         : %5lu blocks (%lu MB)\n",
1396                  exec_blocks, MB(exec_blocks));
1397       debugBelch("  free         : %5lu blocks (%lu MB)\n",
1398                  free_blocks, MB(free_blocks));
1399       debugBelch("  total        : %5lu blocks (%lu MB)\n",
1400                  live_blocks + free_blocks, MB(live_blocks+free_blocks));
1401       if (leak) {
1402           debugBelch("\n  in system    : %5lu blocks (%lu MB)\n",
1403                      mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated);
1404       }
1405   }
1406
1407   if (leak) {
1408       debugBelch("\n");
1409       findMemoryLeak();
1410   }
1411   ASSERT(n_alloc_blocks == live_blocks);
1412   ASSERT(!leak);
1413 }
1414
1415
1416 /* Full heap sanity check. */
1417 void
1418 checkSanity( void )
1419 {
1420     nat g, s;
1421
1422     if (RtsFlags.GcFlags.generations == 1) {
1423         checkHeap(g0s0->blocks);
1424         checkChain(g0s0->large_objects);
1425     } else {
1426
1427         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1428             for (s = 0; s < generations[g].n_steps; s++) {
1429                 if (g == 0 && s == 0) { continue; }
1430                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1431                        == generations[g].steps[s].n_blocks);
1432                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1433                        == generations[g].steps[s].n_large_blocks);
1434                 checkHeap(generations[g].steps[s].blocks);
1435                 checkChain(generations[g].steps[s].large_objects);
1436                 if (g > 0) {
1437                     checkMutableList(generations[g].mut_list, g);
1438                 }
1439             }
1440         }
1441
1442         for (s = 0; s < n_nurseries; s++) {
1443             ASSERT(countBlocks(nurseries[s].blocks)
1444                    == nurseries[s].n_blocks);
1445             ASSERT(countBlocks(nurseries[s].large_objects)
1446                    == nurseries[s].n_large_blocks);
1447         }
1448
1449         checkFreeListSanity();
1450     }
1451
1452 #if defined(THREADED_RTS)
1453     // check the stacks too in threaded mode, because we don't do a
1454     // full heap sanity check in this case (see checkHeap())
1455     checkGlobalTSOList(rtsTrue);
1456 #else
1457     checkGlobalTSOList(rtsFalse);
1458 #endif
1459 }
1460
1461 /* Nursery sanity check */
1462 void
1463 checkNurserySanity( step *stp )
1464 {
1465     bdescr *bd, *prev;
1466     nat blocks = 0;
1467
1468     prev = NULL;
1469     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1470         ASSERT(bd->u.back == prev);
1471         prev = bd;
1472         blocks += bd->blocks;
1473     }
1474     ASSERT(blocks == stp->n_blocks);
1475 }
1476
1477 // handy function for use in gdb, because Bdescr() is inlined.
1478 extern bdescr *_bdescr( StgPtr p );
1479
1480 bdescr *
1481 _bdescr( StgPtr p )
1482 {
1483     return Bdescr(p);
1484 }
1485
1486 #endif