ghc/rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  * $Id: Storage.c,v 1.59 2002/02/04 20:21:22 sof Exp $
   3  *
   4  * (c) The GHC Team, 1998-1999
   5  *
   6  * Storage manager front end
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "PosixSource.h"
  11 #include "Rts.h"
  12 #include "RtsUtils.h"
  13 #include "RtsFlags.h"
  14 #include "Stats.h"
  15 #include "Hooks.h"
  16 #include "BlockAlloc.h"
  17 #include "MBlock.h"
  18 #include "Weak.h"
  19 #include "Sanity.h"
  20 #include "Arena.h"
  21
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "OSThreads.h"
  25 #include "StoragePriv.h"
  26
  27 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  28
  29 StgClosure    *caf_list         = NULL;
  30
  31 bdescr *small_alloc_list;       /* allocate()d small objects */
  32 bdescr *large_alloc_list;       /* allocate()d large objects */
  33 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  34 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  35 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  36
  37 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  38 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  39
  40 generation *generations;        /* all the generations */
  41 generation *g0;                 /* generation 0, for convenience */
  42 generation *oldest_gen;         /* oldest generation, for convenience */
  43 step *g0s0;                     /* generation 0, step 0, for convenience */
  44
  45 lnat total_allocated = 0;       /* total memory allocated during run */
  46
  47 /*
  48  * Storage manager mutex:  protects all the above state from
  49  * simultaneous access by two STG threads.
  50  */
  51 #ifdef SMP
  52 Mutex sm_mutex = INIT_MUTEX_VAR;
  53 #endif
  54
  55 /*
  56  * Forward references
  57  */
  58 static void *stgAllocForGMP   (size_t size_in_bytes);
  59 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  60 static void  stgDeallocForGMP (void *ptr, size_t size);
  61
  62 void
  63 initStorage( void )
  64 {
  65   nat g, s;
  66   step *stp;
  67   generation *gen;
  68
  69   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
  70       RtsFlags.GcFlags.heapSizeSuggestion >
  71       RtsFlags.GcFlags.maxHeapSize) {
  72     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
  73   }
  74
  75   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
  76       RtsFlags.GcFlags.minAllocAreaSize >
  77       RtsFlags.GcFlags.maxHeapSize) {
  78       prog_belch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
  79       exit(1);
  80   }
  81
  82   initBlockAllocator();
  83
  84 #if defined(SMP)
  85   initCondition(&sm_mutex);
  86 #endif
  87
  88   /* allocate generation info array */
  89   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
  90                                              * sizeof(struct _generation),
  91                                              "initStorage: gens");
  92
  93   /* Initialise all generations */
  94   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
  95     gen = &generations[g];
  96     gen->no = g;
  97     gen->mut_list = END_MUT_LIST;
  98     gen->mut_once_list = END_MUT_LIST;
  99     gen->collections = 0;
 100     gen->failed_promotions = 0;
 101     gen->max_blocks = 0;
 102   }
 103
 104   /* A couple of convenience pointers */
 105   g0 = &generations[0];
 106   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 107
 108   /* Allocate step structures in each generation */
 109   if (RtsFlags.GcFlags.generations > 1) {
 110     /* Only for multiple-generations */
 111
 112     /* Oldest generation: one step */
 113     oldest_gen->n_steps = 1;
 114     oldest_gen->steps =
 115       stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");
 116
 117     /* set up all except the oldest generation with 2 steps */
 118     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 119       generations[g].n_steps = RtsFlags.GcFlags.steps;
 120       generations[g].steps  =
 121         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
 122                         "initStorage: steps");
 123     }
 124
 125   } else {
 126     /* single generation, i.e. a two-space collector */
 127     g0->n_steps = 1;
 128     g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
 129   }
 130
 131   /* Initialise all steps */
 132   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 133     for (s = 0; s < generations[g].n_steps; s++) {
 134       stp = &generations[g].steps[s];
 135       stp->no = s;
 136       stp->blocks = NULL;
 137       stp->n_blocks = 0;
 138       stp->gen = &generations[g];
 139       stp->gen_no = g;
 140       stp->hp = NULL;
 141       stp->hpLim = NULL;
 142       stp->hp_bd = NULL;
 143       stp->scan = NULL;
 144       stp->scan_bd = NULL;
 145       stp->large_objects = NULL;
 146       stp->n_large_blocks = 0;
 147       stp->new_large_objects = NULL;
 148       stp->scavenged_large_objects = NULL;
 149       stp->n_scavenged_large_blocks = 0;
 150       stp->is_compacted = 0;
 151       stp->bitmap = NULL;
 152     }
 153   }
 154
 155   /* Set up the destination pointers in each younger gen. step */
 156   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 157     for (s = 0; s < generations[g].n_steps-1; s++) {
 158       generations[g].steps[s].to = &generations[g].steps[s+1];
 159     }
 160     generations[g].steps[s].to = &generations[g+1].steps[0];
 161   }
 162
 163   /* The oldest generation has one step and it is compacted. */
 164   if (RtsFlags.GcFlags.compact) {
 165       if (RtsFlags.GcFlags.generations == 1) {
 166           belch("WARNING: compaction is incompatible with -G1; disabled");
 167       } else {
 168           oldest_gen->steps[0].is_compacted = 1;
 169       }
 170   }
 171   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 172
 173   /* generation 0 is special: that's the nursery */
 174   generations[0].max_blocks = 0;
 175
 176   /* G0S0: the allocation area.  Policy: keep the allocation area
 177    * small to begin with, even if we have a large suggested heap
 178    * size.  Reason: we're going to do a major collection first, and we
 179    * don't want it to be a big one.  This vague idea is borne out by
 180    * rigorous experimental evidence.
 181    */
 182   g0s0 = &generations[0].steps[0];
 183
 184   allocNurseries();
 185
 186   weak_ptr_list = NULL;
 187   caf_list = NULL;
 188
 189   /* initialise the allocate() interface */
 190   small_alloc_list = NULL;
 191   large_alloc_list = NULL;
 192   alloc_blocks = 0;
 193   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 194
 195   /* Tell GNU multi-precision pkg about our custom alloc functions */
 196   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 197
 198 #if defined(SMP)
 199   initMutex(&sm_mutex);
 200 #endif
 201
 202   IF_DEBUG(gc, statDescribeGens());
 203 }
 204
 205 void
 206 exitStorage (void)
 207 {
 208     stat_exit(calcAllocated());
 209 }
 210
 211 /* -----------------------------------------------------------------------------
 212    CAF management.
 213
 214    The entry code for every CAF does the following:
 215
 216       - builds a CAF_BLACKHOLE in the heap
 217       - pushes an update frame pointing to the CAF_BLACKHOLE
 218       - invokes UPD_CAF(), which:
 219           - calls newCaf, below
 220           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 221
 222    Why do we build a BLACKHOLE in the heap rather than just updating
 223    the thunk directly?  It's so that we only need one kind of update
 224    frame - otherwise we'd need a static version of the update frame too.
 225
 226    newCaf() does the following:
 227
 228       - it puts the CAF on the oldest generation's mut-once list.
 229         This is so that we can treat the CAF as a root when collecting
 230         younger generations.
 231
 232    For GHCI, we have additional requirements when dealing with CAFs:
 233
 234       - we must *retain* all dynamically-loaded CAFs ever entered,
 235         just in case we need them again.
 236       - we must be able to *revert* CAFs that have been evaluated, to
 237         their pre-evaluated form.
 238
 239       To do this, we use an additional CAF list.  When newCaf() is
 240       called on a dynamically-loaded CAF, we add it to the CAF list
 241       instead of the old-generation mutable list, and save away its
 242       old info pointer (in caf->saved_info) for later reversion.
 243
 244       To revert all the CAFs, we traverse the CAF list and reset the
 245       info pointer to caf->saved_info, then throw away the CAF list.
 246       (see GC.c:revertCAFs()).
 247
 248       -- SDM 29/1/01
 249
 250    -------------------------------------------------------------------------- */
 251
 252 void
 253 newCAF(StgClosure* caf)
 254 {
 255   /* Put this CAF on the mutable list for the old generation.
 256    * This is a HACK - the IND_STATIC closure doesn't really have
 257    * a mut_link field, but we pretend it has - in fact we re-use
 258    * the STATIC_LINK field for the time being, because when we
 259    * come to do a major GC we won't need the mut_link field
 260    * any more and can use it as a STATIC_LINK.
 261    */
 262   ACQUIRE_SM_LOCK;
 263
 264   if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
 265       ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 266       ((StgIndStatic *)caf)->static_link = caf_list;
 267       caf_list = caf;
 268   } else {
 269       ((StgIndStatic *)caf)->saved_info = NULL;
 270       ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
 271       oldest_gen->mut_once_list = (StgMutClosure *)caf;
 272   }
 273
 274   RELEASE_SM_LOCK;
 275
 276 #ifdef PAR
 277   /* If we are PAR or DIST then  we never forget a CAF */
 278   { globalAddr *newGA;
 279     //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 280     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 281     ASSERT(newGA);
 282   }
 283 #endif /* PAR */
 284 }
 285
 286 /* -----------------------------------------------------------------------------
 287    Nursery management.
 288    -------------------------------------------------------------------------- */
 289
 290 void
 291 allocNurseries( void )
 292 {
 293 #ifdef SMP
 294   {
 295     Capability *cap;
 296     bdescr *bd;
 297
 298     g0s0->blocks = NULL;
 299     g0s0->n_blocks = 0;
 300     for (cap = free_capabilities; cap != NULL; cap = cap->link) {
 301       cap->r.rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
 302       cap->r.rCurrentNursery = cap->r.rNursery;
 303       for (bd = cap->r.rNursery; bd != NULL; bd = bd->link) {
 304         bd->u.back = (bdescr *)cap;
 305       }
 306     }
 307     /* Set the back links to be equal to the Capability,
 308      * so we can do slightly better informed locking.
 309      */
 310   }
 311 #else /* SMP */
 312   g0s0->blocks      = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
 313   g0s0->n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 314   g0s0->to_blocks   = NULL;
 315   g0s0->n_to_blocks = 0;
 316   MainCapability.r.rNursery        = g0s0->blocks;
 317   MainCapability.r.rCurrentNursery = g0s0->blocks;
 318   /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
 319 #endif
 320 }
 321
 322 void
 323 resetNurseries( void )
 324 {
 325   bdescr *bd;
 326 #ifdef SMP
 327   Capability *cap;
 328
 329   /* All tasks must be stopped */
 330   ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);
 331
 332   for (cap = free_capabilities; cap != NULL; cap = cap->link) {
 333     for (bd = cap->r.rNursery; bd; bd = bd->link) {
 334       bd->free = bd->start;
 335       ASSERT(bd->gen_no == 0);
 336       ASSERT(bd->step == g0s0);
 337       IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 338     }
 339     cap->r.rCurrentNursery = cap->r.rNursery;
 340   }
 341 #else
 342   for (bd = g0s0->blocks; bd; bd = bd->link) {
 343     bd->free = bd->start;
 344     ASSERT(bd->gen_no == 0);
 345     ASSERT(bd->step == g0s0);
 346     IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 347   }
 348   MainCapability.r.rNursery = g0s0->blocks;
 349   MainCapability.r.rCurrentNursery = g0s0->blocks;
 350 #endif
 351 }
 352
 353 bdescr *
 354 allocNursery (bdescr *tail, nat blocks)
 355 {
 356   bdescr *bd;
 357   nat i;
 358
 359   // Allocate a nursery: we allocate fresh blocks one at a time and
 360   // cons them on to the front of the list, not forgetting to update
 361   // the back pointer on the tail of the list to point to the new block.
 362   for (i=0; i < blocks; i++) {
 363     // @LDV profiling
 364     /*
 365       processNursery() in LdvProfile.c assumes that every block group in
 366       the nursery contains only a single block. So, if a block group is
 367       given multiple blocks, change processNursery() accordingly.
 368      */
 369     bd = allocBlock();
 370     bd->link = tail;
 371     // double-link the nursery: we might need to insert blocks
 372     if (tail != NULL) {
 373         tail->u.back = bd;
 374     }
 375     bd->step = g0s0;
 376     bd->gen_no = 0;
 377     bd->flags = 0;
 378     bd->free = bd->start;
 379     tail = bd;
 380   }
 381   tail->u.back = NULL;
 382   return tail;
 383 }
 384
 385 void
 386 resizeNursery ( nat blocks )
 387 {
 388   bdescr *bd;
 389   nat nursery_blocks;
 390
 391 #ifdef SMP
 392   barf("resizeNursery: can't resize in SMP mode");
 393 #endif
 394
 395   nursery_blocks = g0s0->n_blocks;
 396   if (nursery_blocks == blocks) {
 397     return;
 398   }
 399
 400   else if (nursery_blocks < blocks) {
 401     IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
 402                          blocks));
 403     g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
 404   }
 405
 406   else {
 407     bdescr *next_bd;
 408
 409     IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
 410                          blocks));
 411
 412     bd = g0s0->blocks;
 413     while (nursery_blocks > blocks) {
 414         next_bd = bd->link;
 415         next_bd->u.back = NULL;
 416         nursery_blocks -= bd->blocks; // might be a large block
 417         freeGroup(bd);
 418         bd = next_bd;
 419     }
 420     g0s0->blocks = bd;
 421     // might have gone just under, by freeing a large block, so make
 422     // up the difference.
 423     if (nursery_blocks < blocks) {
 424         g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
 425     }
 426   }
 427
 428   g0s0->n_blocks = blocks;
 429   ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
 430 }
 431
 432 /* -----------------------------------------------------------------------------
 433    The allocate() interface
 434
 435    allocate(n) always succeeds, and returns a chunk of memory n words
 436    long.  n can be larger than the size of a block if necessary, in
 437    which case a contiguous block group will be allocated.
 438    -------------------------------------------------------------------------- */
 439
 440 StgPtr
 441 allocate( nat n )
 442 {
 443   bdescr *bd;
 444   StgPtr p;
 445
 446   ACQUIRE_SM_LOCK;
 447
 448   TICK_ALLOC_HEAP_NOCTR(n);
 449   CCS_ALLOC(CCCS,n);
 450
 451   /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 452   /* ToDo: allocate directly into generation 1 */
 453   if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 454     nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 455     bd = allocGroup(req_blocks);
 456     dbl_link_onto(bd, &g0s0->large_objects);
 457     bd->gen_no  = 0;
 458     bd->step = g0s0;
 459     bd->flags = BF_LARGE;
 460     bd->free = bd->start;
 461     /* don't add these blocks to alloc_blocks, since we're assuming
 462      * that large objects are likely to remain live for quite a while
 463      * (eg. running threads), so garbage collecting early won't make
 464      * much difference.
 465      */
 466     alloc_blocks += req_blocks;
 467     RELEASE_SM_LOCK;
 468     return bd->start;
 469
 470   /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 471   } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 472     if (small_alloc_list) {
 473       small_alloc_list->free = alloc_Hp;
 474     }
 475     bd = allocBlock();
 476     bd->link = small_alloc_list;
 477     small_alloc_list = bd;
 478     bd->gen_no = 0;
 479     bd->step = g0s0;
 480     bd->flags = 0;
 481     alloc_Hp = bd->start;
 482     alloc_HpLim = bd->start + BLOCK_SIZE_W;
 483     alloc_blocks++;
 484   }
 485
 486   p = alloc_Hp;
 487   alloc_Hp += n;
 488   RELEASE_SM_LOCK;
 489   return p;
 490 }
 491
 492 lnat
 493 allocated_bytes( void )
 494 {
 495   return (alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp));
 496 }
 497
 498 /* ---------------------------------------------------------------------------
 499    Allocate a fixed/pinned object.
 500
 501    We allocate small pinned objects into a single block, allocating a
 502    new block when the current one overflows.  The block is chained
 503    onto the large_object_list of generation 0 step 0.
 504
 505    NOTE: The GC can't in general handle pinned objects.  This
 506    interface is only safe to use for ByteArrays, which have no
 507    pointers and don't require scavenging.  It works because the
 508    block's descriptor has the BF_LARGE flag set, so the block is
 509    treated as a large object and chained onto various lists, rather
 510    than the individual objects being copied.  However, when it comes
 511    to scavenge the block, the GC will only scavenge the first object.
 512    The reason is that the GC can't linearly scan a block of pinned
 513    objects at the moment (doing so would require using the
 514    mostly-copying techniques).  But since we're restricting ourselves
 515    to pinned ByteArrays, not scavenging is ok.
 516
 517    This function is called by newPinnedByteArray# which immediately
 518    fills the allocated memory with a MutableByteArray#.
 519    ------------------------------------------------------------------------- */
 520
 521 StgPtr
 522 allocatePinned( nat n )
 523 {
 524     StgPtr p;
 525     bdescr *bd = pinned_object_block;
 526
 527     ACQUIRE_SM_LOCK;
 528
 529     TICK_ALLOC_HEAP_NOCTR(n);
 530     CCS_ALLOC(CCCS,n);
 531
 532     // If the request is for a large object, then allocate()
 533     // will give us a pinned object anyway.
 534     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 535         RELEASE_SM_LOCK;
 536         return allocate(n);
 537     }
 538
 539     // If we don't have a block of pinned objects yet, or the current
 540     // one isn't large enough to hold the new object, allocate a new one.
 541     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 542         pinned_object_block = bd = allocBlock();
 543         dbl_link_onto(bd, &g0s0->large_objects);
 544         bd->gen_no = 0;
 545         bd->step   = g0s0;
 546         bd->flags  = BF_LARGE;
 547         bd->free   = bd->start;
 548         alloc_blocks++;
 549     }
 550
 551     p = bd->free;
 552     bd->free += n;
 553     RELEASE_SM_LOCK;
 554     return p;
 555 }
 556
 557 /* -----------------------------------------------------------------------------
 558    Allocation functions for GMP.
 559
 560    These all use the allocate() interface - we can't have any garbage
 561    collection going on during a gmp operation, so we use allocate()
 562    which always succeeds.  The gmp operations which might need to
 563    allocate will ask the storage manager (via doYouWantToGC()) whether
 564    a garbage collection is required, in case we get into a loop doing
 565    only allocate() style allocation.
 566    -------------------------------------------------------------------------- */
 567
 568 static void *
 569 stgAllocForGMP (size_t size_in_bytes)
 570 {
 571   StgArrWords* arr;
 572   nat data_size_in_words, total_size_in_words;
 573
 574   /* should be a multiple of sizeof(StgWord) (whole no. of limbs) */
 575   ASSERT(size_in_bytes % sizeof(W_) == 0);
 576
 577   data_size_in_words  = size_in_bytes / sizeof(W_);
 578   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 579
 580   /* allocate and fill it in. */
 581   arr = (StgArrWords *)allocate(total_size_in_words);
 582   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 583
 584   /* and return a ptr to the goods inside the array */
 585   return(BYTE_ARR_CTS(arr));
 586 }
 587
 588 static void *
 589 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 590 {
 591     void *new_stuff_ptr = stgAllocForGMP(new_size);
 592     nat i = 0;
 593     char *p = (char *) ptr;
 594     char *q = (char *) new_stuff_ptr;
 595
 596     for (; i < old_size; i++, p++, q++) {
 597         *q = *p;
 598     }
 599
 600     return(new_stuff_ptr);
 601 }
 602
 603 static void
 604 stgDeallocForGMP (void *ptr STG_UNUSED,
 605                   size_t size STG_UNUSED)
 606 {
 607     /* easy for us: the garbage collector does the dealloc'n */
 608 }
 609
 610 /* -----------------------------------------------------------------------------
 611  * Stats and stuff
 612  * -------------------------------------------------------------------------- */
 613
 614 /* -----------------------------------------------------------------------------
 615  * calcAllocated()
 616  *
 617  * Approximate how much we've allocated: number of blocks in the
 618  * nursery + blocks allocated via allocate() - unused nusery blocks.
 619  * This leaves a little slop at the end of each block, and doesn't
 620  * take into account large objects (ToDo).
 621  * -------------------------------------------------------------------------- */
 622
 623 lnat
 624 calcAllocated( void )
 625 {
 626   nat allocated;
 627   bdescr *bd;
 628
 629 #ifdef SMP
 630   Capability *cap;
 631
 632   /* All tasks must be stopped.  Can't assert that all the
 633      capabilities are owned by the scheduler, though: one or more
 634      tasks might have been stopped while they were running (non-main)
 635      threads. */
 636   /*  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */
 637
 638   allocated =
 639     n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
 640     + allocated_bytes();
 641
 642   for (cap = free_capabilities; cap != NULL; cap = cap->link) {
 643     for ( bd = cap->r.rCurrentNursery->link; bd != NULL; bd = bd->link ) {
 644       allocated -= BLOCK_SIZE_W;
 645     }
 646     if (cap->r.rCurrentNursery->free < cap->r.rCurrentNursery->start
 647         + BLOCK_SIZE_W) {
 648       allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 649         - cap->r.rCurrentNursery->free;
 650     }
 651   }
 652
 653 #else /* !SMP */
 654   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 655
 656   allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes();
 657   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 658     allocated -= BLOCK_SIZE_W;
 659   }
 660   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 661     allocated -= (current_nursery->start + BLOCK_SIZE_W)
 662       - current_nursery->free;
 663   }
 664 #endif
 665
 666   total_allocated += allocated;
 667   return allocated;
 668 }
 669
 670 /* Approximate the amount of live data in the heap.  To be called just
 671  * after garbage collection (see GarbageCollect()).
 672  */
 673 extern lnat
 674 calcLive(void)
 675 {
 676   nat g, s;
 677   lnat live = 0;
 678   step *stp;
 679
 680   if (RtsFlags.GcFlags.generations == 1) {
 681     live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
 682       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 683     return live;
 684   }
 685
 686   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 687     for (s = 0; s < generations[g].n_steps; s++) {
 688       /* approximate amount of live data (doesn't take into account slop
 689        * at end of each block).
 690        */
 691       if (g == 0 && s == 0) {
 692           continue;
 693       }
 694       stp = &generations[g].steps[s];
 695       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 696       if (stp->hp_bd != NULL) {
 697           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 698               / sizeof(W_);
 699       }
 700     }
 701   }
 702   return live;
 703 }
 704
 705 /* Approximate the number of blocks that will be needed at the next
 706  * garbage collection.
 707  *
 708  * Assume: all data currently live will remain live.  Steps that will
 709  * be collected next time will therefore need twice as many blocks
 710  * since all the data will be copied.
 711  */
 712 extern lnat
 713 calcNeeded(void)
 714 {
 715     lnat needed = 0;
 716     nat g, s;
 717     step *stp;
 718
 719     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 720         for (s = 0; s < generations[g].n_steps; s++) {
 721             if (g == 0 && s == 0) { continue; }
 722             stp = &generations[g].steps[s];
 723             if (generations[g].steps[0].n_blocks +
 724                 generations[g].steps[0].n_large_blocks
 725                 > generations[g].max_blocks
 726                 && stp->is_compacted == 0) {
 727                 needed += 2 * stp->n_blocks;
 728             } else {
 729                 needed += stp->n_blocks;
 730             }
 731         }
 732     }
 733     return needed;
 734 }
 735
 736 /* -----------------------------------------------------------------------------
 737    Debugging
 738
 739    memInventory() checks for memory leaks by counting up all the
 740    blocks we know about and comparing that to the number of blocks
 741    allegedly floating around in the system.
 742    -------------------------------------------------------------------------- */
 743
 744 #ifdef DEBUG
 745
 746 void
 747 memInventory(void)
 748 {
 749   nat g, s;
 750   step *stp;
 751   bdescr *bd;
 752   lnat total_blocks = 0, free_blocks = 0;
 753
 754   /* count the blocks we current have */
 755
 756   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 757     for (s = 0; s < generations[g].n_steps; s++) {
 758       stp = &generations[g].steps[s];
 759       total_blocks += stp->n_blocks;
 760       if (RtsFlags.GcFlags.generations == 1) {
 761         /* two-space collector has a to-space too :-) */
 762         total_blocks += g0s0->n_to_blocks;
 763       }
 764       for (bd = stp->large_objects; bd; bd = bd->link) {
 765         total_blocks += bd->blocks;
 766         /* hack for megablock groups: they have an extra block or two in
 767            the second and subsequent megablocks where the block
 768            descriptors would normally go.
 769         */
 770         if (bd->blocks > BLOCKS_PER_MBLOCK) {
 771           total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
 772                           * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
 773         }
 774       }
 775     }
 776   }
 777
 778   /* any blocks held by allocate() */
 779   for (bd = small_alloc_list; bd; bd = bd->link) {
 780     total_blocks += bd->blocks;
 781   }
 782   for (bd = large_alloc_list; bd; bd = bd->link) {
 783     total_blocks += bd->blocks;
 784   }
 785
 786 #ifdef PROFILING
 787   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
 788     for (bd = firstStack; bd != NULL; bd = bd->link)
 789       total_blocks += bd->blocks;
 790   }
 791 #endif
 792
 793   // count the blocks allocated by the arena allocator
 794   total_blocks += arenaBlocks();
 795
 796   /* count the blocks on the free list */
 797   free_blocks = countFreeList();
 798
 799   if (total_blocks + free_blocks != mblocks_allocated *
 800       BLOCKS_PER_MBLOCK) {
 801     fprintf(stderr, "Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
 802             total_blocks, free_blocks, total_blocks + free_blocks,
 803             mblocks_allocated * BLOCKS_PER_MBLOCK);
 804   }
 805
 806   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
 807 }
 808
 809
 810 nat
 811 countBlocks(bdescr *bd)
 812 {
 813     nat n;
 814     for (n=0; bd != NULL; bd=bd->link) {
 815         n += bd->blocks;
 816     }
 817     return n;
 818 }
 819
 820 /* Full heap sanity check. */
 821 void
 822 checkSanity( void )
 823 {
 824     nat g, s;
 825
 826     if (RtsFlags.GcFlags.generations == 1) {
 827         checkHeap(g0s0->to_blocks);
 828         checkChain(g0s0->large_objects);
 829     } else {
 830
 831         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 832             for (s = 0; s < generations[g].n_steps; s++) {
 833                 ASSERT(countBlocks(generations[g].steps[s].blocks)
 834                        == generations[g].steps[s].n_blocks);
 835                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
 836                        == generations[g].steps[s].n_large_blocks);
 837                 if (g == 0 && s == 0) { continue; }
 838                 checkHeap(generations[g].steps[s].blocks);
 839                 checkChain(generations[g].steps[s].large_objects);
 840                 if (g > 0) {
 841                     checkMutableList(generations[g].mut_list, g);
 842                     checkMutOnceList(generations[g].mut_once_list, g);
 843                 }
 844             }
 845         }
 846         checkFreeListSanity();
 847     }
 848 }
 849
 850 // handy function for use in gdb, because Bdescr() is inlined.
 851 extern bdescr *_bdescr( StgPtr p );
 852
 853 bdescr *
 854 _bdescr( StgPtr p )
 855 {
 856     return Bdescr(p);
 857 }
 858
 859 #endif