ghc/rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  * $Id: Storage.c,v 1.70 2002/11/01 11:05:47 simonmar Exp $
   3  *
   4  * (c) The GHC Team, 1998-1999
   5  *
   6  * Storage manager front end
   7  *
   8  * ---------------------------------------------------------------------------*/
   9
  10 #include "PosixSource.h"
  11 #include "Rts.h"
  12 #include "RtsUtils.h"
  13 #include "RtsFlags.h"
  14 #include "Stats.h"
  15 #include "Hooks.h"
  16 #include "BlockAlloc.h"
  17 #include "MBlock.h"
  18 #include "Weak.h"
  19 #include "Sanity.h"
  20 #include "Arena.h"
  21
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "OSThreads.h"
  25 #include "StoragePriv.h"
  26
  27 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  28
  29 #include <stdlib.h>
  30 #include <string.h>
  31
  32 #ifdef darwin_TARGET_OS
  33 #include <mach-o/getsect.h>
  34 unsigned long macho_etext = 0;
  35 unsigned long macho_edata = 0;
  36
  37 static void macosx_get_memory_layout(void)
  38 {
  39   struct segment_command *seg;
  40
  41   seg = getsegbyname("__TEXT");
  42   macho_etext = seg->vmaddr + seg->vmsize;
  43   seg = getsegbyname("__DATA");
  44   macho_edata = seg->vmaddr + seg->vmsize;
  45 }
  46 #endif
  47
  48 StgClosure    *caf_list         = NULL;
  49
  50 bdescr *small_alloc_list;       /* allocate()d small objects */
  51 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  52 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  53 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  54
  55 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  56 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  57
  58 generation *generations;        /* all the generations */
  59 generation *g0;                 /* generation 0, for convenience */
  60 generation *oldest_gen;         /* oldest generation, for convenience */
  61 step *g0s0;                     /* generation 0, step 0, for convenience */
  62
  63 lnat total_allocated = 0;       /* total memory allocated during run */
  64
  65 /*
  66  * Storage manager mutex:  protects all the above state from
  67  * simultaneous access by two STG threads.
  68  */
  69 #ifdef SMP
  70 Mutex sm_mutex = INIT_MUTEX_VAR;
  71 #endif
  72
  73 /*
  74  * Forward references
  75  */
  76 static void *stgAllocForGMP   (size_t size_in_bytes);
  77 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  78 static void  stgDeallocForGMP (void *ptr, size_t size);
  79
  80 void
  81 initStorage( void )
  82 {
  83   nat g, s;
  84   step *stp;
  85   generation *gen;
  86
  87 #if defined(darwin_TARGET_OS)
  88     macosx_get_memory_layout();
  89 #endif
  90
  91     /* Sanity check to make sure we are able to make the distinction
  92      * between closures and infotables
  93      */
  94   if (!LOOKS_LIKE_GHC_INFO(&stg_BLACKHOLE_info)) {
  95     barf("LOOKS_LIKE_GHC_INFO+ is incorrectly defined");
  96     exit(0);
  97   }
  98   if (LOOKS_LIKE_GHC_INFO(&stg_dummy_ret_closure)) {
  99     barf("LOOKS_LIKE_GHC_INFO- is incorrectly defined");
 100     exit(0);
 101   }
 102   if (LOOKS_LIKE_STATIC_CLOSURE(&stg_BLACKHOLE_info)) {
 103     barf("LOOKS_LIKE_STATIC_CLOSURE- is incorrectly defined");
 104     exit(0);
 105   }
 106   if (!LOOKS_LIKE_STATIC_CLOSURE(&stg_dummy_ret_closure)) {
 107     barf("LOOKS_LIKE_STATIC_CLOSURE+ is incorrectly defined");
 108     exit(0);
 109   }
 110
 111   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 112       RtsFlags.GcFlags.heapSizeSuggestion >
 113       RtsFlags.GcFlags.maxHeapSize) {
 114     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 115   }
 116
 117   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 118       RtsFlags.GcFlags.minAllocAreaSize >
 119       RtsFlags.GcFlags.maxHeapSize) {
 120       prog_belch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 121       exit(1);
 122   }
 123
 124   initBlockAllocator();
 125
 126 #if defined(SMP)
 127   initCondition(&sm_mutex);
 128 #endif
 129
 130   /* allocate generation info array */
 131   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 132                                              * sizeof(struct _generation),
 133                                              "initStorage: gens");
 134
 135   /* Initialise all generations */
 136   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 137     gen = &generations[g];
 138     gen->no = g;
 139     gen->mut_list = END_MUT_LIST;
 140     gen->mut_once_list = END_MUT_LIST;
 141     gen->collections = 0;
 142     gen->failed_promotions = 0;
 143     gen->max_blocks = 0;
 144   }
 145
 146   /* A couple of convenience pointers */
 147   g0 = &generations[0];
 148   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 149
 150   /* Allocate step structures in each generation */
 151   if (RtsFlags.GcFlags.generations > 1) {
 152     /* Only for multiple-generations */
 153
 154     /* Oldest generation: one step */
 155     oldest_gen->n_steps = 1;
 156     oldest_gen->steps =
 157       stgMallocBytes(1 * sizeof(struct _step), "initStorage: last step");
 158
 159     /* set up all except the oldest generation with 2 steps */
 160     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 161       generations[g].n_steps = RtsFlags.GcFlags.steps;
 162       generations[g].steps  =
 163         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct _step),
 164                         "initStorage: steps");
 165     }
 166
 167   } else {
 168     /* single generation, i.e. a two-space collector */
 169     g0->n_steps = 1;
 170     g0->steps = stgMallocBytes (sizeof(struct _step), "initStorage: steps");
 171   }
 172
 173   /* Initialise all steps */
 174   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 175     for (s = 0; s < generations[g].n_steps; s++) {
 176       stp = &generations[g].steps[s];
 177       stp->no = s;
 178       stp->blocks = NULL;
 179       stp->n_blocks = 0;
 180       stp->gen = &generations[g];
 181       stp->gen_no = g;
 182       stp->hp = NULL;
 183       stp->hpLim = NULL;
 184       stp->hp_bd = NULL;
 185       stp->scan = NULL;
 186       stp->scan_bd = NULL;
 187       stp->large_objects = NULL;
 188       stp->n_large_blocks = 0;
 189       stp->new_large_objects = NULL;
 190       stp->scavenged_large_objects = NULL;
 191       stp->n_scavenged_large_blocks = 0;
 192       stp->is_compacted = 0;
 193       stp->bitmap = NULL;
 194     }
 195   }
 196
 197   /* Set up the destination pointers in each younger gen. step */
 198   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 199     for (s = 0; s < generations[g].n_steps-1; s++) {
 200       generations[g].steps[s].to = &generations[g].steps[s+1];
 201     }
 202     generations[g].steps[s].to = &generations[g+1].steps[0];
 203   }
 204
 205   /* The oldest generation has one step and it is compacted. */
 206   if (RtsFlags.GcFlags.compact) {
 207       if (RtsFlags.GcFlags.generations == 1) {
 208           belch("WARNING: compaction is incompatible with -G1; disabled");
 209       } else {
 210           oldest_gen->steps[0].is_compacted = 1;
 211       }
 212   }
 213   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 214
 215   /* generation 0 is special: that's the nursery */
 216   generations[0].max_blocks = 0;
 217
 218   /* G0S0: the allocation area.  Policy: keep the allocation area
 219    * small to begin with, even if we have a large suggested heap
 220    * size.  Reason: we're going to do a major collection first, and we
 221    * don't want it to be a big one.  This vague idea is borne out by
 222    * rigorous experimental evidence.
 223    */
 224   g0s0 = &generations[0].steps[0];
 225
 226   allocNurseries();
 227
 228   weak_ptr_list = NULL;
 229   caf_list = NULL;
 230
 231   /* initialise the allocate() interface */
 232   small_alloc_list = NULL;
 233   alloc_blocks = 0;
 234   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 235
 236   /* Tell GNU multi-precision pkg about our custom alloc functions */
 237   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 238
 239 #if defined(SMP)
 240   initMutex(&sm_mutex);
 241 #endif
 242
 243   IF_DEBUG(gc, statDescribeGens());
 244 }
 245
 246 void
 247 exitStorage (void)
 248 {
 249     stat_exit(calcAllocated());
 250 }
 251
 252 /* -----------------------------------------------------------------------------
 253    CAF management.
 254
 255    The entry code for every CAF does the following:
 256
 257       - builds a CAF_BLACKHOLE in the heap
 258       - pushes an update frame pointing to the CAF_BLACKHOLE
 259       - invokes UPD_CAF(), which:
 260           - calls newCaf, below
 261           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 262
 263    Why do we build a BLACKHOLE in the heap rather than just updating
 264    the thunk directly?  It's so that we only need one kind of update
 265    frame - otherwise we'd need a static version of the update frame too.
 266
 267    newCaf() does the following:
 268
 269       - it puts the CAF on the oldest generation's mut-once list.
 270         This is so that we can treat the CAF as a root when collecting
 271         younger generations.
 272
 273    For GHCI, we have additional requirements when dealing with CAFs:
 274
 275       - we must *retain* all dynamically-loaded CAFs ever entered,
 276         just in case we need them again.
 277       - we must be able to *revert* CAFs that have been evaluated, to
 278         their pre-evaluated form.
 279
 280       To do this, we use an additional CAF list.  When newCaf() is
 281       called on a dynamically-loaded CAF, we add it to the CAF list
 282       instead of the old-generation mutable list, and save away its
 283       old info pointer (in caf->saved_info) for later reversion.
 284
 285       To revert all the CAFs, we traverse the CAF list and reset the
 286       info pointer to caf->saved_info, then throw away the CAF list.
 287       (see GC.c:revertCAFs()).
 288
 289       -- SDM 29/1/01
 290
 291    -------------------------------------------------------------------------- */
 292
 293 void
 294 newCAF(StgClosure* caf)
 295 {
 296   /* Put this CAF on the mutable list for the old generation.
 297    * This is a HACK - the IND_STATIC closure doesn't really have
 298    * a mut_link field, but we pretend it has - in fact we re-use
 299    * the STATIC_LINK field for the time being, because when we
 300    * come to do a major GC we won't need the mut_link field
 301    * any more and can use it as a STATIC_LINK.
 302    */
 303   ACQUIRE_SM_LOCK;
 304
 305   if (is_dynamically_loaded_rwdata_ptr((StgPtr)caf)) {
 306       ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 307       ((StgIndStatic *)caf)->static_link = caf_list;
 308       caf_list = caf;
 309   } else {
 310       ((StgIndStatic *)caf)->saved_info = NULL;
 311       ((StgMutClosure *)caf)->mut_link = oldest_gen->mut_once_list;
 312       oldest_gen->mut_once_list = (StgMutClosure *)caf;
 313   }
 314
 315   RELEASE_SM_LOCK;
 316
 317 #ifdef PAR
 318   /* If we are PAR or DIST then  we never forget a CAF */
 319   { globalAddr *newGA;
 320     //belch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 321     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 322     ASSERT(newGA);
 323   }
 324 #endif /* PAR */
 325 }
 326
 327 /* -----------------------------------------------------------------------------
 328    Nursery management.
 329    -------------------------------------------------------------------------- */
 330
 331 void
 332 allocNurseries( void )
 333 {
 334 #ifdef SMP
 335   {
 336     Capability *cap;
 337     bdescr *bd;
 338
 339     g0s0->blocks = NULL;
 340     g0s0->n_blocks = 0;
 341     for (cap = free_capabilities; cap != NULL; cap = cap->link) {
 342       cap->r.rNursery = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
 343       cap->r.rCurrentNursery = cap->r.rNursery;
 344       for (bd = cap->r.rNursery; bd != NULL; bd = bd->link) {
 345         bd->u.back = (bdescr *)cap;
 346       }
 347     }
 348     /* Set the back links to be equal to the Capability,
 349      * so we can do slightly better informed locking.
 350      */
 351   }
 352 #else /* SMP */
 353   g0s0->blocks      = allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
 354   g0s0->n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 355   g0s0->to_blocks   = NULL;
 356   g0s0->n_to_blocks = 0;
 357   MainCapability.r.rNursery        = g0s0->blocks;
 358   MainCapability.r.rCurrentNursery = g0s0->blocks;
 359   /* hp, hpLim, hp_bd, to_space etc. aren't used in G0S0 */
 360 #endif
 361 }
 362
 363 void
 364 resetNurseries( void )
 365 {
 366   bdescr *bd;
 367 #ifdef SMP
 368   Capability *cap;
 369
 370   /* All tasks must be stopped */
 371   ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes);
 372
 373   for (cap = free_capabilities; cap != NULL; cap = cap->link) {
 374     for (bd = cap->r.rNursery; bd; bd = bd->link) {
 375       bd->free = bd->start;
 376       ASSERT(bd->gen_no == 0);
 377       ASSERT(bd->step == g0s0);
 378       IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 379     }
 380     cap->r.rCurrentNursery = cap->r.rNursery;
 381   }
 382 #else
 383   for (bd = g0s0->blocks; bd; bd = bd->link) {
 384     bd->free = bd->start;
 385     ASSERT(bd->gen_no == 0);
 386     ASSERT(bd->step == g0s0);
 387     IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 388   }
 389   MainCapability.r.rNursery = g0s0->blocks;
 390   MainCapability.r.rCurrentNursery = g0s0->blocks;
 391 #endif
 392 }
 393
 394 bdescr *
 395 allocNursery (bdescr *tail, nat blocks)
 396 {
 397   bdescr *bd;
 398   nat i;
 399
 400   // Allocate a nursery: we allocate fresh blocks one at a time and
 401   // cons them on to the front of the list, not forgetting to update
 402   // the back pointer on the tail of the list to point to the new block.
 403   for (i=0; i < blocks; i++) {
 404     // @LDV profiling
 405     /*
 406       processNursery() in LdvProfile.c assumes that every block group in
 407       the nursery contains only a single block. So, if a block group is
 408       given multiple blocks, change processNursery() accordingly.
 409      */
 410     bd = allocBlock();
 411     bd->link = tail;
 412     // double-link the nursery: we might need to insert blocks
 413     if (tail != NULL) {
 414         tail->u.back = bd;
 415     }
 416     bd->step = g0s0;
 417     bd->gen_no = 0;
 418     bd->flags = 0;
 419     bd->free = bd->start;
 420     tail = bd;
 421   }
 422   tail->u.back = NULL;
 423   return tail;
 424 }
 425
 426 void
 427 resizeNursery ( nat blocks )
 428 {
 429   bdescr *bd;
 430   nat nursery_blocks;
 431
 432 #ifdef SMP
 433   barf("resizeNursery: can't resize in SMP mode");
 434 #endif
 435
 436   nursery_blocks = g0s0->n_blocks;
 437   if (nursery_blocks == blocks) {
 438     return;
 439   }
 440
 441   else if (nursery_blocks < blocks) {
 442     IF_DEBUG(gc, fprintf(stderr, "Increasing size of nursery to %d blocks\n",
 443                          blocks));
 444     g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
 445   }
 446
 447   else {
 448     bdescr *next_bd;
 449
 450     IF_DEBUG(gc, fprintf(stderr, "Decreasing size of nursery to %d blocks\n",
 451                          blocks));
 452
 453     bd = g0s0->blocks;
 454     while (nursery_blocks > blocks) {
 455         next_bd = bd->link;
 456         next_bd->u.back = NULL;
 457         nursery_blocks -= bd->blocks; // might be a large block
 458         freeGroup(bd);
 459         bd = next_bd;
 460     }
 461     g0s0->blocks = bd;
 462     // might have gone just under, by freeing a large block, so make
 463     // up the difference.
 464     if (nursery_blocks < blocks) {
 465         g0s0->blocks = allocNursery(g0s0->blocks, blocks-nursery_blocks);
 466     }
 467   }
 468
 469   g0s0->n_blocks = blocks;
 470   ASSERT(countBlocks(g0s0->blocks) == g0s0->n_blocks);
 471 }
 472
 473 /* -----------------------------------------------------------------------------
 474    The allocate() interface
 475
 476    allocate(n) always succeeds, and returns a chunk of memory n words
 477    long.  n can be larger than the size of a block if necessary, in
 478    which case a contiguous block group will be allocated.
 479    -------------------------------------------------------------------------- */
 480
 481 StgPtr
 482 allocate( nat n )
 483 {
 484   bdescr *bd;
 485   StgPtr p;
 486
 487   ACQUIRE_SM_LOCK;
 488
 489   TICK_ALLOC_HEAP_NOCTR(n);
 490   CCS_ALLOC(CCCS,n);
 491
 492   /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 493   /* ToDo: allocate directly into generation 1 */
 494   if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 495     nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 496     bd = allocGroup(req_blocks);
 497     dbl_link_onto(bd, &g0s0->large_objects);
 498     bd->gen_no  = 0;
 499     bd->step = g0s0;
 500     bd->flags = BF_LARGE;
 501     bd->free = bd->start;
 502     /* don't add these blocks to alloc_blocks, since we're assuming
 503      * that large objects are likely to remain live for quite a while
 504      * (eg. running threads), so garbage collecting early won't make
 505      * much difference.
 506      */
 507     alloc_blocks += req_blocks;
 508     RELEASE_SM_LOCK;
 509     return bd->start;
 510
 511   /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 512   } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 513     if (small_alloc_list) {
 514       small_alloc_list->free = alloc_Hp;
 515     }
 516     bd = allocBlock();
 517     bd->link = small_alloc_list;
 518     small_alloc_list = bd;
 519     bd->gen_no = 0;
 520     bd->step = g0s0;
 521     bd->flags = 0;
 522     alloc_Hp = bd->start;
 523     alloc_HpLim = bd->start + BLOCK_SIZE_W;
 524     alloc_blocks++;
 525   }
 526
 527   p = alloc_Hp;
 528   alloc_Hp += n;
 529   RELEASE_SM_LOCK;
 530   return p;
 531 }
 532
 533 lnat
 534 allocated_bytes( void )
 535 {
 536     lnat allocated;
 537
 538     allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
 539     if (pinned_object_block != NULL) {
 540         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 541             pinned_object_block->free;
 542     }
 543
 544     return allocated;
 545 }
 546
 547 void
 548 tidyAllocateLists (void)
 549 {
 550     if (small_alloc_list != NULL) {
 551         ASSERT(alloc_Hp >= small_alloc_list->start &&
 552                alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
 553         small_alloc_list->free = alloc_Hp;
 554     }
 555 }
 556
 557 /* ---------------------------------------------------------------------------
 558    Allocate a fixed/pinned object.
 559
 560    We allocate small pinned objects into a single block, allocating a
 561    new block when the current one overflows.  The block is chained
 562    onto the large_object_list of generation 0 step 0.
 563
 564    NOTE: The GC can't in general handle pinned objects.  This
 565    interface is only safe to use for ByteArrays, which have no
 566    pointers and don't require scavenging.  It works because the
 567    block's descriptor has the BF_LARGE flag set, so the block is
 568    treated as a large object and chained onto various lists, rather
 569    than the individual objects being copied.  However, when it comes
 570    to scavenge the block, the GC will only scavenge the first object.
 571    The reason is that the GC can't linearly scan a block of pinned
 572    objects at the moment (doing so would require using the
 573    mostly-copying techniques).  But since we're restricting ourselves
 574    to pinned ByteArrays, not scavenging is ok.
 575
 576    This function is called by newPinnedByteArray# which immediately
 577    fills the allocated memory with a MutableByteArray#.
 578    ------------------------------------------------------------------------- */
 579
 580 StgPtr
 581 allocatePinned( nat n )
 582 {
 583     StgPtr p;
 584     bdescr *bd = pinned_object_block;
 585
 586     ACQUIRE_SM_LOCK;
 587
 588     TICK_ALLOC_HEAP_NOCTR(n);
 589     CCS_ALLOC(CCCS,n);
 590
 591     // If the request is for a large object, then allocate()
 592     // will give us a pinned object anyway.
 593     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 594         RELEASE_SM_LOCK;
 595         return allocate(n);
 596     }
 597
 598     // we always return 8-byte aligned memory.  bd->free must be
 599     // 8-byte aligned to begin with, so we just round up n to
 600     // the nearest multiple of 8 bytes.
 601     if (sizeof(StgWord) == 4) {
 602         n = (n+1) & ~1;
 603     }
 604
 605     // If we don't have a block of pinned objects yet, or the current
 606     // one isn't large enough to hold the new object, allocate a new one.
 607     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 608         pinned_object_block = bd = allocBlock();
 609         dbl_link_onto(bd, &g0s0->large_objects);
 610         bd->gen_no = 0;
 611         bd->step   = g0s0;
 612         bd->flags  = BF_LARGE;
 613         bd->free   = bd->start;
 614         alloc_blocks++;
 615     }
 616
 617     p = bd->free;
 618     bd->free += n;
 619     RELEASE_SM_LOCK;
 620     return p;
 621 }
 622
 623 /* -----------------------------------------------------------------------------
 624    Allocation functions for GMP.
 625
 626    These all use the allocate() interface - we can't have any garbage
 627    collection going on during a gmp operation, so we use allocate()
 628    which always succeeds.  The gmp operations which might need to
 629    allocate will ask the storage manager (via doYouWantToGC()) whether
 630    a garbage collection is required, in case we get into a loop doing
 631    only allocate() style allocation.
 632    -------------------------------------------------------------------------- */
 633
 634 static void *
 635 stgAllocForGMP (size_t size_in_bytes)
 636 {
 637   StgArrWords* arr;
 638   nat data_size_in_words, total_size_in_words;
 639
 640   /* round up to a whole number of words */
 641   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 642   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 643
 644   /* allocate and fill it in. */
 645   arr = (StgArrWords *)allocate(total_size_in_words);
 646   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 647
 648   /* and return a ptr to the goods inside the array */
 649   return(BYTE_ARR_CTS(arr));
 650 }
 651
 652 static void *
 653 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 654 {
 655     void *new_stuff_ptr = stgAllocForGMP(new_size);
 656     nat i = 0;
 657     char *p = (char *) ptr;
 658     char *q = (char *) new_stuff_ptr;
 659
 660     for (; i < old_size; i++, p++, q++) {
 661         *q = *p;
 662     }
 663
 664     return(new_stuff_ptr);
 665 }
 666
 667 static void
 668 stgDeallocForGMP (void *ptr STG_UNUSED,
 669                   size_t size STG_UNUSED)
 670 {
 671     /* easy for us: the garbage collector does the dealloc'n */
 672 }
 673
 674 /* -----------------------------------------------------------------------------
 675  * Stats and stuff
 676  * -------------------------------------------------------------------------- */
 677
 678 /* -----------------------------------------------------------------------------
 679  * calcAllocated()
 680  *
 681  * Approximate how much we've allocated: number of blocks in the
 682  * nursery + blocks allocated via allocate() - unused nusery blocks.
 683  * This leaves a little slop at the end of each block, and doesn't
 684  * take into account large objects (ToDo).
 685  * -------------------------------------------------------------------------- */
 686
 687 lnat
 688 calcAllocated( void )
 689 {
 690   nat allocated;
 691   bdescr *bd;
 692
 693 #ifdef SMP
 694   Capability *cap;
 695
 696   /* All tasks must be stopped.  Can't assert that all the
 697      capabilities are owned by the scheduler, though: one or more
 698      tasks might have been stopped while they were running (non-main)
 699      threads. */
 700   /*  ASSERT(n_free_capabilities == RtsFlags.ParFlags.nNodes); */
 701
 702   allocated =
 703     n_free_capabilities * RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W
 704     + allocated_bytes();
 705
 706   for (cap = free_capabilities; cap != NULL; cap = cap->link) {
 707     for ( bd = cap->r.rCurrentNursery->link; bd != NULL; bd = bd->link ) {
 708       allocated -= BLOCK_SIZE_W;
 709     }
 710     if (cap->r.rCurrentNursery->free < cap->r.rCurrentNursery->start
 711         + BLOCK_SIZE_W) {
 712       allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 713         - cap->r.rCurrentNursery->free;
 714     }
 715   }
 716
 717 #else /* !SMP */
 718   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 719
 720   allocated = (g0s0->n_blocks * BLOCK_SIZE_W) + allocated_bytes();
 721   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 722     allocated -= BLOCK_SIZE_W;
 723   }
 724   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 725     allocated -= (current_nursery->start + BLOCK_SIZE_W)
 726       - current_nursery->free;
 727   }
 728 #endif
 729
 730   total_allocated += allocated;
 731   return allocated;
 732 }
 733
 734 /* Approximate the amount of live data in the heap.  To be called just
 735  * after garbage collection (see GarbageCollect()).
 736  */
 737 extern lnat
 738 calcLive(void)
 739 {
 740   nat g, s;
 741   lnat live = 0;
 742   step *stp;
 743
 744   if (RtsFlags.GcFlags.generations == 1) {
 745     live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
 746       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 747     return live;
 748   }
 749
 750   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 751     for (s = 0; s < generations[g].n_steps; s++) {
 752       /* approximate amount of live data (doesn't take into account slop
 753        * at end of each block).
 754        */
 755       if (g == 0 && s == 0) {
 756           continue;
 757       }
 758       stp = &generations[g].steps[s];
 759       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 760       if (stp->hp_bd != NULL) {
 761           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 762               / sizeof(W_);
 763       }
 764     }
 765   }
 766   return live;
 767 }
 768
 769 /* Approximate the number of blocks that will be needed at the next
 770  * garbage collection.
 771  *
 772  * Assume: all data currently live will remain live.  Steps that will
 773  * be collected next time will therefore need twice as many blocks
 774  * since all the data will be copied.
 775  */
 776 extern lnat
 777 calcNeeded(void)
 778 {
 779     lnat needed = 0;
 780     nat g, s;
 781     step *stp;
 782
 783     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 784         for (s = 0; s < generations[g].n_steps; s++) {
 785             if (g == 0 && s == 0) { continue; }
 786             stp = &generations[g].steps[s];
 787             if (generations[g].steps[0].n_blocks +
 788                 generations[g].steps[0].n_large_blocks
 789                 > generations[g].max_blocks
 790                 && stp->is_compacted == 0) {
 791                 needed += 2 * stp->n_blocks;
 792             } else {
 793                 needed += stp->n_blocks;
 794             }
 795         }
 796     }
 797     return needed;
 798 }
 799
 800 /* -----------------------------------------------------------------------------
 801    Debugging
 802
 803    memInventory() checks for memory leaks by counting up all the
 804    blocks we know about and comparing that to the number of blocks
 805    allegedly floating around in the system.
 806    -------------------------------------------------------------------------- */
 807
 808 #ifdef DEBUG
 809
 810 void
 811 memInventory(void)
 812 {
 813   nat g, s;
 814   step *stp;
 815   bdescr *bd;
 816   lnat total_blocks = 0, free_blocks = 0;
 817
 818   /* count the blocks we current have */
 819
 820   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 821     for (s = 0; s < generations[g].n_steps; s++) {
 822       stp = &generations[g].steps[s];
 823       total_blocks += stp->n_blocks;
 824       if (RtsFlags.GcFlags.generations == 1) {
 825         /* two-space collector has a to-space too :-) */
 826         total_blocks += g0s0->n_to_blocks;
 827       }
 828       for (bd = stp->large_objects; bd; bd = bd->link) {
 829         total_blocks += bd->blocks;
 830         /* hack for megablock groups: they have an extra block or two in
 831            the second and subsequent megablocks where the block
 832            descriptors would normally go.
 833         */
 834         if (bd->blocks > BLOCKS_PER_MBLOCK) {
 835           total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
 836                           * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
 837         }
 838       }
 839     }
 840   }
 841
 842   /* any blocks held by allocate() */
 843   for (bd = small_alloc_list; bd; bd = bd->link) {
 844     total_blocks += bd->blocks;
 845   }
 846
 847 #ifdef PROFILING
 848   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
 849     for (bd = firstStack; bd != NULL; bd = bd->link)
 850       total_blocks += bd->blocks;
 851   }
 852 #endif
 853
 854   // count the blocks allocated by the arena allocator
 855   total_blocks += arenaBlocks();
 856
 857   /* count the blocks on the free list */
 858   free_blocks = countFreeList();
 859
 860   if (total_blocks + free_blocks != mblocks_allocated *
 861       BLOCKS_PER_MBLOCK) {
 862     fprintf(stderr, "Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
 863             total_blocks, free_blocks, total_blocks + free_blocks,
 864             mblocks_allocated * BLOCKS_PER_MBLOCK);
 865   }
 866
 867   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
 868 }
 869
 870
 871 nat
 872 countBlocks(bdescr *bd)
 873 {
 874     nat n;
 875     for (n=0; bd != NULL; bd=bd->link) {
 876         n += bd->blocks;
 877     }
 878     return n;
 879 }
 880
 881 /* Full heap sanity check. */
 882 void
 883 checkSanity( void )
 884 {
 885     nat g, s;
 886
 887     if (RtsFlags.GcFlags.generations == 1) {
 888         checkHeap(g0s0->to_blocks);
 889         checkChain(g0s0->large_objects);
 890     } else {
 891
 892         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 893             for (s = 0; s < generations[g].n_steps; s++) {
 894                 ASSERT(countBlocks(generations[g].steps[s].blocks)
 895                        == generations[g].steps[s].n_blocks);
 896                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
 897                        == generations[g].steps[s].n_large_blocks);
 898                 if (g == 0 && s == 0) { continue; }
 899                 checkHeap(generations[g].steps[s].blocks);
 900                 checkChain(generations[g].steps[s].large_objects);
 901                 if (g > 0) {
 902                     checkMutableList(generations[g].mut_list, g);
 903                     checkMutOnceList(generations[g].mut_once_list, g);
 904                 }
 905             }
 906         }
 907         checkFreeListSanity();
 908     }
 909 }
 910
 911 // handy function for use in gdb, because Bdescr() is inlined.
 912 extern bdescr *_bdescr( StgPtr p );
 913
 914 bdescr *
 915 _bdescr( StgPtr p )
 916 {
 917     return Bdescr(p);
 918 }
 919
 920 #endif