ghc/rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Storage manager front end
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11 #include "RtsUtils.h"
  12 #include "RtsFlags.h"
  13 #include "Stats.h"
  14 #include "Hooks.h"
  15 #include "BlockAlloc.h"
  16 #include "MBlock.h"
  17 #include "Weak.h"
  18 #include "Sanity.h"
  19 #include "Arena.h"
  20 #include "OSThreads.h"
  21 #include "Capability.h"
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 /*
  30  * All these globals require sm_mutex to access in SMP mode.
  31  */
  32 StgClosure    *caf_list         = NULL;
  33 StgClosure    *revertible_caf_list = NULL;
  34 rtsBool       keepCAFs;
  35
  36 bdescr *small_alloc_list;       /* allocate()d small objects */
  37 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  38 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  39 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  40
  41 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  42 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  43
  44 generation *generations = NULL; /* all the generations */
  45 generation *g0          = NULL; /* generation 0, for convenience */
  46 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  47 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  48
  49 ullong total_allocated = 0;     /* total memory allocated during run */
  50
  51 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  52 step *nurseries         = NULL; /* array of nurseries, >1 only if SMP */
  53
  54 /*
  55  * Storage manager mutex:  protects all the above state from
  56  * simultaneous access by two STG threads.
  57  */
  58 #ifdef SMP
  59 Mutex sm_mutex = INIT_MUTEX_VAR;
  60 #endif
  61
  62 /*
  63  * Forward references
  64  */
  65 static void *stgAllocForGMP   (size_t size_in_bytes);
  66 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  67 static void  stgDeallocForGMP (void *ptr, size_t size);
  68
  69 static void
  70 initStep (step *stp, int g, int s)
  71 {
  72     stp->no = s;
  73     stp->blocks = NULL;
  74     stp->n_to_blocks = 0;
  75     stp->n_blocks = 0;
  76     stp->gen = &generations[g];
  77     stp->gen_no = g;
  78     stp->hp = NULL;
  79     stp->hpLim = NULL;
  80     stp->hp_bd = NULL;
  81     stp->scan = NULL;
  82     stp->scan_bd = NULL;
  83     stp->large_objects = NULL;
  84     stp->n_large_blocks = 0;
  85     stp->new_large_objects = NULL;
  86     stp->scavenged_large_objects = NULL;
  87     stp->n_scavenged_large_blocks = 0;
  88     stp->is_compacted = 0;
  89     stp->bitmap = NULL;
  90 }
  91
  92 void
  93 initStorage( void )
  94 {
  95   nat g, s;
  96   generation *gen;
  97
  98   if (generations != NULL) {
  99       // multi-init protection
 100       return;
 101   }
 102
 103   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 104    * doing something reasonable.
 105    */
 106   ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
 107   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 108   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 109
 110   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 111       RtsFlags.GcFlags.heapSizeSuggestion >
 112       RtsFlags.GcFlags.maxHeapSize) {
 113     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 114   }
 115
 116   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 117       RtsFlags.GcFlags.minAllocAreaSize >
 118       RtsFlags.GcFlags.maxHeapSize) {
 119       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 120       exit(1);
 121   }
 122
 123   initBlockAllocator();
 124
 125 #if defined(SMP)
 126   initMutex(&sm_mutex);
 127 #endif
 128
 129   /* allocate generation info array */
 130   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 131                                              * sizeof(struct generation_),
 132                                              "initStorage: gens");
 133
 134   /* Initialise all generations */
 135   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 136     gen = &generations[g];
 137     gen->no = g;
 138     gen->mut_list = allocBlock();
 139     gen->collections = 0;
 140     gen->failed_promotions = 0;
 141     gen->max_blocks = 0;
 142   }
 143
 144   /* A couple of convenience pointers */
 145   g0 = &generations[0];
 146   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 147
 148   /* Allocate step structures in each generation */
 149   if (RtsFlags.GcFlags.generations > 1) {
 150     /* Only for multiple-generations */
 151
 152     /* Oldest generation: one step */
 153     oldest_gen->n_steps = 1;
 154     oldest_gen->steps =
 155       stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
 156
 157     /* set up all except the oldest generation with 2 steps */
 158     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 159       generations[g].n_steps = RtsFlags.GcFlags.steps;
 160       generations[g].steps  =
 161         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
 162                         "initStorage: steps");
 163     }
 164
 165   } else {
 166     /* single generation, i.e. a two-space collector */
 167     g0->n_steps = 1;
 168     g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
 169   }
 170
 171 #ifdef SMP
 172   n_nurseries = RtsFlags.ParFlags.nNodes;
 173   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 174                               "initStorage: nurseries");
 175 #else
 176   n_nurseries = 1;
 177   nurseries = g0->steps; // just share nurseries[0] with g0s0
 178 #endif
 179
 180   /* Initialise all steps */
 181   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 182     for (s = 0; s < generations[g].n_steps; s++) {
 183         initStep(&generations[g].steps[s], g, s);
 184     }
 185   }
 186
 187 #ifdef SMP
 188   for (s = 0; s < n_nurseries; s++) {
 189       initStep(&nurseries[s], 0, s);
 190   }
 191 #endif
 192
 193   /* Set up the destination pointers in each younger gen. step */
 194   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 195     for (s = 0; s < generations[g].n_steps-1; s++) {
 196       generations[g].steps[s].to = &generations[g].steps[s+1];
 197     }
 198     generations[g].steps[s].to = &generations[g+1].steps[0];
 199   }
 200   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 201
 202 #ifdef SMP
 203   for (s = 0; s < n_nurseries; s++) {
 204       nurseries[s].to = generations[0].steps[0].to;
 205   }
 206 #endif
 207
 208   /* The oldest generation has one step. */
 209   if (RtsFlags.GcFlags.compact) {
 210       if (RtsFlags.GcFlags.generations == 1) {
 211           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 212       } else {
 213           oldest_gen->steps[0].is_compacted = 1;
 214       }
 215   }
 216
 217 #ifdef SMP
 218   if (RtsFlags.GcFlags.generations == 1) {
 219       errorBelch("-G1 is incompatible with SMP");
 220       stg_exit(1);
 221   }
 222 #endif
 223
 224   /* generation 0 is special: that's the nursery */
 225   generations[0].max_blocks = 0;
 226
 227   /* G0S0: the allocation area.  Policy: keep the allocation area
 228    * small to begin with, even if we have a large suggested heap
 229    * size.  Reason: we're going to do a major collection first, and we
 230    * don't want it to be a big one.  This vague idea is borne out by
 231    * rigorous experimental evidence.
 232    */
 233   g0s0 = &generations[0].steps[0];
 234
 235   allocNurseries();
 236
 237   weak_ptr_list = NULL;
 238   caf_list = NULL;
 239   revertible_caf_list = NULL;
 240
 241   /* initialise the allocate() interface */
 242   small_alloc_list = NULL;
 243   alloc_blocks = 0;
 244   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 245
 246   /* Tell GNU multi-precision pkg about our custom alloc functions */
 247   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 248
 249   IF_DEBUG(gc, statDescribeGens());
 250 }
 251
 252 void
 253 exitStorage (void)
 254 {
 255     stat_exit(calcAllocated());
 256 }
 257
 258 /* -----------------------------------------------------------------------------
 259    CAF management.
 260
 261    The entry code for every CAF does the following:
 262
 263       - builds a CAF_BLACKHOLE in the heap
 264       - pushes an update frame pointing to the CAF_BLACKHOLE
 265       - invokes UPD_CAF(), which:
 266           - calls newCaf, below
 267           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 268
 269    Why do we build a BLACKHOLE in the heap rather than just updating
 270    the thunk directly?  It's so that we only need one kind of update
 271    frame - otherwise we'd need a static version of the update frame too.
 272
 273    newCaf() does the following:
 274
 275       - it puts the CAF on the oldest generation's mut-once list.
 276         This is so that we can treat the CAF as a root when collecting
 277         younger generations.
 278
 279    For GHCI, we have additional requirements when dealing with CAFs:
 280
 281       - we must *retain* all dynamically-loaded CAFs ever entered,
 282         just in case we need them again.
 283       - we must be able to *revert* CAFs that have been evaluated, to
 284         their pre-evaluated form.
 285
 286       To do this, we use an additional CAF list.  When newCaf() is
 287       called on a dynamically-loaded CAF, we add it to the CAF list
 288       instead of the old-generation mutable list, and save away its
 289       old info pointer (in caf->saved_info) for later reversion.
 290
 291       To revert all the CAFs, we traverse the CAF list and reset the
 292       info pointer to caf->saved_info, then throw away the CAF list.
 293       (see GC.c:revertCAFs()).
 294
 295       -- SDM 29/1/01
 296
 297    -------------------------------------------------------------------------- */
 298
 299 void
 300 newCAF(StgClosure* caf)
 301 {
 302   ACQUIRE_SM_LOCK;
 303
 304   if(keepCAFs)
 305   {
 306     // HACK:
 307     // If we are in GHCi _and_ we are using dynamic libraries,
 308     // then we can't redirect newCAF calls to newDynCAF (see below),
 309     // so we make newCAF behave almost like newDynCAF.
 310     // The dynamic libraries might be used by both the interpreted
 311     // program and GHCi itself, so they must not be reverted.
 312     // This also means that in GHCi with dynamic libraries, CAFs are not
 313     // garbage collected. If this turns out to be a problem, we could
 314     // do another hack here and do an address range test on caf to figure
 315     // out whether it is from a dynamic library.
 316     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 317     ((StgIndStatic *)caf)->static_link = caf_list;
 318     caf_list = caf;
 319   }
 320   else
 321   {
 322     /* Put this CAF on the mutable list for the old generation.
 323     * This is a HACK - the IND_STATIC closure doesn't really have
 324     * a mut_link field, but we pretend it has - in fact we re-use
 325     * the STATIC_LINK field for the time being, because when we
 326     * come to do a major GC we won't need the mut_link field
 327     * any more and can use it as a STATIC_LINK.
 328     */
 329     ((StgIndStatic *)caf)->saved_info = NULL;
 330     recordMutableGen(caf, oldest_gen);
 331   }
 332
 333   RELEASE_SM_LOCK;
 334
 335 #ifdef PAR
 336   /* If we are PAR or DIST then  we never forget a CAF */
 337   { globalAddr *newGA;
 338     //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 339     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 340     ASSERT(newGA);
 341   }
 342 #endif /* PAR */
 343 }
 344
 345 // An alternate version of newCaf which is used for dynamically loaded
 346 // object code in GHCi.  In this case we want to retain *all* CAFs in
 347 // the object code, because they might be demanded at any time from an
 348 // expression evaluated on the command line.
 349 // Also, GHCi might want to revert CAFs, so we add these to the
 350 // revertible_caf_list.
 351 //
 352 // The linker hackily arranges that references to newCaf from dynamic
 353 // code end up pointing to newDynCAF.
 354 void
 355 newDynCAF(StgClosure *caf)
 356 {
 357     ACQUIRE_SM_LOCK;
 358
 359     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 360     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 361     revertible_caf_list = caf;
 362
 363     RELEASE_SM_LOCK;
 364 }
 365
 366 /* -----------------------------------------------------------------------------
 367    Nursery management.
 368    -------------------------------------------------------------------------- */
 369
 370 static bdescr *
 371 allocNursery (step *stp, bdescr *tail, nat blocks)
 372 {
 373     bdescr *bd;
 374     nat i;
 375
 376     // Allocate a nursery: we allocate fresh blocks one at a time and
 377     // cons them on to the front of the list, not forgetting to update
 378     // the back pointer on the tail of the list to point to the new block.
 379     for (i=0; i < blocks; i++) {
 380         // @LDV profiling
 381         /*
 382           processNursery() in LdvProfile.c assumes that every block group in
 383           the nursery contains only a single block. So, if a block group is
 384           given multiple blocks, change processNursery() accordingly.
 385         */
 386         bd = allocBlock();
 387         bd->link = tail;
 388         // double-link the nursery: we might need to insert blocks
 389         if (tail != NULL) {
 390             tail->u.back = bd;
 391         }
 392         bd->step = stp;
 393         bd->gen_no = 0;
 394         bd->flags = 0;
 395         bd->free = bd->start;
 396         tail = bd;
 397     }
 398     tail->u.back = NULL;
 399     return tail;
 400 }
 401
 402 static void
 403 assignNurseriesToCapabilities (void)
 404 {
 405 #ifdef SMP
 406     nat i;
 407
 408     for (i = 0; i < n_nurseries; i++) {
 409         capabilities[i].r.rNursery        = &nurseries[i];
 410         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 411         capabilities[i].r.rCurrentAlloc   = NULL;
 412     }
 413 #else /* SMP */
 414     MainCapability.r.rNursery        = &nurseries[0];
 415     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 416     MainCapability.r.rCurrentAlloc   = NULL;
 417 #endif
 418 }
 419
 420 void
 421 allocNurseries( void )
 422 {
 423     nat i;
 424
 425     for (i = 0; i < n_nurseries; i++) {
 426         nurseries[i].blocks =
 427             allocNursery(&nurseries[i], NULL,
 428                          RtsFlags.GcFlags.minAllocAreaSize);
 429         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 430         nurseries[i].to_blocks   = NULL;
 431         nurseries[i].n_to_blocks = 0;
 432         /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
 433     }
 434     assignNurseriesToCapabilities();
 435 }
 436
 437 void
 438 resetNurseries( void )
 439 {
 440     nat i;
 441     bdescr *bd;
 442     step *stp;
 443
 444     for (i = 0; i < n_nurseries; i++) {
 445         stp = &nurseries[i];
 446         for (bd = stp->blocks; bd; bd = bd->link) {
 447             bd->free = bd->start;
 448             ASSERT(bd->gen_no == 0);
 449             ASSERT(bd->step == stp);
 450             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 451         }
 452     }
 453     assignNurseriesToCapabilities();
 454 }
 455
 456 lnat
 457 countNurseryBlocks (void)
 458 {
 459     nat i;
 460     lnat blocks = 0;
 461
 462     for (i = 0; i < n_nurseries; i++) {
 463         blocks += nurseries[i].n_blocks;
 464     }
 465     return blocks;
 466 }
 467
 468 static void
 469 resizeNursery ( step *stp, nat blocks )
 470 {
 471   bdescr *bd;
 472   nat nursery_blocks;
 473
 474   nursery_blocks = stp->n_blocks;
 475   if (nursery_blocks == blocks) return;
 476
 477   if (nursery_blocks < blocks) {
 478     IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
 479                          blocks));
 480     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 481   }
 482   else {
 483     bdescr *next_bd;
 484
 485     IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
 486                          blocks));
 487
 488     bd = stp->blocks;
 489     while (nursery_blocks > blocks) {
 490         next_bd = bd->link;
 491         next_bd->u.back = NULL;
 492         nursery_blocks -= bd->blocks; // might be a large block
 493         freeGroup(bd);
 494         bd = next_bd;
 495     }
 496     stp->blocks = bd;
 497     // might have gone just under, by freeing a large block, so make
 498     // up the difference.
 499     if (nursery_blocks < blocks) {
 500         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 501     }
 502   }
 503
 504   stp->n_blocks = blocks;
 505   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 506 }
 507
 508 //
 509 // Resize each of the nurseries to the specified size.
 510 //
 511 void
 512 resizeNurseriesFixed (nat blocks)
 513 {
 514     nat i;
 515     for (i = 0; i < n_nurseries; i++) {
 516         resizeNursery(&nurseries[i], blocks);
 517     }
 518 }
 519
 520 //
 521 // Resize the nurseries to the total specified size.
 522 //
 523 void
 524 resizeNurseries (nat blocks)
 525 {
 526     // If there are multiple nurseries, then we just divide the number
 527     // of available blocks between them.
 528     resizeNurseriesFixed(blocks / n_nurseries);
 529 }
 530
 531 /* -----------------------------------------------------------------------------
 532    The allocate() interface
 533
 534    allocate(n) always succeeds, and returns a chunk of memory n words
 535    long.  n can be larger than the size of a block if necessary, in
 536    which case a contiguous block group will be allocated.
 537    -------------------------------------------------------------------------- */
 538
 539 StgPtr
 540 allocate( nat n )
 541 {
 542     bdescr *bd;
 543     StgPtr p;
 544
 545     ACQUIRE_SM_LOCK;
 546
 547     TICK_ALLOC_HEAP_NOCTR(n);
 548     CCS_ALLOC(CCCS,n);
 549
 550     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 551     /* ToDo: allocate directly into generation 1 */
 552     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 553         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 554         bd = allocGroup(req_blocks);
 555         dbl_link_onto(bd, &g0s0->large_objects);
 556         g0s0->n_large_blocks += req_blocks;
 557         bd->gen_no  = 0;
 558         bd->step = g0s0;
 559         bd->flags = BF_LARGE;
 560         bd->free = bd->start + n;
 561         alloc_blocks += req_blocks;
 562         RELEASE_SM_LOCK;
 563         return bd->start;
 564
 565         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 566     } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 567         if (small_alloc_list) {
 568             small_alloc_list->free = alloc_Hp;
 569         }
 570         bd = allocBlock();
 571         bd->link = small_alloc_list;
 572         small_alloc_list = bd;
 573         bd->gen_no = 0;
 574         bd->step = g0s0;
 575         bd->flags = 0;
 576         alloc_Hp = bd->start;
 577         alloc_HpLim = bd->start + BLOCK_SIZE_W;
 578         alloc_blocks++;
 579     }
 580
 581     p = alloc_Hp;
 582     alloc_Hp += n;
 583     RELEASE_SM_LOCK;
 584     return p;
 585 }
 586
 587 lnat
 588 allocated_bytes( void )
 589 {
 590     lnat allocated;
 591
 592     allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
 593     if (pinned_object_block != NULL) {
 594         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 595             pinned_object_block->free;
 596     }
 597
 598     return allocated;
 599 }
 600
 601 void
 602 tidyAllocateLists (void)
 603 {
 604     if (small_alloc_list != NULL) {
 605         ASSERT(alloc_Hp >= small_alloc_list->start &&
 606                alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
 607         small_alloc_list->free = alloc_Hp;
 608     }
 609 }
 610
 611 /* -----------------------------------------------------------------------------
 612    allocateLocal()
 613
 614    This allocates memory in the current thread - it is intended for
 615    use primarily from STG-land where we have a Capability.  It is
 616    better than allocate() because it doesn't require taking the
 617    sm_mutex lock in the common case.
 618
 619    Memory is allocated directly from the nursery if possible (but not
 620    from the current nursery block, so as not to interfere with
 621    Hp/HpLim).
 622    -------------------------------------------------------------------------- */
 623
 624 StgPtr
 625 allocateLocal( StgRegTable *reg, nat n )
 626 {
 627     bdescr *bd;
 628     StgPtr p;
 629
 630     TICK_ALLOC_HEAP_NOCTR(n);
 631     CCS_ALLOC(CCCS,n);
 632
 633     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 634     /* ToDo: allocate directly into generation 1 */
 635     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 636         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 637         ACQUIRE_SM_LOCK;
 638         bd = allocGroup(req_blocks);
 639         dbl_link_onto(bd, &g0s0->large_objects);
 640         g0s0->n_large_blocks += req_blocks;
 641         bd->gen_no  = 0;
 642         bd->step = g0s0;
 643         bd->flags = BF_LARGE;
 644         bd->free = bd->start + n;
 645         alloc_blocks += req_blocks;
 646         RELEASE_SM_LOCK;
 647         return bd->start;
 648
 649         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 650     } else {
 651
 652         bd = reg->rCurrentAlloc;
 653         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 654
 655             // The CurrentAlloc block is full, we need to find another
 656             // one.  First, we try taking the next block from the
 657             // nursery:
 658             bd = reg->rCurrentNursery->link;
 659
 660             if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 661                 // The nursery is empty, or the next block is already
 662                 // full: allocate a fresh block (we can't fail here).
 663                 ACQUIRE_SM_LOCK;
 664                 bd = allocBlock();
 665                 reg->rNursery->n_blocks++;
 666                 RELEASE_SM_LOCK;
 667                 bd->gen_no = 0;
 668                 bd->step = g0s0;
 669                 bd->flags = 0;
 670             } else {
 671                 // we have a block in the nursery: take it and put
 672                 // it at the *front* of the nursery list, and use it
 673                 // to allocate() from.
 674                 reg->rCurrentNursery->link = bd->link;
 675             }
 676             dbl_link_onto(bd, &reg->rNursery->blocks);
 677             reg->rCurrentAlloc = bd;
 678         }
 679     }
 680     p = bd->free;
 681     bd->free += n;
 682     return p;
 683 }
 684
 685 /* ---------------------------------------------------------------------------
 686    Allocate a fixed/pinned object.
 687
 688    We allocate small pinned objects into a single block, allocating a
 689    new block when the current one overflows.  The block is chained
 690    onto the large_object_list of generation 0 step 0.
 691
 692    NOTE: The GC can't in general handle pinned objects.  This
 693    interface is only safe to use for ByteArrays, which have no
 694    pointers and don't require scavenging.  It works because the
 695    block's descriptor has the BF_LARGE flag set, so the block is
 696    treated as a large object and chained onto various lists, rather
 697    than the individual objects being copied.  However, when it comes
 698    to scavenge the block, the GC will only scavenge the first object.
 699    The reason is that the GC can't linearly scan a block of pinned
 700    objects at the moment (doing so would require using the
 701    mostly-copying techniques).  But since we're restricting ourselves
 702    to pinned ByteArrays, not scavenging is ok.
 703
 704    This function is called by newPinnedByteArray# which immediately
 705    fills the allocated memory with a MutableByteArray#.
 706    ------------------------------------------------------------------------- */
 707
 708 StgPtr
 709 allocatePinned( nat n )
 710 {
 711     StgPtr p;
 712     bdescr *bd = pinned_object_block;
 713
 714     // If the request is for a large object, then allocate()
 715     // will give us a pinned object anyway.
 716     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 717         return allocate(n);
 718     }
 719
 720     ACQUIRE_SM_LOCK;
 721
 722     TICK_ALLOC_HEAP_NOCTR(n);
 723     CCS_ALLOC(CCCS,n);
 724
 725     // we always return 8-byte aligned memory.  bd->free must be
 726     // 8-byte aligned to begin with, so we just round up n to
 727     // the nearest multiple of 8 bytes.
 728     if (sizeof(StgWord) == 4) {
 729         n = (n+1) & ~1;
 730     }
 731
 732     // If we don't have a block of pinned objects yet, or the current
 733     // one isn't large enough to hold the new object, allocate a new one.
 734     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 735         pinned_object_block = bd = allocBlock();
 736         dbl_link_onto(bd, &g0s0->large_objects);
 737         bd->gen_no = 0;
 738         bd->step   = g0s0;
 739         bd->flags  = BF_PINNED | BF_LARGE;
 740         bd->free   = bd->start;
 741         alloc_blocks++;
 742     }
 743
 744     p = bd->free;
 745     bd->free += n;
 746     RELEASE_SM_LOCK;
 747     return p;
 748 }
 749
 750 /* -----------------------------------------------------------------------------
 751    Allocation functions for GMP.
 752
 753    These all use the allocate() interface - we can't have any garbage
 754    collection going on during a gmp operation, so we use allocate()
 755    which always succeeds.  The gmp operations which might need to
 756    allocate will ask the storage manager (via doYouWantToGC()) whether
 757    a garbage collection is required, in case we get into a loop doing
 758    only allocate() style allocation.
 759    -------------------------------------------------------------------------- */
 760
 761 static void *
 762 stgAllocForGMP (size_t size_in_bytes)
 763 {
 764   StgArrWords* arr;
 765   nat data_size_in_words, total_size_in_words;
 766
 767   /* round up to a whole number of words */
 768   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 769   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 770
 771   /* allocate and fill it in. */
 772 #if defined(SMP)
 773   arr = (StgArrWords *)allocateLocal(&(myCapability()->r), total_size_in_words);
 774 #else
 775   arr = (StgArrWords *)allocateLocal(&MainCapability.r, total_size_in_words);
 776 #endif
 777   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 778
 779   /* and return a ptr to the goods inside the array */
 780   return arr->payload;
 781 }
 782
 783 static void *
 784 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 785 {
 786     void *new_stuff_ptr = stgAllocForGMP(new_size);
 787     nat i = 0;
 788     char *p = (char *) ptr;
 789     char *q = (char *) new_stuff_ptr;
 790
 791     for (; i < old_size; i++, p++, q++) {
 792         *q = *p;
 793     }
 794
 795     return(new_stuff_ptr);
 796 }
 797
 798 static void
 799 stgDeallocForGMP (void *ptr STG_UNUSED,
 800                   size_t size STG_UNUSED)
 801 {
 802     /* easy for us: the garbage collector does the dealloc'n */
 803 }
 804
 805 /* -----------------------------------------------------------------------------
 806  * Stats and stuff
 807  * -------------------------------------------------------------------------- */
 808
 809 /* -----------------------------------------------------------------------------
 810  * calcAllocated()
 811  *
 812  * Approximate how much we've allocated: number of blocks in the
 813  * nursery + blocks allocated via allocate() - unused nusery blocks.
 814  * This leaves a little slop at the end of each block, and doesn't
 815  * take into account large objects (ToDo).
 816  * -------------------------------------------------------------------------- */
 817
 818 lnat
 819 calcAllocated( void )
 820 {
 821   nat allocated;
 822   bdescr *bd;
 823
 824   allocated = allocated_bytes();
 825   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 826
 827 #ifdef SMP
 828   nat i;
 829   for (i = 0; i < n_nurseries; i++) {
 830       Capability *cap;
 831       for ( bd = capabilities[i].r.rCurrentNursery->link;
 832             bd != NULL; bd = bd->link ) {
 833           allocated -= BLOCK_SIZE_W;
 834       }
 835       cap = &capabilities[i];
 836       if (cap->r.rCurrentNursery->free <
 837           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 838           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 839               - cap->r.rCurrentNursery->free;
 840       }
 841   }
 842 #else
 843   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 844
 845   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 846       allocated -= BLOCK_SIZE_W;
 847   }
 848   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 849       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 850           - current_nursery->free;
 851   }
 852 #endif
 853
 854   total_allocated += allocated;
 855   return allocated;
 856 }
 857
 858 /* Approximate the amount of live data in the heap.  To be called just
 859  * after garbage collection (see GarbageCollect()).
 860  */
 861 extern lnat
 862 calcLive(void)
 863 {
 864   nat g, s;
 865   lnat live = 0;
 866   step *stp;
 867
 868   if (RtsFlags.GcFlags.generations == 1) {
 869     live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
 870       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 871     return live;
 872   }
 873
 874   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 875     for (s = 0; s < generations[g].n_steps; s++) {
 876       /* approximate amount of live data (doesn't take into account slop
 877        * at end of each block).
 878        */
 879       if (g == 0 && s == 0) {
 880           continue;
 881       }
 882       stp = &generations[g].steps[s];
 883       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 884       if (stp->hp_bd != NULL) {
 885           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 886               / sizeof(W_);
 887       }
 888     }
 889   }
 890   return live;
 891 }
 892
 893 /* Approximate the number of blocks that will be needed at the next
 894  * garbage collection.
 895  *
 896  * Assume: all data currently live will remain live.  Steps that will
 897  * be collected next time will therefore need twice as many blocks
 898  * since all the data will be copied.
 899  */
 900 extern lnat
 901 calcNeeded(void)
 902 {
 903     lnat needed = 0;
 904     nat g, s;
 905     step *stp;
 906
 907     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 908         for (s = 0; s < generations[g].n_steps; s++) {
 909             if (g == 0 && s == 0) { continue; }
 910             stp = &generations[g].steps[s];
 911             if (generations[g].steps[0].n_blocks +
 912                 generations[g].steps[0].n_large_blocks
 913                 > generations[g].max_blocks
 914                 && stp->is_compacted == 0) {
 915                 needed += 2 * stp->n_blocks;
 916             } else {
 917                 needed += stp->n_blocks;
 918             }
 919         }
 920     }
 921     return needed;
 922 }
 923
 924 /* -----------------------------------------------------------------------------
 925    Debugging
 926
 927    memInventory() checks for memory leaks by counting up all the
 928    blocks we know about and comparing that to the number of blocks
 929    allegedly floating around in the system.
 930    -------------------------------------------------------------------------- */
 931
 932 #ifdef DEBUG
 933
 934 static lnat
 935 stepBlocks (step *stp)
 936 {
 937     lnat total_blocks;
 938     bdescr *bd;
 939
 940     total_blocks = stp->n_blocks;
 941     for (bd = stp->large_objects; bd; bd = bd->link) {
 942         total_blocks += bd->blocks;
 943         /* hack for megablock groups: they have an extra block or two in
 944            the second and subsequent megablocks where the block
 945            descriptors would normally go.
 946         */
 947         if (bd->blocks > BLOCKS_PER_MBLOCK) {
 948             total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
 949                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
 950         }
 951     }
 952     return total_blocks;
 953 }
 954
 955 void
 956 memInventory(void)
 957 {
 958   nat g, s, i;
 959   step *stp;
 960   bdescr *bd;
 961   lnat total_blocks = 0, free_blocks = 0;
 962
 963   /* count the blocks we current have */
 964
 965   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 966       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 967           total_blocks += bd->blocks;
 968       }
 969       for (s = 0; s < generations[g].n_steps; s++) {
 970           if (g==0 && s==0) continue;
 971           stp = &generations[g].steps[s];
 972           total_blocks += stepBlocks(stp);
 973       }
 974   }
 975
 976   for (i = 0; i < n_nurseries; i++) {
 977       total_blocks += stepBlocks(&nurseries[i]);
 978   }
 979
 980   if (RtsFlags.GcFlags.generations == 1) {
 981       /* two-space collector has a to-space too :-) */
 982       total_blocks += g0s0->n_to_blocks;
 983   }
 984
 985   /* any blocks held by allocate() */
 986   for (bd = small_alloc_list; bd; bd = bd->link) {
 987     total_blocks += bd->blocks;
 988   }
 989
 990 #ifdef PROFILING
 991   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
 992       total_blocks += retainerStackBlocks();
 993   }
 994 #endif
 995
 996   // count the blocks allocated by the arena allocator
 997   total_blocks += arenaBlocks();
 998
 999   /* count the blocks on the free list */
1000   free_blocks = countFreeList();
1001
1002   if (total_blocks + free_blocks != mblocks_allocated *
1003       BLOCKS_PER_MBLOCK) {
1004     debugBelch("Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
1005             total_blocks, free_blocks, total_blocks + free_blocks,
1006             mblocks_allocated * BLOCKS_PER_MBLOCK);
1007   }
1008
1009   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
1010 }
1011
1012
1013 nat
1014 countBlocks(bdescr *bd)
1015 {
1016     nat n;
1017     for (n=0; bd != NULL; bd=bd->link) {
1018         n += bd->blocks;
1019     }
1020     return n;
1021 }
1022
1023 /* Full heap sanity check. */
1024 void
1025 checkSanity( void )
1026 {
1027     nat g, s;
1028
1029     if (RtsFlags.GcFlags.generations == 1) {
1030         checkHeap(g0s0->to_blocks);
1031         checkChain(g0s0->large_objects);
1032     } else {
1033
1034         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1035             for (s = 0; s < generations[g].n_steps; s++) {
1036                 if (g == 0 && s == 0) { continue; }
1037                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1038                        == generations[g].steps[s].n_blocks);
1039                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1040                        == generations[g].steps[s].n_large_blocks);
1041                 checkHeap(generations[g].steps[s].blocks);
1042                 checkChain(generations[g].steps[s].large_objects);
1043                 if (g > 0) {
1044                     checkMutableList(generations[g].mut_list, g);
1045                 }
1046             }
1047         }
1048
1049         for (s = 0; s < n_nurseries; s++) {
1050             ASSERT(countBlocks(nurseries[s].blocks)
1051                    == nurseries[s].n_blocks);
1052             ASSERT(countBlocks(nurseries[s].large_objects)
1053                    == nurseries[s].n_large_blocks);
1054         }
1055
1056         checkFreeListSanity();
1057     }
1058 }
1059
1060 // handy function for use in gdb, because Bdescr() is inlined.
1061 extern bdescr *_bdescr( StgPtr p );
1062
1063 bdescr *
1064 _bdescr( StgPtr p )
1065 {
1066     return Bdescr(p);
1067 }
1068
1069 #endif