ghc/rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Storage manager front end
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11 #include "RtsUtils.h"
  12 #include "RtsFlags.h"
  13 #include "Stats.h"
  14 #include "Hooks.h"
  15 #include "BlockAlloc.h"
  16 #include "MBlock.h"
  17 #include "Weak.h"
  18 #include "Sanity.h"
  19 #include "Arena.h"
  20 #include "OSThreads.h"
  21 #include "Capability.h"
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 /*
  30  * All these globals require sm_mutex to access in SMP mode.
  31  */
  32 StgClosure    *caf_list         = NULL;
  33 StgClosure    *revertible_caf_list = NULL;
  34 rtsBool       keepCAFs;
  35
  36 bdescr *small_alloc_list;       /* allocate()d small objects */
  37 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  38 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  39 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  40
  41 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  42 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  43
  44 generation *generations = NULL; /* all the generations */
  45 generation *g0          = NULL; /* generation 0, for convenience */
  46 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  47 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  48
  49 ullong total_allocated = 0;     /* total memory allocated during run */
  50
  51 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  52 step *nurseries         = NULL; /* array of nurseries, >1 only if SMP */
  53
  54 /*
  55  * Storage manager mutex:  protects all the above state from
  56  * simultaneous access by two STG threads.
  57  */
  58 #ifdef SMP
  59 Mutex sm_mutex = INIT_MUTEX_VAR;
  60 #endif
  61
  62 /*
  63  * Forward references
  64  */
  65 static void *stgAllocForGMP   (size_t size_in_bytes);
  66 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  67 static void  stgDeallocForGMP (void *ptr, size_t size);
  68
  69 static void
  70 initStep (step *stp, int g, int s)
  71 {
  72     stp->no = s;
  73     stp->blocks = NULL;
  74     stp->n_to_blocks = 0;
  75     stp->n_blocks = 0;
  76     stp->gen = &generations[g];
  77     stp->gen_no = g;
  78     stp->hp = NULL;
  79     stp->hpLim = NULL;
  80     stp->hp_bd = NULL;
  81     stp->scan = NULL;
  82     stp->scan_bd = NULL;
  83     stp->large_objects = NULL;
  84     stp->n_large_blocks = 0;
  85     stp->new_large_objects = NULL;
  86     stp->scavenged_large_objects = NULL;
  87     stp->n_scavenged_large_blocks = 0;
  88     stp->is_compacted = 0;
  89     stp->bitmap = NULL;
  90 }
  91
  92 void
  93 initStorage( void )
  94 {
  95   nat g, s;
  96   generation *gen;
  97
  98   if (generations != NULL) {
  99       // multi-init protection
 100       return;
 101   }
 102
 103   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 104    * doing something reasonable.
 105    */
 106   ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
 107   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 108   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 109
 110   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 111       RtsFlags.GcFlags.heapSizeSuggestion >
 112       RtsFlags.GcFlags.maxHeapSize) {
 113     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 114   }
 115
 116   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 117       RtsFlags.GcFlags.minAllocAreaSize >
 118       RtsFlags.GcFlags.maxHeapSize) {
 119       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 120       exit(1);
 121   }
 122
 123   initBlockAllocator();
 124
 125 #if defined(SMP)
 126   initMutex(&sm_mutex);
 127 #endif
 128
 129   /* allocate generation info array */
 130   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 131                                              * sizeof(struct generation_),
 132                                              "initStorage: gens");
 133
 134   /* Initialise all generations */
 135   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 136     gen = &generations[g];
 137     gen->no = g;
 138     gen->mut_list = allocBlock();
 139     gen->collections = 0;
 140     gen->failed_promotions = 0;
 141     gen->max_blocks = 0;
 142   }
 143
 144   /* A couple of convenience pointers */
 145   g0 = &generations[0];
 146   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 147
 148   /* Allocate step structures in each generation */
 149   if (RtsFlags.GcFlags.generations > 1) {
 150     /* Only for multiple-generations */
 151
 152     /* Oldest generation: one step */
 153     oldest_gen->n_steps = 1;
 154     oldest_gen->steps =
 155       stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
 156
 157     /* set up all except the oldest generation with 2 steps */
 158     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 159       generations[g].n_steps = RtsFlags.GcFlags.steps;
 160       generations[g].steps  =
 161         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
 162                         "initStorage: steps");
 163     }
 164
 165   } else {
 166     /* single generation, i.e. a two-space collector */
 167     g0->n_steps = 1;
 168     g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
 169   }
 170
 171 #ifdef SMP
 172   n_nurseries = RtsFlags.ParFlags.nNodes;
 173   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 174                               "initStorage: nurseries");
 175 #else
 176   n_nurseries = 1;
 177   nurseries = g0->steps; // just share nurseries[0] with g0s0
 178 #endif
 179
 180   /* Initialise all steps */
 181   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 182     for (s = 0; s < generations[g].n_steps; s++) {
 183         initStep(&generations[g].steps[s], g, s);
 184     }
 185   }
 186
 187 #ifdef SMP
 188   for (s = 0; s < n_nurseries; s++) {
 189       initStep(&nurseries[s], 0, s);
 190   }
 191 #endif
 192
 193   /* Set up the destination pointers in each younger gen. step */
 194   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 195     for (s = 0; s < generations[g].n_steps-1; s++) {
 196       generations[g].steps[s].to = &generations[g].steps[s+1];
 197     }
 198     generations[g].steps[s].to = &generations[g+1].steps[0];
 199   }
 200   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 201
 202 #ifdef SMP
 203   for (s = 0; s < n_nurseries; s++) {
 204       nurseries[s].to = generations[0].steps[0].to;
 205   }
 206 #endif
 207
 208   /* The oldest generation has one step. */
 209   if (RtsFlags.GcFlags.compact) {
 210       if (RtsFlags.GcFlags.generations == 1) {
 211           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 212       } else {
 213           oldest_gen->steps[0].is_compacted = 1;
 214       }
 215   }
 216
 217 #ifdef SMP
 218   if (RtsFlags.GcFlags.generations == 1) {
 219       errorBelch("-G1 is incompatible with SMP");
 220       stg_exit(1);
 221   }
 222 #endif
 223
 224   /* generation 0 is special: that's the nursery */
 225   generations[0].max_blocks = 0;
 226
 227   /* G0S0: the allocation area.  Policy: keep the allocation area
 228    * small to begin with, even if we have a large suggested heap
 229    * size.  Reason: we're going to do a major collection first, and we
 230    * don't want it to be a big one.  This vague idea is borne out by
 231    * rigorous experimental evidence.
 232    */
 233   g0s0 = &generations[0].steps[0];
 234
 235   allocNurseries();
 236
 237   weak_ptr_list = NULL;
 238   caf_list = NULL;
 239   revertible_caf_list = NULL;
 240
 241   /* initialise the allocate() interface */
 242   small_alloc_list = NULL;
 243   alloc_blocks = 0;
 244   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 245
 246   /* Tell GNU multi-precision pkg about our custom alloc functions */
 247   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 248
 249   IF_DEBUG(gc, statDescribeGens());
 250 }
 251
 252 void
 253 exitStorage (void)
 254 {
 255     stat_exit(calcAllocated());
 256 }
 257
 258 /* -----------------------------------------------------------------------------
 259    CAF management.
 260
 261    The entry code for every CAF does the following:
 262
 263       - builds a CAF_BLACKHOLE in the heap
 264       - pushes an update frame pointing to the CAF_BLACKHOLE
 265       - invokes UPD_CAF(), which:
 266           - calls newCaf, below
 267           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 268
 269    Why do we build a BLACKHOLE in the heap rather than just updating
 270    the thunk directly?  It's so that we only need one kind of update
 271    frame - otherwise we'd need a static version of the update frame too.
 272
 273    newCaf() does the following:
 274
 275       - it puts the CAF on the oldest generation's mut-once list.
 276         This is so that we can treat the CAF as a root when collecting
 277         younger generations.
 278
 279    For GHCI, we have additional requirements when dealing with CAFs:
 280
 281       - we must *retain* all dynamically-loaded CAFs ever entered,
 282         just in case we need them again.
 283       - we must be able to *revert* CAFs that have been evaluated, to
 284         their pre-evaluated form.
 285
 286       To do this, we use an additional CAF list.  When newCaf() is
 287       called on a dynamically-loaded CAF, we add it to the CAF list
 288       instead of the old-generation mutable list, and save away its
 289       old info pointer (in caf->saved_info) for later reversion.
 290
 291       To revert all the CAFs, we traverse the CAF list and reset the
 292       info pointer to caf->saved_info, then throw away the CAF list.
 293       (see GC.c:revertCAFs()).
 294
 295       -- SDM 29/1/01
 296
 297    -------------------------------------------------------------------------- */
 298
 299 void
 300 newCAF(StgClosure* caf)
 301 {
 302   ACQUIRE_SM_LOCK;
 303
 304   if(keepCAFs)
 305   {
 306     // HACK:
 307     // If we are in GHCi _and_ we are using dynamic libraries,
 308     // then we can't redirect newCAF calls to newDynCAF (see below),
 309     // so we make newCAF behave almost like newDynCAF.
 310     // The dynamic libraries might be used by both the interpreted
 311     // program and GHCi itself, so they must not be reverted.
 312     // This also means that in GHCi with dynamic libraries, CAFs are not
 313     // garbage collected. If this turns out to be a problem, we could
 314     // do another hack here and do an address range test on caf to figure
 315     // out whether it is from a dynamic library.
 316     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 317     ((StgIndStatic *)caf)->static_link = caf_list;
 318     caf_list = caf;
 319   }
 320   else
 321   {
 322     /* Put this CAF on the mutable list for the old generation.
 323     * This is a HACK - the IND_STATIC closure doesn't really have
 324     * a mut_link field, but we pretend it has - in fact we re-use
 325     * the STATIC_LINK field for the time being, because when we
 326     * come to do a major GC we won't need the mut_link field
 327     * any more and can use it as a STATIC_LINK.
 328     */
 329     ((StgIndStatic *)caf)->saved_info = NULL;
 330     recordMutableGen(caf, oldest_gen);
 331   }
 332
 333   RELEASE_SM_LOCK;
 334
 335 #ifdef PAR
 336   /* If we are PAR or DIST then  we never forget a CAF */
 337   { globalAddr *newGA;
 338     //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 339     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 340     ASSERT(newGA);
 341   }
 342 #endif /* PAR */
 343 }
 344
 345 // An alternate version of newCaf which is used for dynamically loaded
 346 // object code in GHCi.  In this case we want to retain *all* CAFs in
 347 // the object code, because they might be demanded at any time from an
 348 // expression evaluated on the command line.
 349 // Also, GHCi might want to revert CAFs, so we add these to the
 350 // revertible_caf_list.
 351 //
 352 // The linker hackily arranges that references to newCaf from dynamic
 353 // code end up pointing to newDynCAF.
 354 void
 355 newDynCAF(StgClosure *caf)
 356 {
 357     ACQUIRE_SM_LOCK;
 358
 359     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 360     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 361     revertible_caf_list = caf;
 362
 363     RELEASE_SM_LOCK;
 364 }
 365
 366 /* -----------------------------------------------------------------------------
 367    Nursery management.
 368    -------------------------------------------------------------------------- */
 369
 370 static bdescr *
 371 allocNursery (step *stp, bdescr *tail, nat blocks)
 372 {
 373     bdescr *bd;
 374     nat i;
 375
 376     // Allocate a nursery: we allocate fresh blocks one at a time and
 377     // cons them on to the front of the list, not forgetting to update
 378     // the back pointer on the tail of the list to point to the new block.
 379     for (i=0; i < blocks; i++) {
 380         // @LDV profiling
 381         /*
 382           processNursery() in LdvProfile.c assumes that every block group in
 383           the nursery contains only a single block. So, if a block group is
 384           given multiple blocks, change processNursery() accordingly.
 385         */
 386         bd = allocBlock();
 387         bd->link = tail;
 388         // double-link the nursery: we might need to insert blocks
 389         if (tail != NULL) {
 390             tail->u.back = bd;
 391         }
 392         bd->step = stp;
 393         bd->gen_no = 0;
 394         bd->flags = 0;
 395         bd->free = bd->start;
 396         tail = bd;
 397     }
 398     tail->u.back = NULL;
 399     return tail;
 400 }
 401
 402 static void
 403 assignNurseriesToCapabilities (void)
 404 {
 405 #ifdef SMP
 406     nat i;
 407
 408     for (i = 0; i < n_nurseries; i++) {
 409         capabilities[i].r.rNursery        = &nurseries[i];
 410         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 411         capabilities[i].r.rCurrentAlloc   = NULL;
 412     }
 413 #else /* SMP */
 414     MainCapability.r.rNursery        = &nurseries[0];
 415     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 416     MainCapability.r.rCurrentAlloc   = NULL;
 417 #endif
 418 }
 419
 420 void
 421 allocNurseries( void )
 422 {
 423     nat i;
 424
 425     for (i = 0; i < n_nurseries; i++) {
 426         nurseries[i].blocks =
 427             allocNursery(&nurseries[i], NULL,
 428                          RtsFlags.GcFlags.minAllocAreaSize);
 429         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 430         nurseries[i].to_blocks   = NULL;
 431         nurseries[i].n_to_blocks = 0;
 432         /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
 433     }
 434     assignNurseriesToCapabilities();
 435 }
 436
 437 void
 438 resetNurseries( void )
 439 {
 440     nat i;
 441     bdescr *bd;
 442     step *stp;
 443
 444     for (i = 0; i < n_nurseries; i++) {
 445         stp = &nurseries[i];
 446         for (bd = stp->blocks; bd; bd = bd->link) {
 447             bd->free = bd->start;
 448             ASSERT(bd->gen_no == 0);
 449             ASSERT(bd->step == stp);
 450             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 451         }
 452     }
 453     assignNurseriesToCapabilities();
 454 }
 455
 456 lnat
 457 countNurseryBlocks (void)
 458 {
 459     nat i;
 460     lnat blocks = 0;
 461
 462     for (i = 0; i < n_nurseries; i++) {
 463         blocks += nurseries[i].n_blocks;
 464     }
 465     return blocks;
 466 }
 467
 468 static void
 469 resizeNursery ( step *stp, nat blocks )
 470 {
 471   bdescr *bd;
 472   nat nursery_blocks;
 473
 474   nursery_blocks = stp->n_blocks;
 475   if (nursery_blocks == blocks) return;
 476
 477   if (nursery_blocks < blocks) {
 478     IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
 479                          blocks));
 480     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 481   }
 482   else {
 483     bdescr *next_bd;
 484
 485     IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
 486                          blocks));
 487
 488     bd = stp->blocks;
 489     while (nursery_blocks > blocks) {
 490         next_bd = bd->link;
 491         next_bd->u.back = NULL;
 492         nursery_blocks -= bd->blocks; // might be a large block
 493         freeGroup(bd);
 494         bd = next_bd;
 495     }
 496     stp->blocks = bd;
 497     // might have gone just under, by freeing a large block, so make
 498     // up the difference.
 499     if (nursery_blocks < blocks) {
 500         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 501     }
 502   }
 503
 504   stp->n_blocks = blocks;
 505   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 506 }
 507
 508 //
 509 // Resize each of the nurseries to the specified size.
 510 //
 511 void
 512 resizeNurseriesFixed (nat blocks)
 513 {
 514     nat i;
 515     for (i = 0; i < n_nurseries; i++) {
 516         resizeNursery(&nurseries[i], blocks);
 517     }
 518 }
 519
 520 //
 521 // Resize the nurseries to the total specified size.
 522 //
 523 void
 524 resizeNurseries (nat blocks)
 525 {
 526     // If there are multiple nurseries, then we just divide the number
 527     // of available blocks between them.
 528     resizeNurseriesFixed(blocks / n_nurseries);
 529 }
 530
 531 /* -----------------------------------------------------------------------------
 532    The allocate() interface
 533
 534    allocate(n) always succeeds, and returns a chunk of memory n words
 535    long.  n can be larger than the size of a block if necessary, in
 536    which case a contiguous block group will be allocated.
 537    -------------------------------------------------------------------------- */
 538
 539 StgPtr
 540 allocate( nat n )
 541 {
 542     bdescr *bd;
 543     StgPtr p;
 544
 545     ACQUIRE_SM_LOCK;
 546
 547     TICK_ALLOC_HEAP_NOCTR(n);
 548     CCS_ALLOC(CCCS,n);
 549
 550     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 551     /* ToDo: allocate directly into generation 1 */
 552     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 553         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 554         bd = allocGroup(req_blocks);
 555         dbl_link_onto(bd, &g0s0->large_objects);
 556         g0s0->n_large_blocks += req_blocks;
 557         bd->gen_no  = 0;
 558         bd->step = g0s0;
 559         bd->flags = BF_LARGE;
 560         bd->free = bd->start + n;
 561         alloc_blocks += req_blocks;
 562         RELEASE_SM_LOCK;
 563         return bd->start;
 564
 565         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 566     } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 567         if (small_alloc_list) {
 568             small_alloc_list->free = alloc_Hp;
 569         }
 570         bd = allocBlock();
 571         bd->link = small_alloc_list;
 572         small_alloc_list = bd;
 573         bd->gen_no = 0;
 574         bd->step = g0s0;
 575         bd->flags = 0;
 576         alloc_Hp = bd->start;
 577         alloc_HpLim = bd->start + BLOCK_SIZE_W;
 578         alloc_blocks++;
 579     }
 580
 581     p = alloc_Hp;
 582     alloc_Hp += n;
 583     RELEASE_SM_LOCK;
 584     return p;
 585 }
 586
 587 lnat
 588 allocated_bytes( void )
 589 {
 590     lnat allocated;
 591
 592     allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
 593     if (pinned_object_block != NULL) {
 594         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 595             pinned_object_block->free;
 596     }
 597
 598     return allocated;
 599 }
 600
 601 void
 602 tidyAllocateLists (void)
 603 {
 604     if (small_alloc_list != NULL) {
 605         ASSERT(alloc_Hp >= small_alloc_list->start &&
 606                alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
 607         small_alloc_list->free = alloc_Hp;
 608     }
 609 }
 610
 611 /* -----------------------------------------------------------------------------
 612    allocateLocal()
 613
 614    This allocates memory in the current thread - it is intended for
 615    use primarily from STG-land where we have a Capability.  It is
 616    better than allocate() because it doesn't require taking the
 617    sm_mutex lock in the common case.
 618
 619    Memory is allocated directly from the nursery if possible (but not
 620    from the current nursery block, so as not to interfere with
 621    Hp/HpLim).
 622    -------------------------------------------------------------------------- */
 623
 624 StgPtr
 625 allocateLocal( StgRegTable *reg, nat n )
 626 {
 627     bdescr *bd;
 628     StgPtr p;
 629
 630     TICK_ALLOC_HEAP_NOCTR(n);
 631     CCS_ALLOC(CCCS,n);
 632
 633     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 634     /* ToDo: allocate directly into generation 1 */
 635     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 636         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 637         ACQUIRE_SM_LOCK;
 638         bd = allocGroup(req_blocks);
 639         dbl_link_onto(bd, &g0s0->large_objects);
 640         g0s0->n_large_blocks += req_blocks;
 641         bd->gen_no  = 0;
 642         bd->step = g0s0;
 643         bd->flags = BF_LARGE;
 644         bd->free = bd->start + n;
 645         alloc_blocks += req_blocks;
 646         RELEASE_SM_LOCK;
 647         return bd->start;
 648
 649         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 650     } else {
 651
 652         bd = reg->rCurrentAlloc;
 653         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 654
 655             // The CurrentAlloc block is full, we need to find another
 656             // one.  First, we try taking the next block from the
 657             // nursery:
 658             bd = reg->rCurrentNursery->link;
 659
 660             if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 661                 // The nursery is empty, or the next block is already
 662                 // full: allocate a fresh block (we can't fail here).
 663                 ACQUIRE_SM_LOCK;
 664                 bd = allocBlock();
 665                 alloc_blocks++;
 666                 RELEASE_SM_LOCK;
 667                 bd->gen_no = 0;
 668                 bd->step = g0s0;
 669                 bd->flags = 0;
 670             } else {
 671                 // we have a block in the nursery: take it and put
 672                 // it at the *front* of the nursery list, and use it
 673                 // to allocate() from.
 674                 reg->rCurrentNursery->link = bd->link;
 675             }
 676             bd->link = reg->rNursery->blocks;
 677             reg->rNursery->blocks = bd;
 678             bd->u.back = NULL;
 679             reg->rCurrentAlloc = bd;
 680         }
 681     }
 682     p = bd->free;
 683     bd->free += n;
 684     return p;
 685 }
 686
 687 /* ---------------------------------------------------------------------------
 688    Allocate a fixed/pinned object.
 689
 690    We allocate small pinned objects into a single block, allocating a
 691    new block when the current one overflows.  The block is chained
 692    onto the large_object_list of generation 0 step 0.
 693
 694    NOTE: The GC can't in general handle pinned objects.  This
 695    interface is only safe to use for ByteArrays, which have no
 696    pointers and don't require scavenging.  It works because the
 697    block's descriptor has the BF_LARGE flag set, so the block is
 698    treated as a large object and chained onto various lists, rather
 699    than the individual objects being copied.  However, when it comes
 700    to scavenge the block, the GC will only scavenge the first object.
 701    The reason is that the GC can't linearly scan a block of pinned
 702    objects at the moment (doing so would require using the
 703    mostly-copying techniques).  But since we're restricting ourselves
 704    to pinned ByteArrays, not scavenging is ok.
 705
 706    This function is called by newPinnedByteArray# which immediately
 707    fills the allocated memory with a MutableByteArray#.
 708    ------------------------------------------------------------------------- */
 709
 710 StgPtr
 711 allocatePinned( nat n )
 712 {
 713     StgPtr p;
 714     bdescr *bd = pinned_object_block;
 715
 716     // If the request is for a large object, then allocate()
 717     // will give us a pinned object anyway.
 718     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 719         return allocate(n);
 720     }
 721
 722     ACQUIRE_SM_LOCK;
 723
 724     TICK_ALLOC_HEAP_NOCTR(n);
 725     CCS_ALLOC(CCCS,n);
 726
 727     // we always return 8-byte aligned memory.  bd->free must be
 728     // 8-byte aligned to begin with, so we just round up n to
 729     // the nearest multiple of 8 bytes.
 730     if (sizeof(StgWord) == 4) {
 731         n = (n+1) & ~1;
 732     }
 733
 734     // If we don't have a block of pinned objects yet, or the current
 735     // one isn't large enough to hold the new object, allocate a new one.
 736     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 737         pinned_object_block = bd = allocBlock();
 738         dbl_link_onto(bd, &g0s0->large_objects);
 739         bd->gen_no = 0;
 740         bd->step   = g0s0;
 741         bd->flags  = BF_PINNED | BF_LARGE;
 742         bd->free   = bd->start;
 743         alloc_blocks++;
 744     }
 745
 746     p = bd->free;
 747     bd->free += n;
 748     RELEASE_SM_LOCK;
 749     return p;
 750 }
 751
 752 /* -----------------------------------------------------------------------------
 753    Allocation functions for GMP.
 754
 755    These all use the allocate() interface - we can't have any garbage
 756    collection going on during a gmp operation, so we use allocate()
 757    which always succeeds.  The gmp operations which might need to
 758    allocate will ask the storage manager (via doYouWantToGC()) whether
 759    a garbage collection is required, in case we get into a loop doing
 760    only allocate() style allocation.
 761    -------------------------------------------------------------------------- */
 762
 763 static void *
 764 stgAllocForGMP (size_t size_in_bytes)
 765 {
 766   StgArrWords* arr;
 767   nat data_size_in_words, total_size_in_words;
 768
 769   /* round up to a whole number of words */
 770   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 771   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 772
 773   /* allocate and fill it in. */
 774 #if defined(SMP)
 775   arr = (StgArrWords *)allocateLocal(&(myCapability()->r), total_size_in_words);
 776 #else
 777   arr = (StgArrWords *)allocateLocal(&MainCapability.r, total_size_in_words);
 778 #endif
 779   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 780
 781   /* and return a ptr to the goods inside the array */
 782   return arr->payload;
 783 }
 784
 785 static void *
 786 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 787 {
 788     void *new_stuff_ptr = stgAllocForGMP(new_size);
 789     nat i = 0;
 790     char *p = (char *) ptr;
 791     char *q = (char *) new_stuff_ptr;
 792
 793     for (; i < old_size; i++, p++, q++) {
 794         *q = *p;
 795     }
 796
 797     return(new_stuff_ptr);
 798 }
 799
 800 static void
 801 stgDeallocForGMP (void *ptr STG_UNUSED,
 802                   size_t size STG_UNUSED)
 803 {
 804     /* easy for us: the garbage collector does the dealloc'n */
 805 }
 806
 807 /* -----------------------------------------------------------------------------
 808  * Stats and stuff
 809  * -------------------------------------------------------------------------- */
 810
 811 /* -----------------------------------------------------------------------------
 812  * calcAllocated()
 813  *
 814  * Approximate how much we've allocated: number of blocks in the
 815  * nursery + blocks allocated via allocate() - unused nusery blocks.
 816  * This leaves a little slop at the end of each block, and doesn't
 817  * take into account large objects (ToDo).
 818  * -------------------------------------------------------------------------- */
 819
 820 lnat
 821 calcAllocated( void )
 822 {
 823   nat allocated;
 824   bdescr *bd;
 825   nat i;
 826
 827   allocated = allocated_bytes();
 828   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 829
 830 #ifdef SMP
 831   for (i = 0; i < n_nurseries; i++) {
 832       Capability *cap;
 833       for ( bd = capabilities[i].r.rCurrentNursery->link;
 834             bd != NULL; bd = bd->link ) {
 835           allocated -= BLOCK_SIZE_W;
 836       }
 837       cap = &capabilities[i];
 838       if (cap->r.rCurrentNursery->free <
 839           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 840           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 841               - cap->r.rCurrentNursery->free;
 842       }
 843   }
 844 #else
 845   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 846
 847   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 848       allocated -= BLOCK_SIZE_W;
 849   }
 850   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 851       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 852           - current_nursery->free;
 853   }
 854 #endif
 855
 856   total_allocated += allocated;
 857   return allocated;
 858 }
 859
 860 /* Approximate the amount of live data in the heap.  To be called just
 861  * after garbage collection (see GarbageCollect()).
 862  */
 863 extern lnat
 864 calcLive(void)
 865 {
 866   nat g, s;
 867   lnat live = 0;
 868   step *stp;
 869
 870   if (RtsFlags.GcFlags.generations == 1) {
 871     live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
 872       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 873     return live;
 874   }
 875
 876   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 877     for (s = 0; s < generations[g].n_steps; s++) {
 878       /* approximate amount of live data (doesn't take into account slop
 879        * at end of each block).
 880        */
 881       if (g == 0 && s == 0) {
 882           continue;
 883       }
 884       stp = &generations[g].steps[s];
 885       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 886       if (stp->hp_bd != NULL) {
 887           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 888               / sizeof(W_);
 889       }
 890     }
 891   }
 892   return live;
 893 }
 894
 895 /* Approximate the number of blocks that will be needed at the next
 896  * garbage collection.
 897  *
 898  * Assume: all data currently live will remain live.  Steps that will
 899  * be collected next time will therefore need twice as many blocks
 900  * since all the data will be copied.
 901  */
 902 extern lnat
 903 calcNeeded(void)
 904 {
 905     lnat needed = 0;
 906     nat g, s;
 907     step *stp;
 908
 909     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 910         for (s = 0; s < generations[g].n_steps; s++) {
 911             if (g == 0 && s == 0) { continue; }
 912             stp = &generations[g].steps[s];
 913             if (generations[g].steps[0].n_blocks +
 914                 generations[g].steps[0].n_large_blocks
 915                 > generations[g].max_blocks
 916                 && stp->is_compacted == 0) {
 917                 needed += 2 * stp->n_blocks;
 918             } else {
 919                 needed += stp->n_blocks;
 920             }
 921         }
 922     }
 923     return needed;
 924 }
 925
 926 /* -----------------------------------------------------------------------------
 927    Debugging
 928
 929    memInventory() checks for memory leaks by counting up all the
 930    blocks we know about and comparing that to the number of blocks
 931    allegedly floating around in the system.
 932    -------------------------------------------------------------------------- */
 933
 934 #ifdef DEBUG
 935
 936 static lnat
 937 stepBlocks (step *stp)
 938 {
 939     lnat total_blocks;
 940     bdescr *bd;
 941
 942     total_blocks = stp->n_blocks;
 943     for (bd = stp->large_objects; bd; bd = bd->link) {
 944         total_blocks += bd->blocks;
 945         /* hack for megablock groups: they have an extra block or two in
 946            the second and subsequent megablocks where the block
 947            descriptors would normally go.
 948         */
 949         if (bd->blocks > BLOCKS_PER_MBLOCK) {
 950             total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
 951                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
 952         }
 953     }
 954     return total_blocks;
 955 }
 956
 957 void
 958 memInventory(void)
 959 {
 960   nat g, s, i;
 961   step *stp;
 962   bdescr *bd;
 963   lnat total_blocks = 0, free_blocks = 0;
 964
 965   /* count the blocks we current have */
 966
 967   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 968       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 969           total_blocks += bd->blocks;
 970       }
 971       for (s = 0; s < generations[g].n_steps; s++) {
 972           if (g==0 && s==0) continue;
 973           stp = &generations[g].steps[s];
 974           total_blocks += stepBlocks(stp);
 975       }
 976   }
 977
 978   for (i = 0; i < n_nurseries; i++) {
 979       total_blocks += stepBlocks(&nurseries[i]);
 980   }
 981
 982   if (RtsFlags.GcFlags.generations == 1) {
 983       /* two-space collector has a to-space too :-) */
 984       total_blocks += g0s0->n_to_blocks;
 985   }
 986
 987   /* any blocks held by allocate() */
 988   for (bd = small_alloc_list; bd; bd = bd->link) {
 989     total_blocks += bd->blocks;
 990   }
 991
 992 #ifdef PROFILING
 993   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
 994       total_blocks += retainerStackBlocks();
 995   }
 996 #endif
 997
 998   // count the blocks allocated by the arena allocator
 999   total_blocks += arenaBlocks();
1000
1001   /* count the blocks on the free list */
1002   free_blocks = countFreeList();
1003
1004   if (total_blocks + free_blocks != mblocks_allocated *
1005       BLOCKS_PER_MBLOCK) {
1006     debugBelch("Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
1007             total_blocks, free_blocks, total_blocks + free_blocks,
1008             mblocks_allocated * BLOCKS_PER_MBLOCK);
1009   }
1010
1011   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
1012 }
1013
1014
1015 nat
1016 countBlocks(bdescr *bd)
1017 {
1018     nat n;
1019     for (n=0; bd != NULL; bd=bd->link) {
1020         n += bd->blocks;
1021     }
1022     return n;
1023 }
1024
1025 /* Full heap sanity check. */
1026 void
1027 checkSanity( void )
1028 {
1029     nat g, s;
1030
1031     if (RtsFlags.GcFlags.generations == 1) {
1032         checkHeap(g0s0->to_blocks);
1033         checkChain(g0s0->large_objects);
1034     } else {
1035
1036         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1037             for (s = 0; s < generations[g].n_steps; s++) {
1038                 if (g == 0 && s == 0) { continue; }
1039                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1040                        == generations[g].steps[s].n_blocks);
1041                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1042                        == generations[g].steps[s].n_large_blocks);
1043                 checkHeap(generations[g].steps[s].blocks);
1044                 checkChain(generations[g].steps[s].large_objects);
1045                 if (g > 0) {
1046                     checkMutableList(generations[g].mut_list, g);
1047                 }
1048             }
1049         }
1050
1051         for (s = 0; s < n_nurseries; s++) {
1052             ASSERT(countBlocks(nurseries[s].blocks)
1053                    == nurseries[s].n_blocks);
1054             ASSERT(countBlocks(nurseries[s].large_objects)
1055                    == nurseries[s].n_large_blocks);
1056         }
1057
1058         checkFreeListSanity();
1059     }
1060 }
1061
1062 // handy function for use in gdb, because Bdescr() is inlined.
1063 extern bdescr *_bdescr( StgPtr p );
1064
1065 bdescr *
1066 _bdescr( StgPtr p )
1067 {
1068     return Bdescr(p);
1069 }
1070
1071 #endif