ghc/rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Storage manager front end
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11 #include "RtsUtils.h"
  12 #include "RtsFlags.h"
  13 #include "Stats.h"
  14 #include "Hooks.h"
  15 #include "BlockAlloc.h"
  16 #include "MBlock.h"
  17 #include "Weak.h"
  18 #include "Sanity.h"
  19 #include "Arena.h"
  20 #include "OSThreads.h"
  21 #include "Capability.h"
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 StgClosure    *caf_list         = NULL;
  30 StgClosure    *revertible_caf_list = NULL;
  31 rtsBool       keepCAFs;
  32
  33 bdescr *small_alloc_list;       /* allocate()d small objects */
  34 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  35 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  36 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  37
  38 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  39 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  40
  41 generation *generations = NULL; /* all the generations */
  42 generation *g0          = NULL; /* generation 0, for convenience */
  43 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  44 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  45
  46 ullong total_allocated = 0;     /* total memory allocated during run */
  47
  48 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  49 step *nurseries         = NULL; /* array of nurseries, >1 only if SMP */
  50
  51 /*
  52  * Storage manager mutex:  protects all the above state from
  53  * simultaneous access by two STG threads.
  54  */
  55 #ifdef SMP
  56 Mutex sm_mutex = INIT_MUTEX_VAR;
  57 #endif
  58
  59 /*
  60  * Forward references
  61  */
  62 static void *stgAllocForGMP   (size_t size_in_bytes);
  63 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  64 static void  stgDeallocForGMP (void *ptr, size_t size);
  65
  66 /*
  67  * Storage manager mutex
  68  */
  69 #if defined(SMP)
  70 extern Mutex sm_mutex;
  71 #define ACQUIRE_SM_LOCK   ACQUIRE_LOCK(&sm_mutex)
  72 #define RELEASE_SM_LOCK   RELEASE_LOCK(&sm_mutex)
  73 #else
  74 #define ACQUIRE_SM_LOCK
  75 #define RELEASE_SM_LOCK
  76 #endif
  77
  78 static void
  79 initStep (step *stp, int g, int s)
  80 {
  81     stp->no = s;
  82     stp->blocks = NULL;
  83     stp->n_to_blocks = 0;
  84     stp->n_blocks = 0;
  85     stp->gen = &generations[g];
  86     stp->gen_no = g;
  87     stp->hp = NULL;
  88     stp->hpLim = NULL;
  89     stp->hp_bd = NULL;
  90     stp->scan = NULL;
  91     stp->scan_bd = NULL;
  92     stp->large_objects = NULL;
  93     stp->n_large_blocks = 0;
  94     stp->new_large_objects = NULL;
  95     stp->scavenged_large_objects = NULL;
  96     stp->n_scavenged_large_blocks = 0;
  97     stp->is_compacted = 0;
  98     stp->bitmap = NULL;
  99 }
 100
 101 void
 102 initStorage( void )
 103 {
 104   nat g, s;
 105   generation *gen;
 106
 107   if (generations != NULL) {
 108       // multi-init protection
 109       return;
 110   }
 111
 112   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 113    * doing something reasonable.
 114    */
 115   ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
 116   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 117   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 118
 119   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 120       RtsFlags.GcFlags.heapSizeSuggestion >
 121       RtsFlags.GcFlags.maxHeapSize) {
 122     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 123   }
 124
 125   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 126       RtsFlags.GcFlags.minAllocAreaSize >
 127       RtsFlags.GcFlags.maxHeapSize) {
 128       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 129       exit(1);
 130   }
 131
 132   initBlockAllocator();
 133
 134 #if defined(SMP)
 135   initMutex(&sm_mutex);
 136 #endif
 137
 138   /* allocate generation info array */
 139   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 140                                              * sizeof(struct generation_),
 141                                              "initStorage: gens");
 142
 143   /* Initialise all generations */
 144   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 145     gen = &generations[g];
 146     gen->no = g;
 147     gen->mut_list = allocBlock();
 148     gen->collections = 0;
 149     gen->failed_promotions = 0;
 150     gen->max_blocks = 0;
 151   }
 152
 153   /* A couple of convenience pointers */
 154   g0 = &generations[0];
 155   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 156
 157   /* Allocate step structures in each generation */
 158   if (RtsFlags.GcFlags.generations > 1) {
 159     /* Only for multiple-generations */
 160
 161     /* Oldest generation: one step */
 162     oldest_gen->n_steps = 1;
 163     oldest_gen->steps =
 164       stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
 165
 166     /* set up all except the oldest generation with 2 steps */
 167     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 168       generations[g].n_steps = RtsFlags.GcFlags.steps;
 169       generations[g].steps  =
 170         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
 171                         "initStorage: steps");
 172     }
 173
 174   } else {
 175     /* single generation, i.e. a two-space collector */
 176     g0->n_steps = 1;
 177     g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
 178   }
 179
 180 #ifdef SMP
 181   n_nurseries = RtsFlags.ParFlags.nNodes;
 182   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 183                               "initStorage: nurseries");
 184 #else
 185   n_nurseries = 1;
 186   nurseries = g0->steps; // just share nurseries[0] with g0s0
 187 #endif
 188
 189   /* Initialise all steps */
 190   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 191     for (s = 0; s < generations[g].n_steps; s++) {
 192         initStep(&generations[g].steps[s], g, s);
 193     }
 194   }
 195
 196 #ifdef SMP
 197   for (s = 0; s < n_nurseries; s++) {
 198       initStep(&nurseries[s], 0, s);
 199   }
 200 #endif
 201
 202   /* Set up the destination pointers in each younger gen. step */
 203   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 204     for (s = 0; s < generations[g].n_steps-1; s++) {
 205       generations[g].steps[s].to = &generations[g].steps[s+1];
 206     }
 207     generations[g].steps[s].to = &generations[g+1].steps[0];
 208   }
 209   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 210
 211 #ifdef SMP
 212   for (s = 0; s < n_nurseries; s++) {
 213       nurseries[s].to = generations[0].steps[0].to;
 214   }
 215 #endif
 216
 217   /* The oldest generation has one step. */
 218   if (RtsFlags.GcFlags.compact) {
 219       if (RtsFlags.GcFlags.generations == 1) {
 220           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 221       } else {
 222           oldest_gen->steps[0].is_compacted = 1;
 223       }
 224   }
 225
 226 #ifdef SMP
 227   if (RtsFlags.GcFlags.generations == 1) {
 228       errorBelch("-G1 is incompatible with SMP");
 229       stg_exit(1);
 230   }
 231   // No -H, for now
 232   if (RtsFlags.GcFlags.heapSizeSuggestion > 0) {
 233       errorBelch("-H<size> is incompatible with SMP");
 234       stg_exit(1);
 235   }
 236 #endif
 237
 238   /* generation 0 is special: that's the nursery */
 239   generations[0].max_blocks = 0;
 240
 241   /* G0S0: the allocation area.  Policy: keep the allocation area
 242    * small to begin with, even if we have a large suggested heap
 243    * size.  Reason: we're going to do a major collection first, and we
 244    * don't want it to be a big one.  This vague idea is borne out by
 245    * rigorous experimental evidence.
 246    */
 247   g0s0 = &generations[0].steps[0];
 248
 249   allocNurseries();
 250
 251   weak_ptr_list = NULL;
 252   caf_list = NULL;
 253   revertible_caf_list = NULL;
 254
 255   /* initialise the allocate() interface */
 256   small_alloc_list = NULL;
 257   alloc_blocks = 0;
 258   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 259
 260   /* Tell GNU multi-precision pkg about our custom alloc functions */
 261   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 262
 263   IF_DEBUG(gc, statDescribeGens());
 264 }
 265
 266 void
 267 exitStorage (void)
 268 {
 269     stat_exit(calcAllocated());
 270 }
 271
 272 /* -----------------------------------------------------------------------------
 273    CAF management.
 274
 275    The entry code for every CAF does the following:
 276
 277       - builds a CAF_BLACKHOLE in the heap
 278       - pushes an update frame pointing to the CAF_BLACKHOLE
 279       - invokes UPD_CAF(), which:
 280           - calls newCaf, below
 281           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 282
 283    Why do we build a BLACKHOLE in the heap rather than just updating
 284    the thunk directly?  It's so that we only need one kind of update
 285    frame - otherwise we'd need a static version of the update frame too.
 286
 287    newCaf() does the following:
 288
 289       - it puts the CAF on the oldest generation's mut-once list.
 290         This is so that we can treat the CAF as a root when collecting
 291         younger generations.
 292
 293    For GHCI, we have additional requirements when dealing with CAFs:
 294
 295       - we must *retain* all dynamically-loaded CAFs ever entered,
 296         just in case we need them again.
 297       - we must be able to *revert* CAFs that have been evaluated, to
 298         their pre-evaluated form.
 299
 300       To do this, we use an additional CAF list.  When newCaf() is
 301       called on a dynamically-loaded CAF, we add it to the CAF list
 302       instead of the old-generation mutable list, and save away its
 303       old info pointer (in caf->saved_info) for later reversion.
 304
 305       To revert all the CAFs, we traverse the CAF list and reset the
 306       info pointer to caf->saved_info, then throw away the CAF list.
 307       (see GC.c:revertCAFs()).
 308
 309       -- SDM 29/1/01
 310
 311    -------------------------------------------------------------------------- */
 312
 313 void
 314 newCAF(StgClosure* caf)
 315 {
 316   ACQUIRE_SM_LOCK;
 317
 318   if(keepCAFs)
 319   {
 320     // HACK:
 321     // If we are in GHCi _and_ we are using dynamic libraries,
 322     // then we can't redirect newCAF calls to newDynCAF (see below),
 323     // so we make newCAF behave almost like newDynCAF.
 324     // The dynamic libraries might be used by both the interpreted
 325     // program and GHCi itself, so they must not be reverted.
 326     // This also means that in GHCi with dynamic libraries, CAFs are not
 327     // garbage collected. If this turns out to be a problem, we could
 328     // do another hack here and do an address range test on caf to figure
 329     // out whether it is from a dynamic library.
 330     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 331     ((StgIndStatic *)caf)->static_link = caf_list;
 332     caf_list = caf;
 333   }
 334   else
 335   {
 336     /* Put this CAF on the mutable list for the old generation.
 337     * This is a HACK - the IND_STATIC closure doesn't really have
 338     * a mut_link field, but we pretend it has - in fact we re-use
 339     * the STATIC_LINK field for the time being, because when we
 340     * come to do a major GC we won't need the mut_link field
 341     * any more and can use it as a STATIC_LINK.
 342     */
 343     ((StgIndStatic *)caf)->saved_info = NULL;
 344     recordMutableGen(caf, oldest_gen);
 345   }
 346
 347   RELEASE_SM_LOCK;
 348
 349 #ifdef PAR
 350   /* If we are PAR or DIST then  we never forget a CAF */
 351   { globalAddr *newGA;
 352     //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 353     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 354     ASSERT(newGA);
 355   }
 356 #endif /* PAR */
 357 }
 358
 359 // An alternate version of newCaf which is used for dynamically loaded
 360 // object code in GHCi.  In this case we want to retain *all* CAFs in
 361 // the object code, because they might be demanded at any time from an
 362 // expression evaluated on the command line.
 363 // Also, GHCi might want to revert CAFs, so we add these to the
 364 // revertible_caf_list.
 365 //
 366 // The linker hackily arranges that references to newCaf from dynamic
 367 // code end up pointing to newDynCAF.
 368 void
 369 newDynCAF(StgClosure *caf)
 370 {
 371     ACQUIRE_SM_LOCK;
 372
 373     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 374     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 375     revertible_caf_list = caf;
 376
 377     RELEASE_SM_LOCK;
 378 }
 379
 380 /* -----------------------------------------------------------------------------
 381    Nursery management.
 382    -------------------------------------------------------------------------- */
 383
 384 static bdescr *
 385 allocNursery (step *stp, bdescr *tail, nat blocks)
 386 {
 387     bdescr *bd;
 388     nat i;
 389
 390     // Allocate a nursery: we allocate fresh blocks one at a time and
 391     // cons them on to the front of the list, not forgetting to update
 392     // the back pointer on the tail of the list to point to the new block.
 393     for (i=0; i < blocks; i++) {
 394         // @LDV profiling
 395         /*
 396           processNursery() in LdvProfile.c assumes that every block group in
 397           the nursery contains only a single block. So, if a block group is
 398           given multiple blocks, change processNursery() accordingly.
 399         */
 400         bd = allocBlock();
 401         bd->link = tail;
 402         // double-link the nursery: we might need to insert blocks
 403         if (tail != NULL) {
 404             tail->u.back = bd;
 405         }
 406         bd->step = stp;
 407         bd->gen_no = 0;
 408         bd->flags = 0;
 409         bd->free = bd->start;
 410         tail = bd;
 411     }
 412     tail->u.back = NULL;
 413     return tail;
 414 }
 415
 416 static void
 417 assignNurseriesToCapabilities (void)
 418 {
 419 #ifdef SMP
 420     nat i;
 421
 422     for (i = 0; i < n_nurseries; i++) {
 423         capabilities[i].r.rNursery        = &nurseries[i];
 424         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 425     }
 426 #else /* SMP */
 427     MainCapability.r.rNursery        = &nurseries[0];
 428     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 429 #endif
 430 }
 431
 432 void
 433 allocNurseries( void )
 434 {
 435     nat i;
 436
 437     for (i = 0; i < n_nurseries; i++) {
 438         nurseries[i].blocks =
 439             allocNursery(&nurseries[i], NULL,
 440                          RtsFlags.GcFlags.minAllocAreaSize);
 441         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 442         nurseries[i].to_blocks   = NULL;
 443         nurseries[i].n_to_blocks = 0;
 444         /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
 445     }
 446     assignNurseriesToCapabilities();
 447 }
 448
 449 void
 450 resetNurseries( void )
 451 {
 452     nat i;
 453     bdescr *bd;
 454     step *stp;
 455
 456     for (i = 0; i < n_nurseries; i++) {
 457         stp = &nurseries[i];
 458         for (bd = stp->blocks; bd; bd = bd->link) {
 459             bd->free = bd->start;
 460             ASSERT(bd->gen_no == 0);
 461             ASSERT(bd->step == stp);
 462             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 463         }
 464     }
 465     assignNurseriesToCapabilities();
 466 }
 467
 468 lnat
 469 countNurseryBlocks (void)
 470 {
 471     nat i;
 472     lnat blocks = 0;
 473
 474     for (i = 0; i < n_nurseries; i++) {
 475         blocks += nurseries[i].n_blocks;
 476     }
 477     return blocks;
 478 }
 479
 480 static void
 481 resizeNursery ( step *stp, nat blocks )
 482 {
 483   bdescr *bd;
 484   nat nursery_blocks;
 485
 486   nursery_blocks = stp->n_blocks;
 487   if (nursery_blocks == blocks) return;
 488
 489   if (nursery_blocks < blocks) {
 490     IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
 491                          blocks));
 492     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 493   }
 494   else {
 495     bdescr *next_bd;
 496
 497     IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
 498                          blocks));
 499
 500     bd = stp->blocks;
 501     while (nursery_blocks > blocks) {
 502         next_bd = bd->link;
 503         next_bd->u.back = NULL;
 504         nursery_blocks -= bd->blocks; // might be a large block
 505         freeGroup(bd);
 506         bd = next_bd;
 507     }
 508     stp->blocks = bd;
 509     // might have gone just under, by freeing a large block, so make
 510     // up the difference.
 511     if (nursery_blocks < blocks) {
 512         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 513     }
 514   }
 515
 516   stp->n_blocks = blocks;
 517   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 518 }
 519
 520 //
 521 // Resize each of the nurseries to the specified size.
 522 //
 523 void
 524 resizeNurseries (nat blocks)
 525 {
 526     nat i;
 527     for (i = 0; i < n_nurseries; i++) {
 528         resizeNursery(&nurseries[i], blocks);
 529     }
 530 }
 531
 532 /* -----------------------------------------------------------------------------
 533    The allocate() interface
 534
 535    allocate(n) always succeeds, and returns a chunk of memory n words
 536    long.  n can be larger than the size of a block if necessary, in
 537    which case a contiguous block group will be allocated.
 538    -------------------------------------------------------------------------- */
 539
 540 StgPtr
 541 allocate( nat n )
 542 {
 543   bdescr *bd;
 544   StgPtr p;
 545
 546   ACQUIRE_SM_LOCK;
 547
 548   TICK_ALLOC_HEAP_NOCTR(n);
 549   CCS_ALLOC(CCCS,n);
 550
 551   /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 552   /* ToDo: allocate directly into generation 1 */
 553   if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 554     nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 555     bd = allocGroup(req_blocks);
 556     dbl_link_onto(bd, &g0s0->large_objects);
 557     g0s0->n_large_blocks += req_blocks;
 558     bd->gen_no  = 0;
 559     bd->step = g0s0;
 560     bd->flags = BF_LARGE;
 561     bd->free = bd->start + n;
 562     alloc_blocks += req_blocks;
 563     RELEASE_SM_LOCK;
 564     return bd->start;
 565
 566   /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 567   } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 568     if (small_alloc_list) {
 569       small_alloc_list->free = alloc_Hp;
 570     }
 571     bd = allocBlock();
 572     bd->link = small_alloc_list;
 573     small_alloc_list = bd;
 574     bd->gen_no = 0;
 575     bd->step = g0s0;
 576     bd->flags = 0;
 577     alloc_Hp = bd->start;
 578     alloc_HpLim = bd->start + BLOCK_SIZE_W;
 579     alloc_blocks++;
 580   }
 581
 582   p = alloc_Hp;
 583   alloc_Hp += n;
 584   RELEASE_SM_LOCK;
 585   return p;
 586 }
 587
 588 lnat
 589 allocated_bytes( void )
 590 {
 591     lnat allocated;
 592
 593     allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
 594     if (pinned_object_block != NULL) {
 595         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 596             pinned_object_block->free;
 597     }
 598
 599     return allocated;
 600 }
 601
 602 void
 603 tidyAllocateLists (void)
 604 {
 605     if (small_alloc_list != NULL) {
 606         ASSERT(alloc_Hp >= small_alloc_list->start &&
 607                alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
 608         small_alloc_list->free = alloc_Hp;
 609     }
 610 }
 611
 612 /* ---------------------------------------------------------------------------
 613    Allocate a fixed/pinned object.
 614
 615    We allocate small pinned objects into a single block, allocating a
 616    new block when the current one overflows.  The block is chained
 617    onto the large_object_list of generation 0 step 0.
 618
 619    NOTE: The GC can't in general handle pinned objects.  This
 620    interface is only safe to use for ByteArrays, which have no
 621    pointers and don't require scavenging.  It works because the
 622    block's descriptor has the BF_LARGE flag set, so the block is
 623    treated as a large object and chained onto various lists, rather
 624    than the individual objects being copied.  However, when it comes
 625    to scavenge the block, the GC will only scavenge the first object.
 626    The reason is that the GC can't linearly scan a block of pinned
 627    objects at the moment (doing so would require using the
 628    mostly-copying techniques).  But since we're restricting ourselves
 629    to pinned ByteArrays, not scavenging is ok.
 630
 631    This function is called by newPinnedByteArray# which immediately
 632    fills the allocated memory with a MutableByteArray#.
 633    ------------------------------------------------------------------------- */
 634
 635 StgPtr
 636 allocatePinned( nat n )
 637 {
 638     StgPtr p;
 639     bdescr *bd = pinned_object_block;
 640
 641     // If the request is for a large object, then allocate()
 642     // will give us a pinned object anyway.
 643     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 644         return allocate(n);
 645     }
 646
 647     ACQUIRE_SM_LOCK;
 648
 649     TICK_ALLOC_HEAP_NOCTR(n);
 650     CCS_ALLOC(CCCS,n);
 651
 652     // we always return 8-byte aligned memory.  bd->free must be
 653     // 8-byte aligned to begin with, so we just round up n to
 654     // the nearest multiple of 8 bytes.
 655     if (sizeof(StgWord) == 4) {
 656         n = (n+1) & ~1;
 657     }
 658
 659     // If we don't have a block of pinned objects yet, or the current
 660     // one isn't large enough to hold the new object, allocate a new one.
 661     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 662         pinned_object_block = bd = allocBlock();
 663         dbl_link_onto(bd, &g0s0->large_objects);
 664         bd->gen_no = 0;
 665         bd->step   = g0s0;
 666         bd->flags  = BF_PINNED | BF_LARGE;
 667         bd->free   = bd->start;
 668         alloc_blocks++;
 669     }
 670
 671     p = bd->free;
 672     bd->free += n;
 673     RELEASE_SM_LOCK;
 674     return p;
 675 }
 676
 677 /* -----------------------------------------------------------------------------
 678    Allocation functions for GMP.
 679
 680    These all use the allocate() interface - we can't have any garbage
 681    collection going on during a gmp operation, so we use allocate()
 682    which always succeeds.  The gmp operations which might need to
 683    allocate will ask the storage manager (via doYouWantToGC()) whether
 684    a garbage collection is required, in case we get into a loop doing
 685    only allocate() style allocation.
 686    -------------------------------------------------------------------------- */
 687
 688 static void *
 689 stgAllocForGMP (size_t size_in_bytes)
 690 {
 691   StgArrWords* arr;
 692   nat data_size_in_words, total_size_in_words;
 693
 694   /* round up to a whole number of words */
 695   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 696   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 697
 698   /* allocate and fill it in. */
 699   arr = (StgArrWords *)allocate(total_size_in_words);
 700   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 701
 702   /* and return a ptr to the goods inside the array */
 703   return arr->payload;
 704 }
 705
 706 static void *
 707 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 708 {
 709     void *new_stuff_ptr = stgAllocForGMP(new_size);
 710     nat i = 0;
 711     char *p = (char *) ptr;
 712     char *q = (char *) new_stuff_ptr;
 713
 714     for (; i < old_size; i++, p++, q++) {
 715         *q = *p;
 716     }
 717
 718     return(new_stuff_ptr);
 719 }
 720
 721 static void
 722 stgDeallocForGMP (void *ptr STG_UNUSED,
 723                   size_t size STG_UNUSED)
 724 {
 725     /* easy for us: the garbage collector does the dealloc'n */
 726 }
 727
 728 /* -----------------------------------------------------------------------------
 729  * Stats and stuff
 730  * -------------------------------------------------------------------------- */
 731
 732 /* -----------------------------------------------------------------------------
 733  * calcAllocated()
 734  *
 735  * Approximate how much we've allocated: number of blocks in the
 736  * nursery + blocks allocated via allocate() - unused nusery blocks.
 737  * This leaves a little slop at the end of each block, and doesn't
 738  * take into account large objects (ToDo).
 739  * -------------------------------------------------------------------------- */
 740
 741 lnat
 742 calcAllocated( void )
 743 {
 744   nat allocated;
 745   bdescr *bd;
 746   nat i;
 747
 748   allocated = allocated_bytes();
 749   for (i = 0; i < n_nurseries; i++) {
 750       allocated += nurseries[i].n_blocks * BLOCK_SIZE_W;
 751   }
 752
 753 #ifdef SMP
 754   for (i = 0; i < n_nurseries; i++) {
 755       Capability *cap;
 756       for ( bd = capabilities[i].r.rCurrentNursery;
 757             bd != NULL; bd = bd->link ) {
 758           allocated -= BLOCK_SIZE_W;
 759       }
 760       cap = &capabilities[i];
 761       if (cap->r.rCurrentNursery->free <
 762           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 763           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 764               - cap->r.rCurrentNursery->free;
 765       }
 766   }
 767 #else
 768   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 769
 770   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 771       allocated -= BLOCK_SIZE_W;
 772   }
 773   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 774       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 775           - current_nursery->free;
 776   }
 777 #endif
 778
 779   total_allocated += allocated;
 780   return allocated;
 781 }
 782
 783 /* Approximate the amount of live data in the heap.  To be called just
 784  * after garbage collection (see GarbageCollect()).
 785  */
 786 extern lnat
 787 calcLive(void)
 788 {
 789   nat g, s;
 790   lnat live = 0;
 791   step *stp;
 792
 793   if (RtsFlags.GcFlags.generations == 1) {
 794     live = (g0s0->n_to_blocks - 1) * BLOCK_SIZE_W +
 795       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 796     return live;
 797   }
 798
 799   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 800     for (s = 0; s < generations[g].n_steps; s++) {
 801       /* approximate amount of live data (doesn't take into account slop
 802        * at end of each block).
 803        */
 804       if (g == 0 && s == 0) {
 805           continue;
 806       }
 807       stp = &generations[g].steps[s];
 808       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 809       if (stp->hp_bd != NULL) {
 810           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 811               / sizeof(W_);
 812       }
 813     }
 814   }
 815   return live;
 816 }
 817
 818 /* Approximate the number of blocks that will be needed at the next
 819  * garbage collection.
 820  *
 821  * Assume: all data currently live will remain live.  Steps that will
 822  * be collected next time will therefore need twice as many blocks
 823  * since all the data will be copied.
 824  */
 825 extern lnat
 826 calcNeeded(void)
 827 {
 828     lnat needed = 0;
 829     nat g, s;
 830     step *stp;
 831
 832     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 833         for (s = 0; s < generations[g].n_steps; s++) {
 834             if (g == 0 && s == 0) { continue; }
 835             stp = &generations[g].steps[s];
 836             if (generations[g].steps[0].n_blocks +
 837                 generations[g].steps[0].n_large_blocks
 838                 > generations[g].max_blocks
 839                 && stp->is_compacted == 0) {
 840                 needed += 2 * stp->n_blocks;
 841             } else {
 842                 needed += stp->n_blocks;
 843             }
 844         }
 845     }
 846     return needed;
 847 }
 848
 849 /* -----------------------------------------------------------------------------
 850    Debugging
 851
 852    memInventory() checks for memory leaks by counting up all the
 853    blocks we know about and comparing that to the number of blocks
 854    allegedly floating around in the system.
 855    -------------------------------------------------------------------------- */
 856
 857 #ifdef DEBUG
 858
 859 static lnat
 860 stepBlocks (step *stp)
 861 {
 862     lnat total_blocks;
 863     bdescr *bd;
 864
 865     total_blocks = stp->n_blocks;
 866     for (bd = stp->large_objects; bd; bd = bd->link) {
 867         total_blocks += bd->blocks;
 868         /* hack for megablock groups: they have an extra block or two in
 869            the second and subsequent megablocks where the block
 870            descriptors would normally go.
 871         */
 872         if (bd->blocks > BLOCKS_PER_MBLOCK) {
 873             total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
 874                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
 875         }
 876     }
 877     return total_blocks;
 878 }
 879
 880 void
 881 memInventory(void)
 882 {
 883   nat g, s, i;
 884   step *stp;
 885   bdescr *bd;
 886   lnat total_blocks = 0, free_blocks = 0;
 887
 888   /* count the blocks we current have */
 889
 890   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 891       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 892           total_blocks += bd->blocks;
 893       }
 894       for (s = 0; s < generations[g].n_steps; s++) {
 895           if (g==0 && s==0) continue;
 896           stp = &generations[g].steps[s];
 897           total_blocks += stepBlocks(stp);
 898       }
 899   }
 900
 901   for (i = 0; i < n_nurseries; i++) {
 902       total_blocks += stepBlocks(&nurseries[i]);
 903   }
 904
 905   if (RtsFlags.GcFlags.generations == 1) {
 906       /* two-space collector has a to-space too :-) */
 907       total_blocks += g0s0->n_to_blocks;
 908   }
 909
 910   /* any blocks held by allocate() */
 911   for (bd = small_alloc_list; bd; bd = bd->link) {
 912     total_blocks += bd->blocks;
 913   }
 914
 915 #ifdef PROFILING
 916   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
 917       total_blocks += retainerStackBlocks();
 918   }
 919 #endif
 920
 921   // count the blocks allocated by the arena allocator
 922   total_blocks += arenaBlocks();
 923
 924   /* count the blocks on the free list */
 925   free_blocks = countFreeList();
 926
 927   if (total_blocks + free_blocks != mblocks_allocated *
 928       BLOCKS_PER_MBLOCK) {
 929     debugBelch("Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
 930             total_blocks, free_blocks, total_blocks + free_blocks,
 931             mblocks_allocated * BLOCKS_PER_MBLOCK);
 932   }
 933
 934   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
 935 }
 936
 937
 938 nat
 939 countBlocks(bdescr *bd)
 940 {
 941     nat n;
 942     for (n=0; bd != NULL; bd=bd->link) {
 943         n += bd->blocks;
 944     }
 945     return n;
 946 }
 947
 948 /* Full heap sanity check. */
 949 void
 950 checkSanity( void )
 951 {
 952     nat g, s;
 953
 954     if (RtsFlags.GcFlags.generations == 1) {
 955         checkHeap(g0s0->to_blocks);
 956         checkChain(g0s0->large_objects);
 957     } else {
 958
 959         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 960             for (s = 0; s < generations[g].n_steps; s++) {
 961                 if (g == 0 && s == 0) { continue; }
 962                 ASSERT(countBlocks(generations[g].steps[s].blocks)
 963                        == generations[g].steps[s].n_blocks);
 964                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
 965                        == generations[g].steps[s].n_large_blocks);
 966                 checkHeap(generations[g].steps[s].blocks);
 967                 checkChain(generations[g].steps[s].large_objects);
 968                 if (g > 0) {
 969                     checkMutableList(generations[g].mut_list, g);
 970                 }
 971             }
 972         }
 973
 974         for (s = 0; s < n_nurseries; s++) {
 975             ASSERT(countBlocks(generations[g].steps[s].blocks)
 976                    == generations[g].steps[s].n_blocks);
 977             ASSERT(countBlocks(generations[g].steps[s].large_objects)
 978                    == generations[g].steps[s].n_large_blocks);
 979         }
 980
 981         checkFreeListSanity();
 982     }
 983 }
 984
 985 // handy function for use in gdb, because Bdescr() is inlined.
 986 extern bdescr *_bdescr( StgPtr p );
 987
 988 bdescr *
 989 _bdescr( StgPtr p )
 990 {
 991     return Bdescr(p);
 992 }
 993
 994 #endif