ghc/rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Storage manager front end
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11 #include "RtsUtils.h"
  12 #include "RtsFlags.h"
  13 #include "Stats.h"
  14 #include "Hooks.h"
  15 #include "BlockAlloc.h"
  16 #include "MBlock.h"
  17 #include "Weak.h"
  18 #include "Sanity.h"
  19 #include "Arena.h"
  20 #include "OSThreads.h"
  21 #include "Capability.h"
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28
  29 /*
  30  * All these globals require sm_mutex to access in THREADED_RTS mode.
  31  */
  32 StgClosure    *caf_list         = NULL;
  33 StgClosure    *revertible_caf_list = NULL;
  34 rtsBool       keepCAFs;
  35
  36 bdescr *small_alloc_list;       /* allocate()d small objects */
  37 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  38 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  39 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  40
  41 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  42 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  43
  44 generation *generations = NULL; /* all the generations */
  45 generation *g0          = NULL; /* generation 0, for convenience */
  46 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  47 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  48
  49 ullong total_allocated = 0;     /* total memory allocated during run */
  50
  51 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  52 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  53
  54 #ifdef THREADED_RTS
  55 /*
  56  * Storage manager mutex:  protects all the above state from
  57  * simultaneous access by two STG threads.
  58  */
  59 Mutex sm_mutex;
  60 /*
  61  * This mutex is used by atomicModifyMutVar# only
  62  */
  63 Mutex atomic_modify_mutvar_mutex;
  64 #endif
  65
  66
  67 /*
  68  * Forward references
  69  */
  70 static void *stgAllocForGMP   (size_t size_in_bytes);
  71 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  72 static void  stgDeallocForGMP (void *ptr, size_t size);
  73
  74 static void
  75 initStep (step *stp, int g, int s)
  76 {
  77     stp->no = s;
  78     stp->blocks = NULL;
  79     stp->n_blocks = 0;
  80     stp->old_blocks = NULL;
  81     stp->n_old_blocks = 0;
  82     stp->gen = &generations[g];
  83     stp->gen_no = g;
  84     stp->hp = NULL;
  85     stp->hpLim = NULL;
  86     stp->hp_bd = NULL;
  87     stp->scavd_hp = NULL;
  88     stp->scavd_hpLim = NULL;
  89     stp->scan = NULL;
  90     stp->scan_bd = NULL;
  91     stp->large_objects = NULL;
  92     stp->n_large_blocks = 0;
  93     stp->new_large_objects = NULL;
  94     stp->scavenged_large_objects = NULL;
  95     stp->n_scavenged_large_blocks = 0;
  96     stp->is_compacted = 0;
  97     stp->bitmap = NULL;
  98 }
  99
 100 void
 101 initStorage( void )
 102 {
 103   nat g, s;
 104   generation *gen;
 105
 106   if (generations != NULL) {
 107       // multi-init protection
 108       return;
 109   }
 110
 111   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 112    * doing something reasonable.
 113    */
 114   ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
 115   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 116   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 117
 118   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 119       RtsFlags.GcFlags.heapSizeSuggestion >
 120       RtsFlags.GcFlags.maxHeapSize) {
 121     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 122   }
 123
 124   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 125       RtsFlags.GcFlags.minAllocAreaSize >
 126       RtsFlags.GcFlags.maxHeapSize) {
 127       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 128       exit(1);
 129   }
 130
 131   initBlockAllocator();
 132
 133 #if defined(THREADED_RTS)
 134   initMutex(&sm_mutex);
 135 #endif
 136
 137   ACQUIRE_SM_LOCK;
 138
 139   /* allocate generation info array */
 140   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 141                                              * sizeof(struct generation_),
 142                                              "initStorage: gens");
 143
 144   /* Initialise all generations */
 145   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 146     gen = &generations[g];
 147     gen->no = g;
 148     gen->mut_list = allocBlock();
 149     gen->collections = 0;
 150     gen->failed_promotions = 0;
 151     gen->max_blocks = 0;
 152   }
 153
 154   /* A couple of convenience pointers */
 155   g0 = &generations[0];
 156   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 157
 158   /* Allocate step structures in each generation */
 159   if (RtsFlags.GcFlags.generations > 1) {
 160     /* Only for multiple-generations */
 161
 162     /* Oldest generation: one step */
 163     oldest_gen->n_steps = 1;
 164     oldest_gen->steps =
 165       stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
 166
 167     /* set up all except the oldest generation with 2 steps */
 168     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 169       generations[g].n_steps = RtsFlags.GcFlags.steps;
 170       generations[g].steps  =
 171         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
 172                         "initStorage: steps");
 173     }
 174
 175   } else {
 176     /* single generation, i.e. a two-space collector */
 177     g0->n_steps = 1;
 178     g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
 179   }
 180
 181 #ifdef THREADED_RTS
 182   n_nurseries = n_capabilities;
 183   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 184                               "initStorage: nurseries");
 185 #else
 186   n_nurseries = 1;
 187   nurseries = g0->steps; // just share nurseries[0] with g0s0
 188 #endif
 189
 190   /* Initialise all steps */
 191   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 192     for (s = 0; s < generations[g].n_steps; s++) {
 193         initStep(&generations[g].steps[s], g, s);
 194     }
 195   }
 196
 197 #ifdef THREADED_RTS
 198   for (s = 0; s < n_nurseries; s++) {
 199       initStep(&nurseries[s], 0, s);
 200   }
 201 #endif
 202
 203   /* Set up the destination pointers in each younger gen. step */
 204   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 205     for (s = 0; s < generations[g].n_steps-1; s++) {
 206       generations[g].steps[s].to = &generations[g].steps[s+1];
 207     }
 208     generations[g].steps[s].to = &generations[g+1].steps[0];
 209   }
 210   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 211
 212 #ifdef THREADED_RTS
 213   for (s = 0; s < n_nurseries; s++) {
 214       nurseries[s].to = generations[0].steps[0].to;
 215   }
 216 #endif
 217
 218   /* The oldest generation has one step. */
 219   if (RtsFlags.GcFlags.compact) {
 220       if (RtsFlags.GcFlags.generations == 1) {
 221           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 222       } else {
 223           oldest_gen->steps[0].is_compacted = 1;
 224       }
 225   }
 226
 227 #ifdef THREADED_RTS
 228   if (RtsFlags.GcFlags.generations == 1) {
 229       errorBelch("-G1 is incompatible with -threaded");
 230       stg_exit(EXIT_FAILURE);
 231   }
 232 #endif
 233
 234   /* generation 0 is special: that's the nursery */
 235   generations[0].max_blocks = 0;
 236
 237   /* G0S0: the allocation area.  Policy: keep the allocation area
 238    * small to begin with, even if we have a large suggested heap
 239    * size.  Reason: we're going to do a major collection first, and we
 240    * don't want it to be a big one.  This vague idea is borne out by
 241    * rigorous experimental evidence.
 242    */
 243   g0s0 = &generations[0].steps[0];
 244
 245   allocNurseries();
 246
 247   weak_ptr_list = NULL;
 248   caf_list = NULL;
 249   revertible_caf_list = NULL;
 250
 251   /* initialise the allocate() interface */
 252   small_alloc_list = NULL;
 253   alloc_blocks = 0;
 254   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 255
 256   /* Tell GNU multi-precision pkg about our custom alloc functions */
 257   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 258
 259   IF_DEBUG(gc, statDescribeGens());
 260
 261   RELEASE_SM_LOCK;
 262 }
 263
 264 void
 265 exitStorage (void)
 266 {
 267     stat_exit(calcAllocated());
 268 }
 269
 270 /* -----------------------------------------------------------------------------
 271    CAF management.
 272
 273    The entry code for every CAF does the following:
 274
 275       - builds a CAF_BLACKHOLE in the heap
 276       - pushes an update frame pointing to the CAF_BLACKHOLE
 277       - invokes UPD_CAF(), which:
 278           - calls newCaf, below
 279           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 280
 281    Why do we build a BLACKHOLE in the heap rather than just updating
 282    the thunk directly?  It's so that we only need one kind of update
 283    frame - otherwise we'd need a static version of the update frame too.
 284
 285    newCaf() does the following:
 286
 287       - it puts the CAF on the oldest generation's mut-once list.
 288         This is so that we can treat the CAF as a root when collecting
 289         younger generations.
 290
 291    For GHCI, we have additional requirements when dealing with CAFs:
 292
 293       - we must *retain* all dynamically-loaded CAFs ever entered,
 294         just in case we need them again.
 295       - we must be able to *revert* CAFs that have been evaluated, to
 296         their pre-evaluated form.
 297
 298       To do this, we use an additional CAF list.  When newCaf() is
 299       called on a dynamically-loaded CAF, we add it to the CAF list
 300       instead of the old-generation mutable list, and save away its
 301       old info pointer (in caf->saved_info) for later reversion.
 302
 303       To revert all the CAFs, we traverse the CAF list and reset the
 304       info pointer to caf->saved_info, then throw away the CAF list.
 305       (see GC.c:revertCAFs()).
 306
 307       -- SDM 29/1/01
 308
 309    -------------------------------------------------------------------------- */
 310
 311 void
 312 newCAF(StgClosure* caf)
 313 {
 314   ACQUIRE_SM_LOCK;
 315
 316   if(keepCAFs)
 317   {
 318     // HACK:
 319     // If we are in GHCi _and_ we are using dynamic libraries,
 320     // then we can't redirect newCAF calls to newDynCAF (see below),
 321     // so we make newCAF behave almost like newDynCAF.
 322     // The dynamic libraries might be used by both the interpreted
 323     // program and GHCi itself, so they must not be reverted.
 324     // This also means that in GHCi with dynamic libraries, CAFs are not
 325     // garbage collected. If this turns out to be a problem, we could
 326     // do another hack here and do an address range test on caf to figure
 327     // out whether it is from a dynamic library.
 328     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 329     ((StgIndStatic *)caf)->static_link = caf_list;
 330     caf_list = caf;
 331   }
 332   else
 333   {
 334     /* Put this CAF on the mutable list for the old generation.
 335     * This is a HACK - the IND_STATIC closure doesn't really have
 336     * a mut_link field, but we pretend it has - in fact we re-use
 337     * the STATIC_LINK field for the time being, because when we
 338     * come to do a major GC we won't need the mut_link field
 339     * any more and can use it as a STATIC_LINK.
 340     */
 341     ((StgIndStatic *)caf)->saved_info = NULL;
 342     recordMutableGen(caf, oldest_gen);
 343   }
 344
 345   RELEASE_SM_LOCK;
 346
 347 #ifdef PAR
 348   /* If we are PAR or DIST then  we never forget a CAF */
 349   { globalAddr *newGA;
 350     //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 351     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 352     ASSERT(newGA);
 353   }
 354 #endif /* PAR */
 355 }
 356
 357 // An alternate version of newCaf which is used for dynamically loaded
 358 // object code in GHCi.  In this case we want to retain *all* CAFs in
 359 // the object code, because they might be demanded at any time from an
 360 // expression evaluated on the command line.
 361 // Also, GHCi might want to revert CAFs, so we add these to the
 362 // revertible_caf_list.
 363 //
 364 // The linker hackily arranges that references to newCaf from dynamic
 365 // code end up pointing to newDynCAF.
 366 void
 367 newDynCAF(StgClosure *caf)
 368 {
 369     ACQUIRE_SM_LOCK;
 370
 371     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 372     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 373     revertible_caf_list = caf;
 374
 375     RELEASE_SM_LOCK;
 376 }
 377
 378 /* -----------------------------------------------------------------------------
 379    Nursery management.
 380    -------------------------------------------------------------------------- */
 381
 382 static bdescr *
 383 allocNursery (step *stp, bdescr *tail, nat blocks)
 384 {
 385     bdescr *bd;
 386     nat i;
 387
 388     // Allocate a nursery: we allocate fresh blocks one at a time and
 389     // cons them on to the front of the list, not forgetting to update
 390     // the back pointer on the tail of the list to point to the new block.
 391     for (i=0; i < blocks; i++) {
 392         // @LDV profiling
 393         /*
 394           processNursery() in LdvProfile.c assumes that every block group in
 395           the nursery contains only a single block. So, if a block group is
 396           given multiple blocks, change processNursery() accordingly.
 397         */
 398         bd = allocBlock();
 399         bd->link = tail;
 400         // double-link the nursery: we might need to insert blocks
 401         if (tail != NULL) {
 402             tail->u.back = bd;
 403         }
 404         bd->step = stp;
 405         bd->gen_no = 0;
 406         bd->flags = 0;
 407         bd->free = bd->start;
 408         tail = bd;
 409     }
 410     tail->u.back = NULL;
 411     return tail;
 412 }
 413
 414 static void
 415 assignNurseriesToCapabilities (void)
 416 {
 417 #ifdef THREADED_RTS
 418     nat i;
 419
 420     for (i = 0; i < n_nurseries; i++) {
 421         capabilities[i].r.rNursery        = &nurseries[i];
 422         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 423         capabilities[i].r.rCurrentAlloc   = NULL;
 424     }
 425 #else /* THREADED_RTS */
 426     MainCapability.r.rNursery        = &nurseries[0];
 427     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 428     MainCapability.r.rCurrentAlloc   = NULL;
 429 #endif
 430 }
 431
 432 void
 433 allocNurseries( void )
 434 {
 435     nat i;
 436
 437     for (i = 0; i < n_nurseries; i++) {
 438         nurseries[i].blocks =
 439             allocNursery(&nurseries[i], NULL,
 440                          RtsFlags.GcFlags.minAllocAreaSize);
 441         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 442         nurseries[i].old_blocks   = NULL;
 443         nurseries[i].n_old_blocks = 0;
 444         /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
 445     }
 446     assignNurseriesToCapabilities();
 447 }
 448
 449 void
 450 resetNurseries( void )
 451 {
 452     nat i;
 453     bdescr *bd;
 454     step *stp;
 455
 456     for (i = 0; i < n_nurseries; i++) {
 457         stp = &nurseries[i];
 458         for (bd = stp->blocks; bd; bd = bd->link) {
 459             bd->free = bd->start;
 460             ASSERT(bd->gen_no == 0);
 461             ASSERT(bd->step == stp);
 462             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 463         }
 464     }
 465     assignNurseriesToCapabilities();
 466 }
 467
 468 lnat
 469 countNurseryBlocks (void)
 470 {
 471     nat i;
 472     lnat blocks = 0;
 473
 474     for (i = 0; i < n_nurseries; i++) {
 475         blocks += nurseries[i].n_blocks;
 476     }
 477     return blocks;
 478 }
 479
 480 static void
 481 resizeNursery ( step *stp, nat blocks )
 482 {
 483   bdescr *bd;
 484   nat nursery_blocks;
 485
 486   nursery_blocks = stp->n_blocks;
 487   if (nursery_blocks == blocks) return;
 488
 489   if (nursery_blocks < blocks) {
 490     IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
 491                          blocks));
 492     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 493   }
 494   else {
 495     bdescr *next_bd;
 496
 497     IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
 498                          blocks));
 499
 500     bd = stp->blocks;
 501     while (nursery_blocks > blocks) {
 502         next_bd = bd->link;
 503         next_bd->u.back = NULL;
 504         nursery_blocks -= bd->blocks; // might be a large block
 505         freeGroup(bd);
 506         bd = next_bd;
 507     }
 508     stp->blocks = bd;
 509     // might have gone just under, by freeing a large block, so make
 510     // up the difference.
 511     if (nursery_blocks < blocks) {
 512         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 513     }
 514   }
 515
 516   stp->n_blocks = blocks;
 517   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 518 }
 519
 520 //
 521 // Resize each of the nurseries to the specified size.
 522 //
 523 void
 524 resizeNurseriesFixed (nat blocks)
 525 {
 526     nat i;
 527     for (i = 0; i < n_nurseries; i++) {
 528         resizeNursery(&nurseries[i], blocks);
 529     }
 530 }
 531
 532 //
 533 // Resize the nurseries to the total specified size.
 534 //
 535 void
 536 resizeNurseries (nat blocks)
 537 {
 538     // If there are multiple nurseries, then we just divide the number
 539     // of available blocks between them.
 540     resizeNurseriesFixed(blocks / n_nurseries);
 541 }
 542
 543 /* -----------------------------------------------------------------------------
 544    The allocate() interface
 545
 546    allocate(n) always succeeds, and returns a chunk of memory n words
 547    long.  n can be larger than the size of a block if necessary, in
 548    which case a contiguous block group will be allocated.
 549    -------------------------------------------------------------------------- */
 550
 551 StgPtr
 552 allocate( nat n )
 553 {
 554     bdescr *bd;
 555     StgPtr p;
 556
 557     ACQUIRE_SM_LOCK;
 558
 559     TICK_ALLOC_HEAP_NOCTR(n);
 560     CCS_ALLOC(CCCS,n);
 561
 562     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 563     /* ToDo: allocate directly into generation 1 */
 564     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 565         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 566         bd = allocGroup(req_blocks);
 567         dbl_link_onto(bd, &g0s0->large_objects);
 568         g0s0->n_large_blocks += req_blocks;
 569         bd->gen_no  = 0;
 570         bd->step = g0s0;
 571         bd->flags = BF_LARGE;
 572         bd->free = bd->start + n;
 573         alloc_blocks += req_blocks;
 574         RELEASE_SM_LOCK;
 575         return bd->start;
 576
 577         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 578     } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 579         if (small_alloc_list) {
 580             small_alloc_list->free = alloc_Hp;
 581         }
 582         bd = allocBlock();
 583         bd->link = small_alloc_list;
 584         small_alloc_list = bd;
 585         bd->gen_no = 0;
 586         bd->step = g0s0;
 587         bd->flags = 0;
 588         alloc_Hp = bd->start;
 589         alloc_HpLim = bd->start + BLOCK_SIZE_W;
 590         alloc_blocks++;
 591     }
 592
 593     p = alloc_Hp;
 594     alloc_Hp += n;
 595     RELEASE_SM_LOCK;
 596     return p;
 597 }
 598
 599 lnat
 600 allocated_bytes( void )
 601 {
 602     lnat allocated;
 603
 604     allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
 605     if (pinned_object_block != NULL) {
 606         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 607             pinned_object_block->free;
 608     }
 609
 610     return allocated;
 611 }
 612
 613 void
 614 tidyAllocateLists (void)
 615 {
 616     if (small_alloc_list != NULL) {
 617         ASSERT(alloc_Hp >= small_alloc_list->start &&
 618                alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
 619         small_alloc_list->free = alloc_Hp;
 620     }
 621 }
 622
 623 /* -----------------------------------------------------------------------------
 624    allocateLocal()
 625
 626    This allocates memory in the current thread - it is intended for
 627    use primarily from STG-land where we have a Capability.  It is
 628    better than allocate() because it doesn't require taking the
 629    sm_mutex lock in the common case.
 630
 631    Memory is allocated directly from the nursery if possible (but not
 632    from the current nursery block, so as not to interfere with
 633    Hp/HpLim).
 634    -------------------------------------------------------------------------- */
 635
 636 StgPtr
 637 allocateLocal (Capability *cap, nat n)
 638 {
 639     bdescr *bd;
 640     StgPtr p;
 641
 642     TICK_ALLOC_HEAP_NOCTR(n);
 643     CCS_ALLOC(CCCS,n);
 644
 645     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 646     /* ToDo: allocate directly into generation 1 */
 647     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 648         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 649         ACQUIRE_SM_LOCK;
 650         bd = allocGroup(req_blocks);
 651         dbl_link_onto(bd, &g0s0->large_objects);
 652         g0s0->n_large_blocks += req_blocks;
 653         bd->gen_no  = 0;
 654         bd->step = g0s0;
 655         bd->flags = BF_LARGE;
 656         bd->free = bd->start + n;
 657         alloc_blocks += req_blocks;
 658         RELEASE_SM_LOCK;
 659         return bd->start;
 660
 661         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 662     } else {
 663
 664         bd = cap->r.rCurrentAlloc;
 665         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 666
 667             // The CurrentAlloc block is full, we need to find another
 668             // one.  First, we try taking the next block from the
 669             // nursery:
 670             bd = cap->r.rCurrentNursery->link;
 671
 672             if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 673                 // The nursery is empty, or the next block is already
 674                 // full: allocate a fresh block (we can't fail here).
 675                 ACQUIRE_SM_LOCK;
 676                 bd = allocBlock();
 677                 cap->r.rNursery->n_blocks++;
 678                 RELEASE_SM_LOCK;
 679                 bd->gen_no = 0;
 680                 bd->step = cap->r.rNursery;
 681                 bd->flags = 0;
 682             } else {
 683                 // we have a block in the nursery: take it and put
 684                 // it at the *front* of the nursery list, and use it
 685                 // to allocate() from.
 686                 cap->r.rCurrentNursery->link = bd->link;
 687                 if (bd->link != NULL) {
 688                     bd->link->u.back = cap->r.rCurrentNursery;
 689                 }
 690             }
 691             dbl_link_onto(bd, &cap->r.rNursery->blocks);
 692             cap->r.rCurrentAlloc = bd;
 693             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 694         }
 695     }
 696     p = bd->free;
 697     bd->free += n;
 698     return p;
 699 }
 700
 701 /* ---------------------------------------------------------------------------
 702    Allocate a fixed/pinned object.
 703
 704    We allocate small pinned objects into a single block, allocating a
 705    new block when the current one overflows.  The block is chained
 706    onto the large_object_list of generation 0 step 0.
 707
 708    NOTE: The GC can't in general handle pinned objects.  This
 709    interface is only safe to use for ByteArrays, which have no
 710    pointers and don't require scavenging.  It works because the
 711    block's descriptor has the BF_LARGE flag set, so the block is
 712    treated as a large object and chained onto various lists, rather
 713    than the individual objects being copied.  However, when it comes
 714    to scavenge the block, the GC will only scavenge the first object.
 715    The reason is that the GC can't linearly scan a block of pinned
 716    objects at the moment (doing so would require using the
 717    mostly-copying techniques).  But since we're restricting ourselves
 718    to pinned ByteArrays, not scavenging is ok.
 719
 720    This function is called by newPinnedByteArray# which immediately
 721    fills the allocated memory with a MutableByteArray#.
 722    ------------------------------------------------------------------------- */
 723
 724 StgPtr
 725 allocatePinned( nat n )
 726 {
 727     StgPtr p;
 728     bdescr *bd = pinned_object_block;
 729
 730     // If the request is for a large object, then allocate()
 731     // will give us a pinned object anyway.
 732     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 733         return allocate(n);
 734     }
 735
 736     ACQUIRE_SM_LOCK;
 737
 738     TICK_ALLOC_HEAP_NOCTR(n);
 739     CCS_ALLOC(CCCS,n);
 740
 741     // we always return 8-byte aligned memory.  bd->free must be
 742     // 8-byte aligned to begin with, so we just round up n to
 743     // the nearest multiple of 8 bytes.
 744     if (sizeof(StgWord) == 4) {
 745         n = (n+1) & ~1;
 746     }
 747
 748     // If we don't have a block of pinned objects yet, or the current
 749     // one isn't large enough to hold the new object, allocate a new one.
 750     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 751         pinned_object_block = bd = allocBlock();
 752         dbl_link_onto(bd, &g0s0->large_objects);
 753         bd->gen_no = 0;
 754         bd->step   = g0s0;
 755         bd->flags  = BF_PINNED | BF_LARGE;
 756         bd->free   = bd->start;
 757         alloc_blocks++;
 758     }
 759
 760     p = bd->free;
 761     bd->free += n;
 762     RELEASE_SM_LOCK;
 763     return p;
 764 }
 765
 766 /* -----------------------------------------------------------------------------
 767    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 768    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 769    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 770    and is put on the mutable list.
 771    -------------------------------------------------------------------------- */
 772
 773 void
 774 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 775 {
 776     Capability *cap = regTableToCapability(reg);
 777     bdescr *bd;
 778     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 779         p->header.info = &stg_MUT_VAR_DIRTY_info;
 780         bd = Bdescr((StgPtr)p);
 781         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 782     }
 783 }
 784
 785 /* -----------------------------------------------------------------------------
 786    Allocation functions for GMP.
 787
 788    These all use the allocate() interface - we can't have any garbage
 789    collection going on during a gmp operation, so we use allocate()
 790    which always succeeds.  The gmp operations which might need to
 791    allocate will ask the storage manager (via doYouWantToGC()) whether
 792    a garbage collection is required, in case we get into a loop doing
 793    only allocate() style allocation.
 794    -------------------------------------------------------------------------- */
 795
 796 static void *
 797 stgAllocForGMP (size_t size_in_bytes)
 798 {
 799   StgArrWords* arr;
 800   nat data_size_in_words, total_size_in_words;
 801
 802   /* round up to a whole number of words */
 803   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 804   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 805
 806   /* allocate and fill it in. */
 807 #if defined(THREADED_RTS)
 808   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 809 #else
 810   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 811 #endif
 812   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 813
 814   /* and return a ptr to the goods inside the array */
 815   return arr->payload;
 816 }
 817
 818 static void *
 819 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 820 {
 821     void *new_stuff_ptr = stgAllocForGMP(new_size);
 822     nat i = 0;
 823     char *p = (char *) ptr;
 824     char *q = (char *) new_stuff_ptr;
 825
 826     for (; i < old_size; i++, p++, q++) {
 827         *q = *p;
 828     }
 829
 830     return(new_stuff_ptr);
 831 }
 832
 833 static void
 834 stgDeallocForGMP (void *ptr STG_UNUSED,
 835                   size_t size STG_UNUSED)
 836 {
 837     /* easy for us: the garbage collector does the dealloc'n */
 838 }
 839
 840 /* -----------------------------------------------------------------------------
 841  * Stats and stuff
 842  * -------------------------------------------------------------------------- */
 843
 844 /* -----------------------------------------------------------------------------
 845  * calcAllocated()
 846  *
 847  * Approximate how much we've allocated: number of blocks in the
 848  * nursery + blocks allocated via allocate() - unused nusery blocks.
 849  * This leaves a little slop at the end of each block, and doesn't
 850  * take into account large objects (ToDo).
 851  * -------------------------------------------------------------------------- */
 852
 853 lnat
 854 calcAllocated( void )
 855 {
 856   nat allocated;
 857   bdescr *bd;
 858
 859   allocated = allocated_bytes();
 860   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 861
 862   {
 863 #ifdef THREADED_RTS
 864   nat i;
 865   for (i = 0; i < n_nurseries; i++) {
 866       Capability *cap;
 867       for ( bd = capabilities[i].r.rCurrentNursery->link;
 868             bd != NULL; bd = bd->link ) {
 869           allocated -= BLOCK_SIZE_W;
 870       }
 871       cap = &capabilities[i];
 872       if (cap->r.rCurrentNursery->free <
 873           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 874           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 875               - cap->r.rCurrentNursery->free;
 876       }
 877   }
 878 #else
 879   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 880
 881   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 882       allocated -= BLOCK_SIZE_W;
 883   }
 884   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 885       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 886           - current_nursery->free;
 887   }
 888 #endif
 889   }
 890
 891   total_allocated += allocated;
 892   return allocated;
 893 }
 894
 895 /* Approximate the amount of live data in the heap.  To be called just
 896  * after garbage collection (see GarbageCollect()).
 897  */
 898 extern lnat
 899 calcLive(void)
 900 {
 901   nat g, s;
 902   lnat live = 0;
 903   step *stp;
 904
 905   if (RtsFlags.GcFlags.generations == 1) {
 906     live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
 907       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 908     return live;
 909   }
 910
 911   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 912     for (s = 0; s < generations[g].n_steps; s++) {
 913       /* approximate amount of live data (doesn't take into account slop
 914        * at end of each block).
 915        */
 916       if (g == 0 && s == 0) {
 917           continue;
 918       }
 919       stp = &generations[g].steps[s];
 920       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 921       if (stp->hp_bd != NULL) {
 922           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 923               / sizeof(W_);
 924       }
 925       if (stp->scavd_hp != NULL) {
 926           live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
 927       }
 928     }
 929   }
 930   return live;
 931 }
 932
 933 /* Approximate the number of blocks that will be needed at the next
 934  * garbage collection.
 935  *
 936  * Assume: all data currently live will remain live.  Steps that will
 937  * be collected next time will therefore need twice as many blocks
 938  * since all the data will be copied.
 939  */
 940 extern lnat
 941 calcNeeded(void)
 942 {
 943     lnat needed = 0;
 944     nat g, s;
 945     step *stp;
 946
 947     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 948         for (s = 0; s < generations[g].n_steps; s++) {
 949             if (g == 0 && s == 0) { continue; }
 950             stp = &generations[g].steps[s];
 951             if (generations[g].steps[0].n_blocks +
 952                 generations[g].steps[0].n_large_blocks
 953                 > generations[g].max_blocks
 954                 && stp->is_compacted == 0) {
 955                 needed += 2 * stp->n_blocks;
 956             } else {
 957                 needed += stp->n_blocks;
 958             }
 959         }
 960     }
 961     return needed;
 962 }
 963
 964 /* -----------------------------------------------------------------------------
 965    Debugging
 966
 967    memInventory() checks for memory leaks by counting up all the
 968    blocks we know about and comparing that to the number of blocks
 969    allegedly floating around in the system.
 970    -------------------------------------------------------------------------- */
 971
 972 #ifdef DEBUG
 973
 974 static lnat
 975 stepBlocks (step *stp)
 976 {
 977     lnat total_blocks;
 978     bdescr *bd;
 979
 980     total_blocks = stp->n_blocks;
 981     total_blocks += stp->n_old_blocks;
 982     for (bd = stp->large_objects; bd; bd = bd->link) {
 983         total_blocks += bd->blocks;
 984         /* hack for megablock groups: they have an extra block or two in
 985            the second and subsequent megablocks where the block
 986            descriptors would normally go.
 987         */
 988         if (bd->blocks > BLOCKS_PER_MBLOCK) {
 989             total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
 990                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
 991         }
 992     }
 993     return total_blocks;
 994 }
 995
 996 void
 997 memInventory(void)
 998 {
 999   nat g, s, i;
1000   step *stp;
1001   bdescr *bd;
1002   lnat total_blocks = 0, free_blocks = 0;
1003
1004   /* count the blocks we current have */
1005
1006   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1007       for (i = 0; i < n_capabilities; i++) {
1008           for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
1009               total_blocks += bd->blocks;
1010           }
1011       }
1012       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1013           total_blocks += bd->blocks;
1014       }
1015       for (s = 0; s < generations[g].n_steps; s++) {
1016           if (g==0 && s==0) continue;
1017           stp = &generations[g].steps[s];
1018           total_blocks += stepBlocks(stp);
1019       }
1020   }
1021
1022   for (i = 0; i < n_nurseries; i++) {
1023       total_blocks += stepBlocks(&nurseries[i]);
1024   }
1025 #ifdef THREADED_RTS
1026   // We put pinned object blocks in g0s0, so better count blocks there too.
1027   total_blocks += stepBlocks(g0s0);
1028 #endif
1029
1030   /* any blocks held by allocate() */
1031   for (bd = small_alloc_list; bd; bd = bd->link) {
1032     total_blocks += bd->blocks;
1033   }
1034
1035 #ifdef PROFILING
1036   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1037       total_blocks += retainerStackBlocks();
1038   }
1039 #endif
1040
1041   // count the blocks allocated by the arena allocator
1042   total_blocks += arenaBlocks();
1043
1044   /* count the blocks on the free list */
1045   free_blocks = countFreeList();
1046
1047   if (total_blocks + free_blocks != mblocks_allocated *
1048       BLOCKS_PER_MBLOCK) {
1049     debugBelch("Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
1050             total_blocks, free_blocks, total_blocks + free_blocks,
1051             mblocks_allocated * BLOCKS_PER_MBLOCK);
1052   }
1053
1054   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
1055 }
1056
1057
1058 nat
1059 countBlocks(bdescr *bd)
1060 {
1061     nat n;
1062     for (n=0; bd != NULL; bd=bd->link) {
1063         n += bd->blocks;
1064     }
1065     return n;
1066 }
1067
1068 /* Full heap sanity check. */
1069 void
1070 checkSanity( void )
1071 {
1072     nat g, s;
1073
1074     if (RtsFlags.GcFlags.generations == 1) {
1075         checkHeap(g0s0->blocks);
1076         checkChain(g0s0->large_objects);
1077     } else {
1078
1079         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1080             for (s = 0; s < generations[g].n_steps; s++) {
1081                 if (g == 0 && s == 0) { continue; }
1082                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1083                        == generations[g].steps[s].n_blocks);
1084                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1085                        == generations[g].steps[s].n_large_blocks);
1086                 checkHeap(generations[g].steps[s].blocks);
1087                 checkChain(generations[g].steps[s].large_objects);
1088                 if (g > 0) {
1089                     checkMutableList(generations[g].mut_list, g);
1090                 }
1091             }
1092         }
1093
1094         for (s = 0; s < n_nurseries; s++) {
1095             ASSERT(countBlocks(nurseries[s].blocks)
1096                    == nurseries[s].n_blocks);
1097             ASSERT(countBlocks(nurseries[s].large_objects)
1098                    == nurseries[s].n_large_blocks);
1099         }
1100
1101         checkFreeListSanity();
1102     }
1103 }
1104
1105 /* Nursery sanity check */
1106 void
1107 checkNurserySanity( step *stp )
1108 {
1109     bdescr *bd, *prev;
1110     nat blocks = 0;
1111
1112     prev = NULL;
1113     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1114         ASSERT(bd->u.back == prev);
1115         prev = bd;
1116         blocks += bd->blocks;
1117     }
1118     ASSERT(blocks == stp->n_blocks);
1119 }
1120
1121 // handy function for use in gdb, because Bdescr() is inlined.
1122 extern bdescr *_bdescr( StgPtr p );
1123
1124 bdescr *
1125 _bdescr( StgPtr p )
1126 {
1127     return Bdescr(p);
1128 }
1129
1130 #endif