rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16
  17 #include "Storage.h"
  18 #include "RtsUtils.h"
  19 #include "Stats.h"
  20 #include "BlockAlloc.h"
  21 #include "Weak.h"
  22 #include "Sanity.h"
  23 #include "Arena.h"
  24 #include "Capability.h"
  25 #include "Schedule.h"
  26 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  27 #include "OSMem.h"
  28 #include "Trace.h"
  29 #include "GC.h"
  30 #include "Evac.h"
  31
  32 #include <string.h>
  33
  34 #include "ffi.h"
  35
  36 /*
  37  * All these globals require sm_mutex to access in THREADED_RTS mode.
  38  */
  39 StgClosure    *caf_list         = NULL;
  40 StgClosure    *revertible_caf_list = NULL;
  41 rtsBool       keepCAFs;
  42
  43 nat alloc_blocks_lim;    /* GC if n_large_blocks in any nursery
  44                           * reaches this. */
  45
  46 bdescr *exec_block;
  47
  48 generation *generations = NULL; /* all the generations */
  49 generation *g0          = NULL; /* generation 0, for convenience */
  50 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  51
  52 nat total_steps         = 0;
  53 step *all_steps         = NULL; /* single array of steps */
  54
  55 ullong total_allocated = 0;     /* total memory allocated during run */
  56
  57 step *nurseries         = NULL; /* array of nurseries, size == n_capabilities */
  58
  59 #ifdef THREADED_RTS
  60 /*
  61  * Storage manager mutex:  protects all the above state from
  62  * simultaneous access by two STG threads.
  63  */
  64 Mutex sm_mutex;
  65 #endif
  66
  67 static void allocNurseries ( void );
  68
  69 static void
  70 initStep (step *stp, int g, int s)
  71 {
  72     stp->no = s;
  73     stp->abs_no = RtsFlags.GcFlags.steps * g + s;
  74     stp->blocks = NULL;
  75     stp->n_blocks = 0;
  76     stp->n_words = 0;
  77     stp->live_estimate = 0;
  78     stp->old_blocks = NULL;
  79     stp->n_old_blocks = 0;
  80     stp->gen = &generations[g];
  81     stp->gen_no = g;
  82     stp->large_objects = NULL;
  83     stp->n_large_blocks = 0;
  84     stp->scavenged_large_objects = NULL;
  85     stp->n_scavenged_large_blocks = 0;
  86     stp->mark = 0;
  87     stp->compact = 0;
  88     stp->bitmap = NULL;
  89 #ifdef THREADED_RTS
  90     initSpinLock(&stp->sync_large_objects);
  91 #endif
  92     stp->threads = END_TSO_QUEUE;
  93     stp->old_threads = END_TSO_QUEUE;
  94 }
  95
  96 void
  97 initStorage( void )
  98 {
  99   nat g, s;
 100   generation *gen;
 101
 102   if (generations != NULL) {
 103       // multi-init protection
 104       return;
 105   }
 106
 107   initMBlocks();
 108
 109   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 110    * doing something reasonable.
 111    */
 112   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 113   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
 114   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 115   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 116
 117   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 118       RtsFlags.GcFlags.heapSizeSuggestion >
 119       RtsFlags.GcFlags.maxHeapSize) {
 120     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 121   }
 122
 123   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 124       RtsFlags.GcFlags.minAllocAreaSize >
 125       RtsFlags.GcFlags.maxHeapSize) {
 126       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 127       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 128   }
 129
 130   initBlockAllocator();
 131
 132 #if defined(THREADED_RTS)
 133   initMutex(&sm_mutex);
 134 #endif
 135
 136   ACQUIRE_SM_LOCK;
 137
 138   /* allocate generation info array */
 139   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 140                                              * sizeof(struct generation_),
 141                                              "initStorage: gens");
 142
 143   /* Initialise all generations */
 144   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 145     gen = &generations[g];
 146     gen->no = g;
 147     gen->mut_list = allocBlock();
 148     gen->collections = 0;
 149     gen->par_collections = 0;
 150     gen->failed_promotions = 0;
 151     gen->max_blocks = 0;
 152   }
 153
 154   /* A couple of convenience pointers */
 155   g0 = &generations[0];
 156   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 157
 158   /* allocate all the steps into an array.  It is important that we do
 159      it this way, because we need the invariant that two step pointers
 160      can be directly compared to see which is the oldest.
 161      Remember that the last generation has only one step. */
 162   total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
 163   all_steps   = stgMallocBytes(total_steps * sizeof(struct step_),
 164                                "initStorage: steps");
 165
 166   /* Allocate step structures in each generation */
 167   if (RtsFlags.GcFlags.generations > 1) {
 168     /* Only for multiple-generations */
 169
 170     /* Oldest generation: one step */
 171     oldest_gen->n_steps = 1;
 172     oldest_gen->steps   = all_steps + (RtsFlags.GcFlags.generations - 1)
 173                                       * RtsFlags.GcFlags.steps;
 174
 175     /* set up all except the oldest generation with 2 steps */
 176     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 177       generations[g].n_steps = RtsFlags.GcFlags.steps;
 178       generations[g].steps   = all_steps + g * RtsFlags.GcFlags.steps;
 179     }
 180
 181   } else {
 182     /* single generation, i.e. a two-space collector */
 183     g0->n_steps = 1;
 184     g0->steps   = all_steps;
 185   }
 186
 187   nurseries = stgMallocBytes (n_capabilities * sizeof(struct step_),
 188                               "initStorage: nurseries");
 189
 190   /* Initialise all steps */
 191   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 192     for (s = 0; s < generations[g].n_steps; s++) {
 193         initStep(&generations[g].steps[s], g, s);
 194     }
 195   }
 196
 197   for (s = 0; s < n_capabilities; s++) {
 198       initStep(&nurseries[s], 0, s);
 199   }
 200
 201   /* Set up the destination pointers in each younger gen. step */
 202   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 203     for (s = 0; s < generations[g].n_steps-1; s++) {
 204       generations[g].steps[s].to = &generations[g].steps[s+1];
 205     }
 206     generations[g].steps[s].to = &generations[g+1].steps[0];
 207   }
 208   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 209
 210   for (s = 0; s < n_capabilities; s++) {
 211       nurseries[s].to = generations[0].steps[0].to;
 212   }
 213
 214   /* The oldest generation has one step. */
 215   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 216       if (RtsFlags.GcFlags.generations == 1) {
 217           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 218       } else {
 219           oldest_gen->steps[0].mark = 1;
 220           if (RtsFlags.GcFlags.compact)
 221               oldest_gen->steps[0].compact = 1;
 222       }
 223   }
 224
 225   generations[0].max_blocks = 0;
 226
 227   /* The allocation area.  Policy: keep the allocation area
 228    * small to begin with, even if we have a large suggested heap
 229    * size.  Reason: we're going to do a major collection first, and we
 230    * don't want it to be a big one.  This vague idea is borne out by
 231    * rigorous experimental evidence.
 232    */
 233   allocNurseries();
 234
 235   weak_ptr_list = NULL;
 236   caf_list = NULL;
 237   revertible_caf_list = NULL;
 238
 239   /* initialise the allocate() interface */
 240   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 241
 242   exec_block = NULL;
 243
 244 #ifdef THREADED_RTS
 245   initSpinLock(&gc_alloc_block_sync);
 246   whitehole_spin = 0;
 247 #endif
 248
 249   N = 0;
 250
 251   initGcThreads();
 252
 253   IF_DEBUG(gc, statDescribeGens());
 254
 255   RELEASE_SM_LOCK;
 256 }
 257
 258 void
 259 exitStorage (void)
 260 {
 261     stat_exit(calcAllocated());
 262 }
 263
 264 void
 265 freeStorage (void)
 266 {
 267     stgFree(all_steps); // frees all the steps
 268     stgFree(generations);
 269     freeAllMBlocks();
 270 #if defined(THREADED_RTS)
 271     closeMutex(&sm_mutex);
 272 #endif
 273     stgFree(nurseries);
 274     freeGcThreads();
 275 }
 276
 277 /* -----------------------------------------------------------------------------
 278    CAF management.
 279
 280    The entry code for every CAF does the following:
 281
 282       - builds a CAF_BLACKHOLE in the heap
 283       - pushes an update frame pointing to the CAF_BLACKHOLE
 284       - invokes UPD_CAF(), which:
 285           - calls newCaf, below
 286           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 287
 288    Why do we build a BLACKHOLE in the heap rather than just updating
 289    the thunk directly?  It's so that we only need one kind of update
 290    frame - otherwise we'd need a static version of the update frame too.
 291
 292    newCaf() does the following:
 293
 294       - it puts the CAF on the oldest generation's mut-once list.
 295         This is so that we can treat the CAF as a root when collecting
 296         younger generations.
 297
 298    For GHCI, we have additional requirements when dealing with CAFs:
 299
 300       - we must *retain* all dynamically-loaded CAFs ever entered,
 301         just in case we need them again.
 302       - we must be able to *revert* CAFs that have been evaluated, to
 303         their pre-evaluated form.
 304
 305       To do this, we use an additional CAF list.  When newCaf() is
 306       called on a dynamically-loaded CAF, we add it to the CAF list
 307       instead of the old-generation mutable list, and save away its
 308       old info pointer (in caf->saved_info) for later reversion.
 309
 310       To revert all the CAFs, we traverse the CAF list and reset the
 311       info pointer to caf->saved_info, then throw away the CAF list.
 312       (see GC.c:revertCAFs()).
 313
 314       -- SDM 29/1/01
 315
 316    -------------------------------------------------------------------------- */
 317
 318 void
 319 newCAF(StgClosure* caf)
 320 {
 321   ACQUIRE_SM_LOCK;
 322
 323 #ifdef DYNAMIC
 324   if(keepCAFs)
 325   {
 326     // HACK:
 327     // If we are in GHCi _and_ we are using dynamic libraries,
 328     // then we can't redirect newCAF calls to newDynCAF (see below),
 329     // so we make newCAF behave almost like newDynCAF.
 330     // The dynamic libraries might be used by both the interpreted
 331     // program and GHCi itself, so they must not be reverted.
 332     // This also means that in GHCi with dynamic libraries, CAFs are not
 333     // garbage collected. If this turns out to be a problem, we could
 334     // do another hack here and do an address range test on caf to figure
 335     // out whether it is from a dynamic library.
 336     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 337     ((StgIndStatic *)caf)->static_link = caf_list;
 338     caf_list = caf;
 339   }
 340   else
 341 #endif
 342   {
 343     /* Put this CAF on the mutable list for the old generation.
 344     * This is a HACK - the IND_STATIC closure doesn't really have
 345     * a mut_link field, but we pretend it has - in fact we re-use
 346     * the STATIC_LINK field for the time being, because when we
 347     * come to do a major GC we won't need the mut_link field
 348     * any more and can use it as a STATIC_LINK.
 349     */
 350     ((StgIndStatic *)caf)->saved_info = NULL;
 351     recordMutableGen(caf, oldest_gen->no);
 352   }
 353
 354   RELEASE_SM_LOCK;
 355 }
 356
 357 // An alternate version of newCaf which is used for dynamically loaded
 358 // object code in GHCi.  In this case we want to retain *all* CAFs in
 359 // the object code, because they might be demanded at any time from an
 360 // expression evaluated on the command line.
 361 // Also, GHCi might want to revert CAFs, so we add these to the
 362 // revertible_caf_list.
 363 //
 364 // The linker hackily arranges that references to newCaf from dynamic
 365 // code end up pointing to newDynCAF.
 366 void
 367 newDynCAF(StgClosure *caf)
 368 {
 369     ACQUIRE_SM_LOCK;
 370
 371     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 372     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 373     revertible_caf_list = caf;
 374
 375     RELEASE_SM_LOCK;
 376 }
 377
 378 /* -----------------------------------------------------------------------------
 379    Nursery management.
 380    -------------------------------------------------------------------------- */
 381
 382 static bdescr *
 383 allocNursery (step *stp, bdescr *tail, nat blocks)
 384 {
 385     bdescr *bd;
 386     nat i;
 387
 388     // Allocate a nursery: we allocate fresh blocks one at a time and
 389     // cons them on to the front of the list, not forgetting to update
 390     // the back pointer on the tail of the list to point to the new block.
 391     for (i=0; i < blocks; i++) {
 392         // @LDV profiling
 393         /*
 394           processNursery() in LdvProfile.c assumes that every block group in
 395           the nursery contains only a single block. So, if a block group is
 396           given multiple blocks, change processNursery() accordingly.
 397         */
 398         bd = allocBlock();
 399         bd->link = tail;
 400         // double-link the nursery: we might need to insert blocks
 401         if (tail != NULL) {
 402             tail->u.back = bd;
 403         }
 404         initBdescr(bd, stp);
 405         bd->flags = 0;
 406         bd->free = bd->start;
 407         tail = bd;
 408     }
 409     tail->u.back = NULL;
 410     return tail;
 411 }
 412
 413 static void
 414 assignNurseriesToCapabilities (void)
 415 {
 416     nat i;
 417
 418     for (i = 0; i < n_capabilities; i++) {
 419         capabilities[i].r.rNursery        = &nurseries[i];
 420         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 421         capabilities[i].r.rCurrentAlloc   = NULL;
 422     }
 423 }
 424
 425 static void
 426 allocNurseries( void )
 427 {
 428     nat i;
 429
 430     for (i = 0; i < n_capabilities; i++) {
 431         nurseries[i].blocks =
 432             allocNursery(&nurseries[i], NULL,
 433                          RtsFlags.GcFlags.minAllocAreaSize);
 434         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 435         nurseries[i].old_blocks   = NULL;
 436         nurseries[i].n_old_blocks = 0;
 437     }
 438     assignNurseriesToCapabilities();
 439 }
 440
 441 void
 442 resetNurseries( void )
 443 {
 444     nat i;
 445     bdescr *bd;
 446     step *stp;
 447
 448     for (i = 0; i < n_capabilities; i++) {
 449         stp = &nurseries[i];
 450         for (bd = stp->blocks; bd; bd = bd->link) {
 451             bd->free = bd->start;
 452             ASSERT(bd->gen_no == 0);
 453             ASSERT(bd->step == stp);
 454             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 455         }
 456         // these large objects are dead, since we have just GC'd
 457         freeChain(stp->large_objects);
 458         stp->large_objects = NULL;
 459         stp->n_large_blocks = 0;
 460     }
 461     assignNurseriesToCapabilities();
 462 }
 463
 464 lnat
 465 countNurseryBlocks (void)
 466 {
 467     nat i;
 468     lnat blocks = 0;
 469
 470     for (i = 0; i < n_capabilities; i++) {
 471         blocks += nurseries[i].n_blocks;
 472         blocks += nurseries[i].n_large_blocks;
 473     }
 474     return blocks;
 475 }
 476
 477 static void
 478 resizeNursery ( step *stp, nat blocks )
 479 {
 480   bdescr *bd;
 481   nat nursery_blocks;
 482
 483   nursery_blocks = stp->n_blocks;
 484   if (nursery_blocks == blocks) return;
 485
 486   if (nursery_blocks < blocks) {
 487       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 488                  blocks);
 489     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 490   }
 491   else {
 492     bdescr *next_bd;
 493
 494     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 495                blocks);
 496
 497     bd = stp->blocks;
 498     while (nursery_blocks > blocks) {
 499         next_bd = bd->link;
 500         next_bd->u.back = NULL;
 501         nursery_blocks -= bd->blocks; // might be a large block
 502         freeGroup(bd);
 503         bd = next_bd;
 504     }
 505     stp->blocks = bd;
 506     // might have gone just under, by freeing a large block, so make
 507     // up the difference.
 508     if (nursery_blocks < blocks) {
 509         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 510     }
 511   }
 512
 513   stp->n_blocks = blocks;
 514   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 515 }
 516
 517 //
 518 // Resize each of the nurseries to the specified size.
 519 //
 520 void
 521 resizeNurseriesFixed (nat blocks)
 522 {
 523     nat i;
 524     for (i = 0; i < n_capabilities; i++) {
 525         resizeNursery(&nurseries[i], blocks);
 526     }
 527 }
 528
 529 //
 530 // Resize the nurseries to the total specified size.
 531 //
 532 void
 533 resizeNurseries (nat blocks)
 534 {
 535     // If there are multiple nurseries, then we just divide the number
 536     // of available blocks between them.
 537     resizeNurseriesFixed(blocks / n_capabilities);
 538 }
 539
 540
 541 /* -----------------------------------------------------------------------------
 542    move_TSO is called to update the TSO structure after it has been
 543    moved from one place to another.
 544    -------------------------------------------------------------------------- */
 545
 546 void
 547 move_TSO (StgTSO *src, StgTSO *dest)
 548 {
 549     ptrdiff_t diff;
 550
 551     // relocate the stack pointer...
 552     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 553     dest->sp = (StgPtr)dest->sp + diff;
 554 }
 555
 556 /* -----------------------------------------------------------------------------
 557    split N blocks off the front of the given bdescr, returning the
 558    new block group.  We add the remainder to the large_blocks list
 559    in the same step as the original block.
 560    -------------------------------------------------------------------------- */
 561
 562 bdescr *
 563 splitLargeBlock (bdescr *bd, nat blocks)
 564 {
 565     bdescr *new_bd;
 566
 567     ACQUIRE_SM_LOCK;
 568
 569     ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks);
 570
 571     // subtract the original number of blocks from the counter first
 572     bd->step->n_large_blocks -= bd->blocks;
 573
 574     new_bd = splitBlockGroup (bd, blocks);
 575     initBdescr(new_bd, bd->step);
 576     new_bd->flags   = BF_LARGE | (bd->flags & BF_EVACUATED);
 577     // if new_bd is in an old generation, we have to set BF_EVACUATED
 578     new_bd->free    = bd->free;
 579     dbl_link_onto(new_bd, &bd->step->large_objects);
 580
 581     ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
 582
 583     // add the new number of blocks to the counter.  Due to the gaps
 584     // for block descriptors, new_bd->blocks + bd->blocks might not be
 585     // equal to the original bd->blocks, which is why we do it this way.
 586     bd->step->n_large_blocks += bd->blocks + new_bd->blocks;
 587
 588     ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks);
 589
 590     RELEASE_SM_LOCK;
 591
 592     return new_bd;
 593 }
 594
 595 /* -----------------------------------------------------------------------------
 596    allocate()
 597
 598    This allocates memory in the current thread - it is intended for
 599    use primarily from STG-land where we have a Capability.  It is
 600    better than allocate() because it doesn't require taking the
 601    sm_mutex lock in the common case.
 602
 603    Memory is allocated directly from the nursery if possible (but not
 604    from the current nursery block, so as not to interfere with
 605    Hp/HpLim).
 606    -------------------------------------------------------------------------- */
 607
 608 StgPtr
 609 allocate (Capability *cap, lnat n)
 610 {
 611     bdescr *bd;
 612     StgPtr p;
 613     step *stp;
 614
 615     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 616         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 617
 618         // Attempting to allocate an object larger than maxHeapSize
 619         // should definitely be disallowed.  (bug #1791)
 620         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 621             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 622             heapOverflow();
 623             // heapOverflow() doesn't exit (see #2592), but we aren't
 624             // in a position to do a clean shutdown here: we
 625             // either have to allocate the memory or exit now.
 626             // Allocating the memory would be bad, because the user
 627             // has requested that we not exceed maxHeapSize, so we
 628             // just exit.
 629             stg_exit(EXIT_HEAPOVERFLOW);
 630         }
 631
 632         stp = &nurseries[cap->no];
 633
 634         ACQUIRE_SM_LOCK
 635         bd = allocGroup(req_blocks);
 636         RELEASE_SM_LOCK;
 637         dbl_link_onto(bd, &stp->large_objects);
 638         stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
 639         initBdescr(bd, stp);
 640         bd->flags = BF_LARGE;
 641         bd->free = bd->start + n;
 642         return bd->start;
 643     }
 644
 645     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 646
 647     TICK_ALLOC_HEAP_NOCTR(n);
 648     CCS_ALLOC(CCCS,n);
 649
 650     bd = cap->r.rCurrentAlloc;
 651     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 652
 653         // The CurrentAlloc block is full, we need to find another
 654         // one.  First, we try taking the next block from the
 655         // nursery:
 656         bd = cap->r.rCurrentNursery->link;
 657
 658         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 659             // The nursery is empty, or the next block is already
 660             // full: allocate a fresh block (we can't fail here).
 661             ACQUIRE_SM_LOCK;
 662             bd = allocBlock();
 663             cap->r.rNursery->n_blocks++;
 664             RELEASE_SM_LOCK;
 665             initBdescr(bd, cap->r.rNursery);
 666             bd->flags = 0;
 667             // If we had to allocate a new block, then we'll GC
 668             // pretty quickly now, because MAYBE_GC() will
 669             // notice that CurrentNursery->link is NULL.
 670         } else {
 671             // we have a block in the nursery: take it and put
 672             // it at the *front* of the nursery list, and use it
 673             // to allocate() from.
 674             cap->r.rCurrentNursery->link = bd->link;
 675             if (bd->link != NULL) {
 676                 bd->link->u.back = cap->r.rCurrentNursery;
 677             }
 678         }
 679         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 680         cap->r.rCurrentAlloc = bd;
 681         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 682     }
 683     p = bd->free;
 684     bd->free += n;
 685     return p;
 686 }
 687
 688 /* ---------------------------------------------------------------------------
 689    Allocate a fixed/pinned object.
 690
 691    We allocate small pinned objects into a single block, allocating a
 692    new block when the current one overflows.  The block is chained
 693    onto the large_object_list of generation 0 step 0.
 694
 695    NOTE: The GC can't in general handle pinned objects.  This
 696    interface is only safe to use for ByteArrays, which have no
 697    pointers and don't require scavenging.  It works because the
 698    block's descriptor has the BF_LARGE flag set, so the block is
 699    treated as a large object and chained onto various lists, rather
 700    than the individual objects being copied.  However, when it comes
 701    to scavenge the block, the GC will only scavenge the first object.
 702    The reason is that the GC can't linearly scan a block of pinned
 703    objects at the moment (doing so would require using the
 704    mostly-copying techniques).  But since we're restricting ourselves
 705    to pinned ByteArrays, not scavenging is ok.
 706
 707    This function is called by newPinnedByteArray# which immediately
 708    fills the allocated memory with a MutableByteArray#.
 709    ------------------------------------------------------------------------- */
 710
 711 StgPtr
 712 allocatePinned (Capability *cap, lnat n)
 713 {
 714     StgPtr p;
 715     bdescr *bd;
 716     step *stp;
 717
 718     // If the request is for a large object, then allocate()
 719     // will give us a pinned object anyway.
 720     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 721         p = allocate(cap, n);
 722         Bdescr(p)->flags |= BF_PINNED;
 723         return p;
 724     }
 725
 726     TICK_ALLOC_HEAP_NOCTR(n);
 727     CCS_ALLOC(CCCS,n);
 728
 729     bd = cap->pinned_object_block;
 730
 731     // If we don't have a block of pinned objects yet, or the current
 732     // one isn't large enough to hold the new object, allocate a new one.
 733     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 734         ACQUIRE_SM_LOCK
 735         cap->pinned_object_block = bd = allocBlock();
 736         RELEASE_SM_LOCK
 737         stp = &nurseries[cap->no];
 738         dbl_link_onto(bd, &stp->large_objects);
 739         stp->n_large_blocks++;
 740         initBdescr(bd, stp);
 741         bd->flags  = BF_PINNED | BF_LARGE;
 742         bd->free   = bd->start;
 743     }
 744
 745     p = bd->free;
 746     bd->free += n;
 747     return p;
 748 }
 749
 750 /* -----------------------------------------------------------------------------
 751    Write Barriers
 752    -------------------------------------------------------------------------- */
 753
 754 /*
 755    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 756    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 757    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 758    and is put on the mutable list.
 759 */
 760 void
 761 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 762 {
 763     Capability *cap = regTableToCapability(reg);
 764     bdescr *bd;
 765     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 766         p->header.info = &stg_MUT_VAR_DIRTY_info;
 767         bd = Bdescr((StgPtr)p);
 768         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 769     }
 770 }
 771
 772 // Setting a TSO's link field with a write barrier.
 773 // It is *not* necessary to call this function when
 774 //    * setting the link field to END_TSO_QUEUE
 775 //    * putting a TSO on the blackhole_queue
 776 //    * setting the link field of the currently running TSO, as it
 777 //      will already be dirty.
 778 void
 779 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 780 {
 781     bdescr *bd;
 782     if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
 783         tso->flags |= TSO_LINK_DIRTY;
 784         bd = Bdescr((StgPtr)tso);
 785         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 786     }
 787     tso->_link = target;
 788 }
 789
 790 void
 791 dirty_TSO (Capability *cap, StgTSO *tso)
 792 {
 793     bdescr *bd;
 794     if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
 795         bd = Bdescr((StgPtr)tso);
 796         if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
 797     }
 798     tso->dirty = 1;
 799 }
 800
 801 /*
 802    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 803    on the mutable list; a MVAR_DIRTY is.  When written to, a
 804    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 805    The check for MVAR_CLEAN is inlined at the call site for speed,
 806    this really does make a difference on concurrency-heavy benchmarks
 807    such as Chaneneos and cheap-concurrency.
 808 */
 809 void
 810 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 811 {
 812     Capability *cap = regTableToCapability(reg);
 813     bdescr *bd;
 814     bd = Bdescr((StgPtr)p);
 815     if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 816 }
 817
 818 /* -----------------------------------------------------------------------------
 819  * Stats and stuff
 820  * -------------------------------------------------------------------------- */
 821
 822 /* -----------------------------------------------------------------------------
 823  * calcAllocated()
 824  *
 825  * Approximate how much we've allocated: number of blocks in the
 826  * nursery + blocks allocated via allocate() - unused nusery blocks.
 827  * This leaves a little slop at the end of each block.
 828  * -------------------------------------------------------------------------- */
 829
 830 lnat
 831 calcAllocated( void )
 832 {
 833   nat allocated;
 834   bdescr *bd;
 835   nat i;
 836
 837   allocated = countNurseryBlocks() * BLOCK_SIZE_W;
 838
 839   for (i = 0; i < n_capabilities; i++) {
 840       Capability *cap;
 841       for ( bd = capabilities[i].r.rCurrentNursery->link;
 842             bd != NULL; bd = bd->link ) {
 843           allocated -= BLOCK_SIZE_W;
 844       }
 845       cap = &capabilities[i];
 846       if (cap->r.rCurrentNursery->free <
 847           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 848           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 849               - cap->r.rCurrentNursery->free;
 850       }
 851       if (cap->pinned_object_block != NULL) {
 852           allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) -
 853               cap->pinned_object_block->free;
 854       }
 855   }
 856
 857   total_allocated += allocated;
 858   return allocated;
 859 }
 860
 861 /* Approximate the amount of live data in the heap.  To be called just
 862  * after garbage collection (see GarbageCollect()).
 863  */
 864 lnat
 865 calcLiveBlocks(void)
 866 {
 867   nat g, s;
 868   lnat live = 0;
 869   step *stp;
 870
 871   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 872     for (s = 0; s < generations[g].n_steps; s++) {
 873       /* approximate amount of live data (doesn't take into account slop
 874        * at end of each block).
 875        */
 876       if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
 877           continue;
 878       }
 879       stp = &generations[g].steps[s];
 880       live += stp->n_large_blocks + stp->n_blocks;
 881     }
 882   }
 883   return live;
 884 }
 885
 886 lnat
 887 countOccupied(bdescr *bd)
 888 {
 889     lnat words;
 890
 891     words = 0;
 892     for (; bd != NULL; bd = bd->link) {
 893         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
 894         words += bd->free - bd->start;
 895     }
 896     return words;
 897 }
 898
 899 // Return an accurate count of the live data in the heap, excluding
 900 // generation 0.
 901 lnat
 902 calcLiveWords(void)
 903 {
 904     nat g, s;
 905     lnat live;
 906     step *stp;
 907
 908     live = 0;
 909     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 910         for (s = 0; s < generations[g].n_steps; s++) {
 911             if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) continue;
 912             stp = &generations[g].steps[s];
 913             live += stp->n_words + countOccupied(stp->large_objects);
 914         }
 915     }
 916     return live;
 917 }
 918
 919 /* Approximate the number of blocks that will be needed at the next
 920  * garbage collection.
 921  *
 922  * Assume: all data currently live will remain live.  Steps that will
 923  * be collected next time will therefore need twice as many blocks
 924  * since all the data will be copied.
 925  */
 926 extern lnat
 927 calcNeeded(void)
 928 {
 929     lnat needed = 0;
 930     nat g, s;
 931     step *stp;
 932
 933     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 934         for (s = 0; s < generations[g].n_steps; s++) {
 935             if (g == 0 && s == 0) { continue; }
 936             stp = &generations[g].steps[s];
 937
 938             // we need at least this much space
 939             needed += stp->n_blocks + stp->n_large_blocks;
 940
 941             // any additional space needed to collect this gen next time?
 942             if (g == 0 || // always collect gen 0
 943                 (generations[g].steps[0].n_blocks +
 944                  generations[g].steps[0].n_large_blocks
 945                  > generations[g].max_blocks)) {
 946                 // we will collect this gen next time
 947                 if (stp->mark) {
 948                     //  bitmap:
 949                     needed += stp->n_blocks / BITS_IN(W_);
 950                     //  mark stack:
 951                     needed += stp->n_blocks / 100;
 952                 }
 953                 if (stp->compact) {
 954                     continue; // no additional space needed for compaction
 955                 } else {
 956                     needed += stp->n_blocks;
 957                 }
 958             }
 959         }
 960     }
 961     return needed;
 962 }
 963
 964 /* ----------------------------------------------------------------------------
 965    Executable memory
 966
 967    Executable memory must be managed separately from non-executable
 968    memory.  Most OSs these days require you to jump through hoops to
 969    dynamically allocate executable memory, due to various security
 970    measures.
 971
 972    Here we provide a small memory allocator for executable memory.
 973    Memory is managed with a page granularity; we allocate linearly
 974    in the page, and when the page is emptied (all objects on the page
 975    are free) we free the page again, not forgetting to make it
 976    non-executable.
 977
 978    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
 979          the linker cannot use allocateExec for loading object code files
 980          on Windows. Once allocateExec can handle larger objects, the linker
 981          should be modified to use allocateExec instead of VirtualAlloc.
 982    ------------------------------------------------------------------------- */
 983
 984 #if defined(linux_HOST_OS)
 985
 986 // On Linux we need to use libffi for allocating executable memory,
 987 // because it knows how to work around the restrictions put in place
 988 // by SELinux.
 989
 990 void *allocateExec (nat bytes, void **exec_ret)
 991 {
 992     void **ret, **exec;
 993     ACQUIRE_SM_LOCK;
 994     ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
 995     RELEASE_SM_LOCK;
 996     if (ret == NULL) return ret;
 997     *ret = ret; // save the address of the writable mapping, for freeExec().
 998     *exec_ret = exec + 1;
 999     return (ret + 1);
1000 }
1001
1002 // freeExec gets passed the executable address, not the writable address.
1003 void freeExec (void *addr)
1004 {
1005     void *writable;
1006     writable = *((void**)addr - 1);
1007     ACQUIRE_SM_LOCK;
1008     ffi_closure_free (writable);
1009     RELEASE_SM_LOCK
1010 }
1011
1012 #else
1013
1014 void *allocateExec (nat bytes, void **exec_ret)
1015 {
1016     void *ret;
1017     nat n;
1018
1019     ACQUIRE_SM_LOCK;
1020
1021     // round up to words.
1022     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1023
1024     if (n+1 > BLOCK_SIZE_W) {
1025         barf("allocateExec: can't handle large objects");
1026     }
1027
1028     if (exec_block == NULL ||
1029         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1030         bdescr *bd;
1031         lnat pagesize = getPageSize();
1032         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1033         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1034         bd->gen_no = 0;
1035         bd->flags = BF_EXEC;
1036         bd->link = exec_block;
1037         if (exec_block != NULL) {
1038             exec_block->u.back = bd;
1039         }
1040         bd->u.back = NULL;
1041         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1042         exec_block = bd;
1043     }
1044     *(exec_block->free) = n;  // store the size of this chunk
1045     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1046     ret = exec_block->free + 1;
1047     exec_block->free += n + 1;
1048
1049     RELEASE_SM_LOCK
1050     *exec_ret = ret;
1051     return ret;
1052 }
1053
1054 void freeExec (void *addr)
1055 {
1056     StgPtr p = (StgPtr)addr - 1;
1057     bdescr *bd = Bdescr((StgPtr)p);
1058
1059     if ((bd->flags & BF_EXEC) == 0) {
1060         barf("freeExec: not executable");
1061     }
1062
1063     if (*(StgPtr)p == 0) {
1064         barf("freeExec: already free?");
1065     }
1066
1067     ACQUIRE_SM_LOCK;
1068
1069     bd->gen_no -= *(StgPtr)p;
1070     *(StgPtr)p = 0;
1071
1072     if (bd->gen_no == 0) {
1073         // Free the block if it is empty, but not if it is the block at
1074         // the head of the queue.
1075         if (bd != exec_block) {
1076             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1077             dbl_link_remove(bd, &exec_block);
1078             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1079             freeGroup(bd);
1080         } else {
1081             bd->free = bd->start;
1082         }
1083     }
1084
1085     RELEASE_SM_LOCK
1086 }
1087
1088 #endif /* mingw32_HOST_OS */
1089
1090 #ifdef DEBUG
1091
1092 // handy function for use in gdb, because Bdescr() is inlined.
1093 extern bdescr *_bdescr( StgPtr p );
1094
1095 bdescr *
1096 _bdescr( StgPtr p )
1097 {
1098     return Bdescr(p);
1099 }
1100
1101 #endif