rts/sm/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2008
   4  *
   5  * Storage manager front end
   6  *
   7  * Documentation on the architecture of the Storage Manager can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16
  17 #include "Storage.h"
  18 #include "RtsUtils.h"
  19 #include "Stats.h"
  20 #include "BlockAlloc.h"
  21 #include "Weak.h"
  22 #include "Sanity.h"
  23 #include "Arena.h"
  24 #include "Capability.h"
  25 #include "Schedule.h"
  26 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  27 #include "OSMem.h"
  28 #include "Trace.h"
  29 #include "GC.h"
  30 #include "Evac.h"
  31
  32 #include <string.h>
  33
  34 #include "ffi.h"
  35
  36 /*
  37  * All these globals require sm_mutex to access in THREADED_RTS mode.
  38  */
  39 StgClosure    *caf_list         = NULL;
  40 StgClosure    *revertible_caf_list = NULL;
  41 rtsBool       keepCAFs;
  42
  43 nat large_alloc_lim;    /* GC if n_large_blocks in any nursery
  44                          * reaches this. */
  45
  46 bdescr *exec_block;
  47
  48 generation *generations = NULL; /* all the generations */
  49 generation *g0          = NULL; /* generation 0, for convenience */
  50 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  51
  52 nursery *nurseries = NULL;     /* array of nurseries, size == n_capabilities */
  53
  54 #ifdef THREADED_RTS
  55 /*
  56  * Storage manager mutex:  protects all the above state from
  57  * simultaneous access by two STG threads.
  58  */
  59 Mutex sm_mutex;
  60 #endif
  61
  62 static void allocNurseries ( void );
  63
  64 static void
  65 initGeneration (generation *gen, int g)
  66 {
  67     gen->no = g;
  68     gen->collections = 0;
  69     gen->par_collections = 0;
  70     gen->failed_promotions = 0;
  71     gen->max_blocks = 0;
  72     gen->blocks = NULL;
  73     gen->n_blocks = 0;
  74     gen->n_words = 0;
  75     gen->live_estimate = 0;
  76     gen->old_blocks = NULL;
  77     gen->n_old_blocks = 0;
  78     gen->large_objects = NULL;
  79     gen->n_large_blocks = 0;
  80     gen->n_new_large_words = 0;
  81     gen->scavenged_large_objects = NULL;
  82     gen->n_scavenged_large_blocks = 0;
  83     gen->mark = 0;
  84     gen->compact = 0;
  85     gen->bitmap = NULL;
  86 #ifdef THREADED_RTS
  87     initSpinLock(&gen->sync_large_objects);
  88 #endif
  89     gen->threads = END_TSO_QUEUE;
  90     gen->old_threads = END_TSO_QUEUE;
  91 }
  92
  93 void
  94 initStorage( void )
  95 {
  96     nat g, n;
  97
  98   if (generations != NULL) {
  99       // multi-init protection
 100       return;
 101   }
 102
 103   initMBlocks();
 104
 105   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 106    * doing something reasonable.
 107    */
 108   /* We use the NOT_NULL variant or gcc warns that the test is always true */
 109   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
 110   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 111   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 112
 113   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 114       RtsFlags.GcFlags.heapSizeSuggestion >
 115       RtsFlags.GcFlags.maxHeapSize) {
 116     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 117   }
 118
 119   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 120       RtsFlags.GcFlags.minAllocAreaSize >
 121       RtsFlags.GcFlags.maxHeapSize) {
 122       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 123       RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
 124   }
 125
 126   initBlockAllocator();
 127
 128 #if defined(THREADED_RTS)
 129   initMutex(&sm_mutex);
 130 #endif
 131
 132   ACQUIRE_SM_LOCK;
 133
 134   /* allocate generation info array */
 135   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 136                                              * sizeof(struct generation_),
 137                                              "initStorage: gens");
 138
 139   /* Initialise all generations */
 140   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 141       initGeneration(&generations[g], g);
 142   }
 143
 144   /* A couple of convenience pointers */
 145   g0 = &generations[0];
 146   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 147
 148   nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_),
 149                              "initStorage: nurseries");
 150
 151   /* Set up the destination pointers in each younger gen. step */
 152   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 153       generations[g].to = &generations[g+1];
 154   }
 155   oldest_gen->to = oldest_gen;
 156
 157   /* The oldest generation has one step. */
 158   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
 159       if (RtsFlags.GcFlags.generations == 1) {
 160           errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
 161       } else {
 162           oldest_gen->mark = 1;
 163           if (RtsFlags.GcFlags.compact)
 164               oldest_gen->compact = 1;
 165       }
 166   }
 167
 168   generations[0].max_blocks = 0;
 169
 170   /* The allocation area.  Policy: keep the allocation area
 171    * small to begin with, even if we have a large suggested heap
 172    * size.  Reason: we're going to do a major collection first, and we
 173    * don't want it to be a big one.  This vague idea is borne out by
 174    * rigorous experimental evidence.
 175    */
 176   allocNurseries();
 177
 178   weak_ptr_list = NULL;
 179   caf_list = END_OF_STATIC_LIST;
 180   revertible_caf_list = END_OF_STATIC_LIST;
 181
 182   /* initialise the allocate() interface */
 183   large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
 184
 185   exec_block = NULL;
 186
 187 #ifdef THREADED_RTS
 188   initSpinLock(&gc_alloc_block_sync);
 189   whitehole_spin = 0;
 190 #endif
 191
 192   N = 0;
 193
 194   // allocate a block for each mut list
 195   for (n = 0; n < n_capabilities; n++) {
 196       for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
 197           capabilities[n].mut_lists[g] = allocBlock();
 198       }
 199   }
 200
 201   initGcThreads();
 202
 203   IF_DEBUG(gc, statDescribeGens());
 204
 205   RELEASE_SM_LOCK;
 206 }
 207
 208 void
 209 exitStorage (void)
 210 {
 211     stat_exit(calcAllocated(rtsTrue));
 212 }
 213
 214 void
 215 freeStorage (rtsBool free_heap)
 216 {
 217     stgFree(generations);
 218     if (free_heap) freeAllMBlocks();
 219 #if defined(THREADED_RTS)
 220     closeMutex(&sm_mutex);
 221 #endif
 222     stgFree(nurseries);
 223     freeGcThreads();
 224 }
 225
 226 /* -----------------------------------------------------------------------------
 227    CAF management.
 228
 229    The entry code for every CAF does the following:
 230
 231       - builds a BLACKHOLE in the heap
 232       - pushes an update frame pointing to the BLACKHOLE
 233       - calls newCaf, below
 234       - updates the CAF with a static indirection to the BLACKHOLE
 235
 236    Why do we build an BLACKHOLE in the heap rather than just updating
 237    the thunk directly?  It's so that we only need one kind of update
 238    frame - otherwise we'd need a static version of the update frame too.
 239
 240    newCaf() does the following:
 241
 242       - it puts the CAF on the oldest generation's mutable list.
 243         This is so that we treat the CAF as a root when collecting
 244         younger generations.
 245
 246    For GHCI, we have additional requirements when dealing with CAFs:
 247
 248       - we must *retain* all dynamically-loaded CAFs ever entered,
 249         just in case we need them again.
 250       - we must be able to *revert* CAFs that have been evaluated, to
 251         their pre-evaluated form.
 252
 253       To do this, we use an additional CAF list.  When newCaf() is
 254       called on a dynamically-loaded CAF, we add it to the CAF list
 255       instead of the old-generation mutable list, and save away its
 256       old info pointer (in caf->saved_info) for later reversion.
 257
 258       To revert all the CAFs, we traverse the CAF list and reset the
 259       info pointer to caf->saved_info, then throw away the CAF list.
 260       (see GC.c:revertCAFs()).
 261
 262       -- SDM 29/1/01
 263
 264    -------------------------------------------------------------------------- */
 265
 266 void
 267 newCAF(StgRegTable *reg, StgClosure* caf)
 268 {
 269   if(keepCAFs)
 270   {
 271     // HACK:
 272     // If we are in GHCi _and_ we are using dynamic libraries,
 273     // then we can't redirect newCAF calls to newDynCAF (see below),
 274     // so we make newCAF behave almost like newDynCAF.
 275     // The dynamic libraries might be used by both the interpreted
 276     // program and GHCi itself, so they must not be reverted.
 277     // This also means that in GHCi with dynamic libraries, CAFs are not
 278     // garbage collected. If this turns out to be a problem, we could
 279     // do another hack here and do an address range test on caf to figure
 280     // out whether it is from a dynamic library.
 281     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 282
 283     ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex
 284     ((StgIndStatic *)caf)->static_link = caf_list;
 285     caf_list = caf;
 286     RELEASE_SM_LOCK;
 287   }
 288   else
 289   {
 290     // Put this CAF on the mutable list for the old generation.
 291     ((StgIndStatic *)caf)->saved_info = NULL;
 292     if (oldest_gen->no != 0) {
 293         recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
 294     }
 295   }
 296 }
 297
 298 // External API for setting the keepCAFs flag. see #3900.
 299 void
 300 setKeepCAFs (void)
 301 {
 302     keepCAFs = 1;
 303 }
 304
 305 // An alternate version of newCaf which is used for dynamically loaded
 306 // object code in GHCi.  In this case we want to retain *all* CAFs in
 307 // the object code, because they might be demanded at any time from an
 308 // expression evaluated on the command line.
 309 // Also, GHCi might want to revert CAFs, so we add these to the
 310 // revertible_caf_list.
 311 //
 312 // The linker hackily arranges that references to newCaf from dynamic
 313 // code end up pointing to newDynCAF.
 314 void
 315 newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf)
 316 {
 317     ACQUIRE_SM_LOCK;
 318
 319     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 320     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 321     revertible_caf_list = caf;
 322
 323     RELEASE_SM_LOCK;
 324 }
 325
 326 /* -----------------------------------------------------------------------------
 327    Nursery management.
 328    -------------------------------------------------------------------------- */
 329
 330 static bdescr *
 331 allocNursery (bdescr *tail, nat blocks)
 332 {
 333     bdescr *bd = NULL;
 334     nat i, n;
 335
 336     // We allocate the nursery as a single contiguous block and then
 337     // divide it into single blocks manually.  This way we guarantee
 338     // that the nursery blocks are adjacent, so that the processor's
 339     // automatic prefetching works across nursery blocks.  This is a
 340     // tiny optimisation (~0.5%), but it's free.
 341
 342     while (blocks > 0) {
 343         n = stg_min(blocks, BLOCKS_PER_MBLOCK);
 344         blocks -= n;
 345
 346         bd = allocGroup(n);
 347         for (i = 0; i < n; i++) {
 348             initBdescr(&bd[i], g0, g0);
 349
 350             bd[i].blocks = 1;
 351             bd[i].flags = 0;
 352
 353             if (i > 0) {
 354                 bd[i].u.back = &bd[i-1];
 355             } else {
 356                 bd[i].u.back = NULL;
 357             }
 358
 359             if (i+1 < n) {
 360                 bd[i].link = &bd[i+1];
 361             } else {
 362                 bd[i].link = tail;
 363                 if (tail != NULL) {
 364                     tail->u.back = &bd[i];
 365                 }
 366             }
 367
 368             bd[i].free = bd[i].start;
 369         }
 370
 371         tail = &bd[0];
 372     }
 373
 374     return &bd[0];
 375 }
 376
 377 static void
 378 assignNurseriesToCapabilities (void)
 379 {
 380     nat i;
 381
 382     for (i = 0; i < n_capabilities; i++) {
 383         capabilities[i].r.rNursery        = &nurseries[i];
 384         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 385         capabilities[i].r.rCurrentAlloc   = NULL;
 386     }
 387 }
 388
 389 static void
 390 allocNurseries( void )
 391 {
 392     nat i;
 393
 394     for (i = 0; i < n_capabilities; i++) {
 395         nurseries[i].blocks =
 396             allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
 397         nurseries[i].n_blocks =
 398             RtsFlags.GcFlags.minAllocAreaSize;
 399     }
 400     assignNurseriesToCapabilities();
 401 }
 402
 403 lnat // words allocated
 404 clearNurseries (void)
 405 {
 406     lnat allocated = 0;
 407     nat i;
 408     bdescr *bd;
 409
 410     for (i = 0; i < n_capabilities; i++) {
 411         for (bd = nurseries[i].blocks; bd; bd = bd->link) {
 412             allocated += (lnat)(bd->free - bd->start);
 413             bd->free = bd->start;
 414             ASSERT(bd->gen_no == 0);
 415             ASSERT(bd->gen == g0);
 416             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 417         }
 418     }
 419
 420     return allocated;
 421 }
 422
 423 void
 424 resetNurseries (void)
 425 {
 426     assignNurseriesToCapabilities();
 427
 428 }
 429
 430 lnat
 431 countNurseryBlocks (void)
 432 {
 433     nat i;
 434     lnat blocks = 0;
 435
 436     for (i = 0; i < n_capabilities; i++) {
 437         blocks += nurseries[i].n_blocks;
 438     }
 439     return blocks;
 440 }
 441
 442 static void
 443 resizeNursery ( nursery *nursery, nat blocks )
 444 {
 445   bdescr *bd;
 446   nat nursery_blocks;
 447
 448   nursery_blocks = nursery->n_blocks;
 449   if (nursery_blocks == blocks) return;
 450
 451   if (nursery_blocks < blocks) {
 452       debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
 453                  blocks);
 454     nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
 455   }
 456   else {
 457     bdescr *next_bd;
 458
 459     debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
 460                blocks);
 461
 462     bd = nursery->blocks;
 463     while (nursery_blocks > blocks) {
 464         next_bd = bd->link;
 465         next_bd->u.back = NULL;
 466         nursery_blocks -= bd->blocks; // might be a large block
 467         freeGroup(bd);
 468         bd = next_bd;
 469     }
 470     nursery->blocks = bd;
 471     // might have gone just under, by freeing a large block, so make
 472     // up the difference.
 473     if (nursery_blocks < blocks) {
 474         nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
 475     }
 476   }
 477
 478   nursery->n_blocks = blocks;
 479   ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
 480 }
 481
 482 //
 483 // Resize each of the nurseries to the specified size.
 484 //
 485 void
 486 resizeNurseriesFixed (nat blocks)
 487 {
 488     nat i;
 489     for (i = 0; i < n_capabilities; i++) {
 490         resizeNursery(&nurseries[i], blocks);
 491     }
 492 }
 493
 494 //
 495 // Resize the nurseries to the total specified size.
 496 //
 497 void
 498 resizeNurseries (nat blocks)
 499 {
 500     // If there are multiple nurseries, then we just divide the number
 501     // of available blocks between them.
 502     resizeNurseriesFixed(blocks / n_capabilities);
 503 }
 504
 505
 506 /* -----------------------------------------------------------------------------
 507    move_STACK is called to update the TSO structure after it has been
 508    moved from one place to another.
 509    -------------------------------------------------------------------------- */
 510
 511 void
 512 move_STACK (StgStack *src, StgStack *dest)
 513 {
 514     ptrdiff_t diff;
 515
 516     // relocate the stack pointer...
 517     diff = (StgPtr)dest - (StgPtr)src; // In *words*
 518     dest->sp = (StgPtr)dest->sp + diff;
 519 }
 520
 521 /* -----------------------------------------------------------------------------
 522    allocate()
 523
 524    This allocates memory in the current thread - it is intended for
 525    use primarily from STG-land where we have a Capability.  It is
 526    better than allocate() because it doesn't require taking the
 527    sm_mutex lock in the common case.
 528
 529    Memory is allocated directly from the nursery if possible (but not
 530    from the current nursery block, so as not to interfere with
 531    Hp/HpLim).
 532    -------------------------------------------------------------------------- */
 533
 534 StgPtr
 535 allocate (Capability *cap, lnat n)
 536 {
 537     bdescr *bd;
 538     StgPtr p;
 539
 540     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 541         lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 542
 543         // Attempting to allocate an object larger than maxHeapSize
 544         // should definitely be disallowed.  (bug #1791)
 545         if (RtsFlags.GcFlags.maxHeapSize > 0 &&
 546             req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
 547             heapOverflow();
 548             // heapOverflow() doesn't exit (see #2592), but we aren't
 549             // in a position to do a clean shutdown here: we
 550             // either have to allocate the memory or exit now.
 551             // Allocating the memory would be bad, because the user
 552             // has requested that we not exceed maxHeapSize, so we
 553             // just exit.
 554             stg_exit(EXIT_HEAPOVERFLOW);
 555         }
 556
 557         ACQUIRE_SM_LOCK
 558         bd = allocGroup(req_blocks);
 559         dbl_link_onto(bd, &g0->large_objects);
 560         g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
 561         g0->n_new_large_words += n;
 562         RELEASE_SM_LOCK;
 563         initBdescr(bd, g0, g0);
 564         bd->flags = BF_LARGE;
 565         bd->free = bd->start + n;
 566         return bd->start;
 567     }
 568
 569     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 570
 571     TICK_ALLOC_HEAP_NOCTR(n);
 572     CCS_ALLOC(CCCS,n);
 573
 574     bd = cap->r.rCurrentAlloc;
 575     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 576
 577         // The CurrentAlloc block is full, we need to find another
 578         // one.  First, we try taking the next block from the
 579         // nursery:
 580         bd = cap->r.rCurrentNursery->link;
 581
 582         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 583             // The nursery is empty, or the next block is already
 584             // full: allocate a fresh block (we can't fail here).
 585             ACQUIRE_SM_LOCK;
 586             bd = allocBlock();
 587             cap->r.rNursery->n_blocks++;
 588             RELEASE_SM_LOCK;
 589             initBdescr(bd, g0, g0);
 590             bd->flags = 0;
 591             // If we had to allocate a new block, then we'll GC
 592             // pretty quickly now, because MAYBE_GC() will
 593             // notice that CurrentNursery->link is NULL.
 594         } else {
 595             // we have a block in the nursery: take it and put
 596             // it at the *front* of the nursery list, and use it
 597             // to allocate() from.
 598             cap->r.rCurrentNursery->link = bd->link;
 599             if (bd->link != NULL) {
 600                 bd->link->u.back = cap->r.rCurrentNursery;
 601             }
 602         }
 603         dbl_link_onto(bd, &cap->r.rNursery->blocks);
 604         cap->r.rCurrentAlloc = bd;
 605         IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 606     }
 607     p = bd->free;
 608     bd->free += n;
 609
 610     IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
 611     return p;
 612 }
 613
 614 /* ---------------------------------------------------------------------------
 615    Allocate a fixed/pinned object.
 616
 617    We allocate small pinned objects into a single block, allocating a
 618    new block when the current one overflows.  The block is chained
 619    onto the large_object_list of generation 0.
 620
 621    NOTE: The GC can't in general handle pinned objects.  This
 622    interface is only safe to use for ByteArrays, which have no
 623    pointers and don't require scavenging.  It works because the
 624    block's descriptor has the BF_LARGE flag set, so the block is
 625    treated as a large object and chained onto various lists, rather
 626    than the individual objects being copied.  However, when it comes
 627    to scavenge the block, the GC will only scavenge the first object.
 628    The reason is that the GC can't linearly scan a block of pinned
 629    objects at the moment (doing so would require using the
 630    mostly-copying techniques).  But since we're restricting ourselves
 631    to pinned ByteArrays, not scavenging is ok.
 632
 633    This function is called by newPinnedByteArray# which immediately
 634    fills the allocated memory with a MutableByteArray#.
 635    ------------------------------------------------------------------------- */
 636
 637 StgPtr
 638 allocatePinned (Capability *cap, lnat n)
 639 {
 640     StgPtr p;
 641     bdescr *bd;
 642
 643     // If the request is for a large object, then allocate()
 644     // will give us a pinned object anyway.
 645     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 646         p = allocate(cap, n);
 647         Bdescr(p)->flags |= BF_PINNED;
 648         return p;
 649     }
 650
 651     TICK_ALLOC_HEAP_NOCTR(n);
 652     CCS_ALLOC(CCCS,n);
 653
 654     bd = cap->pinned_object_block;
 655
 656     // If we don't have a block of pinned objects yet, or the current
 657     // one isn't large enough to hold the new object, allocate a new one.
 658     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 659         ACQUIRE_SM_LOCK;
 660         cap->pinned_object_block = bd = allocBlock();
 661         dbl_link_onto(bd, &g0->large_objects);
 662         g0->n_large_blocks++;
 663         RELEASE_SM_LOCK;
 664         initBdescr(bd, g0, g0);
 665         bd->flags  = BF_PINNED | BF_LARGE;
 666         bd->free   = bd->start;
 667     }
 668
 669     g0->n_new_large_words += n;
 670     p = bd->free;
 671     bd->free += n;
 672     return p;
 673 }
 674
 675 /* -----------------------------------------------------------------------------
 676    Write Barriers
 677    -------------------------------------------------------------------------- */
 678
 679 /*
 680    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 681    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 682    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 683    and is put on the mutable list.
 684 */
 685 void
 686 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 687 {
 688     Capability *cap = regTableToCapability(reg);
 689     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 690         p->header.info = &stg_MUT_VAR_DIRTY_info;
 691         recordClosureMutated(cap,p);
 692     }
 693 }
 694
 695 // Setting a TSO's link field with a write barrier.
 696 // It is *not* necessary to call this function when
 697 //    * setting the link field to END_TSO_QUEUE
 698 //    * putting a TSO on the blackhole_queue
 699 //    * setting the link field of the currently running TSO, as it
 700 //      will already be dirty.
 701 void
 702 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 703 {
 704     if (tso->dirty == 0) {
 705         tso->dirty = 1;
 706         recordClosureMutated(cap,(StgClosure*)tso);
 707     }
 708     tso->_link = target;
 709 }
 710
 711 void
 712 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
 713 {
 714     if (tso->dirty == 0) {
 715         tso->dirty = 1;
 716         recordClosureMutated(cap,(StgClosure*)tso);
 717     }
 718     tso->block_info.prev = target;
 719 }
 720
 721 void
 722 dirty_TSO (Capability *cap, StgTSO *tso)
 723 {
 724     if (tso->dirty == 0) {
 725         tso->dirty = 1;
 726         recordClosureMutated(cap,(StgClosure*)tso);
 727     }
 728 }
 729
 730 void
 731 dirty_STACK (Capability *cap, StgStack *stack)
 732 {
 733     if (stack->dirty == 0) {
 734         stack->dirty = 1;
 735         recordClosureMutated(cap,(StgClosure*)stack);
 736     }
 737 }
 738
 739 /*
 740    This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
 741    on the mutable list; a MVAR_DIRTY is.  When written to, a
 742    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
 743    The check for MVAR_CLEAN is inlined at the call site for speed,
 744    this really does make a difference on concurrency-heavy benchmarks
 745    such as Chaneneos and cheap-concurrency.
 746 */
 747 void
 748 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 749 {
 750     recordClosureMutated(regTableToCapability(reg),p);
 751 }
 752
 753 /* -----------------------------------------------------------------------------
 754  * Stats and stuff
 755  * -------------------------------------------------------------------------- */
 756
 757 /* -----------------------------------------------------------------------------
 758  * calcAllocated()
 759  *
 760  * Approximate how much we've allocated: number of blocks in the
 761  * nursery + blocks allocated via allocate() - unused nusery blocks.
 762  * This leaves a little slop at the end of each block.
 763  * -------------------------------------------------------------------------- */
 764
 765 lnat
 766 calcAllocated (rtsBool include_nurseries)
 767 {
 768   nat allocated = 0;
 769   bdescr *bd;
 770   nat i;
 771
 772   // When called from GC.c, we already have the allocation count for
 773   // the nursery from resetNurseries(), so we don't need to walk
 774   // through these block lists again.
 775   if (include_nurseries)
 776   {
 777       for (i = 0; i < n_capabilities; i++) {
 778           for (bd = nurseries[i].blocks; bd; bd = bd->link) {
 779               allocated += (lnat)(bd->free - bd->start);
 780           }
 781       }
 782   }
 783
 784   // add in sizes of new large and pinned objects
 785   allocated += g0->n_new_large_words;
 786
 787   return allocated;
 788 }
 789
 790 /* Approximate the amount of live data in the heap.  To be called just
 791  * after garbage collection (see GarbageCollect()).
 792  */
 793 lnat calcLiveBlocks (void)
 794 {
 795   nat g;
 796   lnat live = 0;
 797   generation *gen;
 798
 799   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 800       /* approximate amount of live data (doesn't take into account slop
 801        * at end of each block).
 802        */
 803       gen = &generations[g];
 804       live += gen->n_large_blocks + gen->n_blocks;
 805   }
 806   return live;
 807 }
 808
 809 lnat countOccupied (bdescr *bd)
 810 {
 811     lnat words;
 812
 813     words = 0;
 814     for (; bd != NULL; bd = bd->link) {
 815         ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
 816         words += bd->free - bd->start;
 817     }
 818     return words;
 819 }
 820
 821 // Return an accurate count of the live data in the heap, excluding
 822 // generation 0.
 823 lnat calcLiveWords (void)
 824 {
 825     nat g;
 826     lnat live;
 827     generation *gen;
 828
 829     live = 0;
 830     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 831         gen = &generations[g];
 832         live += gen->n_words + countOccupied(gen->large_objects);
 833     }
 834     return live;
 835 }
 836
 837 /* Approximate the number of blocks that will be needed at the next
 838  * garbage collection.
 839  *
 840  * Assume: all data currently live will remain live.  Generationss
 841  * that will be collected next time will therefore need twice as many
 842  * blocks since all the data will be copied.
 843  */
 844 extern lnat
 845 calcNeeded(void)
 846 {
 847     lnat needed = 0;
 848     nat g;
 849     generation *gen;
 850
 851     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 852         gen = &generations[g];
 853
 854         // we need at least this much space
 855         needed += gen->n_blocks + gen->n_large_blocks;
 856
 857         // any additional space needed to collect this gen next time?
 858         if (g == 0 || // always collect gen 0
 859             (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) {
 860             // we will collect this gen next time
 861             if (gen->mark) {
 862                 //  bitmap:
 863                 needed += gen->n_blocks / BITS_IN(W_);
 864                 //  mark stack:
 865                 needed += gen->n_blocks / 100;
 866             }
 867             if (gen->compact) {
 868                 continue; // no additional space needed for compaction
 869             } else {
 870                 needed += gen->n_blocks;
 871             }
 872         }
 873     }
 874     return needed;
 875 }
 876
 877 /* ----------------------------------------------------------------------------
 878    Executable memory
 879
 880    Executable memory must be managed separately from non-executable
 881    memory.  Most OSs these days require you to jump through hoops to
 882    dynamically allocate executable memory, due to various security
 883    measures.
 884
 885    Here we provide a small memory allocator for executable memory.
 886    Memory is managed with a page granularity; we allocate linearly
 887    in the page, and when the page is emptied (all objects on the page
 888    are free) we free the page again, not forgetting to make it
 889    non-executable.
 890
 891    TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
 892          the linker cannot use allocateExec for loading object code files
 893          on Windows. Once allocateExec can handle larger objects, the linker
 894          should be modified to use allocateExec instead of VirtualAlloc.
 895    ------------------------------------------------------------------------- */
 896
 897 #if defined(linux_HOST_OS)
 898
 899 // On Linux we need to use libffi for allocating executable memory,
 900 // because it knows how to work around the restrictions put in place
 901 // by SELinux.
 902
 903 void *allocateExec (nat bytes, void **exec_ret)
 904 {
 905     void **ret, **exec;
 906     ACQUIRE_SM_LOCK;
 907     ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
 908     RELEASE_SM_LOCK;
 909     if (ret == NULL) return ret;
 910     *ret = ret; // save the address of the writable mapping, for freeExec().
 911     *exec_ret = exec + 1;
 912     return (ret + 1);
 913 }
 914
 915 // freeExec gets passed the executable address, not the writable address.
 916 void freeExec (void *addr)
 917 {
 918     void *writable;
 919     writable = *((void**)addr - 1);
 920     ACQUIRE_SM_LOCK;
 921     ffi_closure_free (writable);
 922     RELEASE_SM_LOCK
 923 }
 924
 925 #else
 926
 927 void *allocateExec (nat bytes, void **exec_ret)
 928 {
 929     void *ret;
 930     nat n;
 931
 932     ACQUIRE_SM_LOCK;
 933
 934     // round up to words.
 935     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
 936
 937     if (n+1 > BLOCK_SIZE_W) {
 938         barf("allocateExec: can't handle large objects");
 939     }
 940
 941     if (exec_block == NULL ||
 942         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
 943         bdescr *bd;
 944         lnat pagesize = getPageSize();
 945         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
 946         debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
 947         bd->gen_no = 0;
 948         bd->flags = BF_EXEC;
 949         bd->link = exec_block;
 950         if (exec_block != NULL) {
 951             exec_block->u.back = bd;
 952         }
 953         bd->u.back = NULL;
 954         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
 955         exec_block = bd;
 956     }
 957     *(exec_block->free) = n;  // store the size of this chunk
 958     exec_block->gen_no += n;  // gen_no stores the number of words allocated
 959     ret = exec_block->free + 1;
 960     exec_block->free += n + 1;
 961
 962     RELEASE_SM_LOCK
 963     *exec_ret = ret;
 964     return ret;
 965 }
 966
 967 void freeExec (void *addr)
 968 {
 969     StgPtr p = (StgPtr)addr - 1;
 970     bdescr *bd = Bdescr((StgPtr)p);
 971
 972     if ((bd->flags & BF_EXEC) == 0) {
 973         barf("freeExec: not executable");
 974     }
 975
 976     if (*(StgPtr)p == 0) {
 977         barf("freeExec: already free?");
 978     }
 979
 980     ACQUIRE_SM_LOCK;
 981
 982     bd->gen_no -= *(StgPtr)p;
 983     *(StgPtr)p = 0;
 984
 985     if (bd->gen_no == 0) {
 986         // Free the block if it is empty, but not if it is the block at
 987         // the head of the queue.
 988         if (bd != exec_block) {
 989             debugTrace(DEBUG_gc, "free exec block %p", bd->start);
 990             dbl_link_remove(bd, &exec_block);
 991             setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
 992             freeGroup(bd);
 993         } else {
 994             bd->free = bd->start;
 995         }
 996     }
 997
 998     RELEASE_SM_LOCK
 999 }
1000
1001 #endif /* mingw32_HOST_OS */
1002
1003 #ifdef DEBUG
1004
1005 // handy function for use in gdb, because Bdescr() is inlined.
1006 extern bdescr *_bdescr( StgPtr p );
1007
1008 bdescr *
1009 _bdescr( StgPtr p )
1010 {
1011     return Bdescr(p);
1012 }
1013
1014 #endif