rts/Storage.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team, 1998-2004
   4  *
   5  * Storage manager front end
   6  *
   7  * ---------------------------------------------------------------------------*/
   8
   9 #include "PosixSource.h"
  10 #include "Rts.h"
  11 #include "RtsUtils.h"
  12 #include "RtsFlags.h"
  13 #include "Stats.h"
  14 #include "Hooks.h"
  15 #include "BlockAlloc.h"
  16 #include "MBlock.h"
  17 #include "Weak.h"
  18 #include "Sanity.h"
  19 #include "Arena.h"
  20 #include "OSThreads.h"
  21 #include "Capability.h"
  22 #include "Storage.h"
  23 #include "Schedule.h"
  24 #include "RetainerProfile.h"    // for counting memory blocks (memInventory)
  25 #include "OSMem.h"
  26
  27 #include <stdlib.h>
  28 #include <string.h>
  29
  30 /*
  31  * All these globals require sm_mutex to access in THREADED_RTS mode.
  32  */
  33 StgClosure    *caf_list         = NULL;
  34 StgClosure    *revertible_caf_list = NULL;
  35 rtsBool       keepCAFs;
  36
  37 bdescr *small_alloc_list;       /* allocate()d small objects */
  38 bdescr *pinned_object_block;    /* allocate pinned objects into this block */
  39 nat alloc_blocks;               /* number of allocate()d blocks since GC */
  40 nat alloc_blocks_lim;           /* approximate limit on alloc_blocks */
  41
  42 StgPtr alloc_Hp    = NULL;      /* next free byte in small_alloc_list */
  43 StgPtr alloc_HpLim = NULL;      /* end of block at small_alloc_list   */
  44
  45 generation *generations = NULL; /* all the generations */
  46 generation *g0          = NULL; /* generation 0, for convenience */
  47 generation *oldest_gen  = NULL; /* oldest generation, for convenience */
  48 step *g0s0              = NULL; /* generation 0, step 0, for convenience */
  49
  50 ullong total_allocated = 0;     /* total memory allocated during run */
  51
  52 nat n_nurseries         = 0;    /* == RtsFlags.ParFlags.nNodes, convenience */
  53 step *nurseries         = NULL; /* array of nurseries, >1 only if THREADED_RTS */
  54
  55 #ifdef THREADED_RTS
  56 /*
  57  * Storage manager mutex:  protects all the above state from
  58  * simultaneous access by two STG threads.
  59  */
  60 Mutex sm_mutex;
  61 /*
  62  * This mutex is used by atomicModifyMutVar# only
  63  */
  64 Mutex atomic_modify_mutvar_mutex;
  65 #endif
  66
  67
  68 /*
  69  * Forward references
  70  */
  71 static void *stgAllocForGMP   (size_t size_in_bytes);
  72 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
  73 static void  stgDeallocForGMP (void *ptr, size_t size);
  74
  75 static void
  76 initStep (step *stp, int g, int s)
  77 {
  78     stp->no = s;
  79     stp->blocks = NULL;
  80     stp->n_blocks = 0;
  81     stp->old_blocks = NULL;
  82     stp->n_old_blocks = 0;
  83     stp->gen = &generations[g];
  84     stp->gen_no = g;
  85     stp->hp = NULL;
  86     stp->hpLim = NULL;
  87     stp->hp_bd = NULL;
  88     stp->scavd_hp = NULL;
  89     stp->scavd_hpLim = NULL;
  90     stp->scan = NULL;
  91     stp->scan_bd = NULL;
  92     stp->large_objects = NULL;
  93     stp->n_large_blocks = 0;
  94     stp->new_large_objects = NULL;
  95     stp->scavenged_large_objects = NULL;
  96     stp->n_scavenged_large_blocks = 0;
  97     stp->is_compacted = 0;
  98     stp->bitmap = NULL;
  99 }
 100
 101 void
 102 initStorage( void )
 103 {
 104   nat g, s;
 105   generation *gen;
 106
 107   if (generations != NULL) {
 108       // multi-init protection
 109       return;
 110   }
 111
 112   /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
 113    * doing something reasonable.
 114    */
 115   ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
 116   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
 117   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 118
 119   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 120       RtsFlags.GcFlags.heapSizeSuggestion >
 121       RtsFlags.GcFlags.maxHeapSize) {
 122     RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
 123   }
 124
 125   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
 126       RtsFlags.GcFlags.minAllocAreaSize >
 127       RtsFlags.GcFlags.maxHeapSize) {
 128       errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
 129       exit(1);
 130   }
 131
 132   initBlockAllocator();
 133
 134 #if defined(THREADED_RTS)
 135   initMutex(&sm_mutex);
 136   initMutex(&atomic_modify_mutvar_mutex);
 137 #endif
 138
 139   ACQUIRE_SM_LOCK;
 140
 141   /* allocate generation info array */
 142   generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
 143                                              * sizeof(struct generation_),
 144                                              "initStorage: gens");
 145
 146   /* Initialise all generations */
 147   for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
 148     gen = &generations[g];
 149     gen->no = g;
 150     gen->mut_list = allocBlock();
 151     gen->collections = 0;
 152     gen->failed_promotions = 0;
 153     gen->max_blocks = 0;
 154   }
 155
 156   /* A couple of convenience pointers */
 157   g0 = &generations[0];
 158   oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
 159
 160   /* Allocate step structures in each generation */
 161   if (RtsFlags.GcFlags.generations > 1) {
 162     /* Only for multiple-generations */
 163
 164     /* Oldest generation: one step */
 165     oldest_gen->n_steps = 1;
 166     oldest_gen->steps =
 167       stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
 168
 169     /* set up all except the oldest generation with 2 steps */
 170     for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 171       generations[g].n_steps = RtsFlags.GcFlags.steps;
 172       generations[g].steps  =
 173         stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
 174                         "initStorage: steps");
 175     }
 176
 177   } else {
 178     /* single generation, i.e. a two-space collector */
 179     g0->n_steps = 1;
 180     g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
 181   }
 182
 183 #ifdef THREADED_RTS
 184   n_nurseries = n_capabilities;
 185   nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
 186                               "initStorage: nurseries");
 187 #else
 188   n_nurseries = 1;
 189   nurseries = g0->steps; // just share nurseries[0] with g0s0
 190 #endif
 191
 192   /* Initialise all steps */
 193   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 194     for (s = 0; s < generations[g].n_steps; s++) {
 195         initStep(&generations[g].steps[s], g, s);
 196     }
 197   }
 198
 199 #ifdef THREADED_RTS
 200   for (s = 0; s < n_nurseries; s++) {
 201       initStep(&nurseries[s], 0, s);
 202   }
 203 #endif
 204
 205   /* Set up the destination pointers in each younger gen. step */
 206   for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
 207     for (s = 0; s < generations[g].n_steps-1; s++) {
 208       generations[g].steps[s].to = &generations[g].steps[s+1];
 209     }
 210     generations[g].steps[s].to = &generations[g+1].steps[0];
 211   }
 212   oldest_gen->steps[0].to = &oldest_gen->steps[0];
 213
 214 #ifdef THREADED_RTS
 215   for (s = 0; s < n_nurseries; s++) {
 216       nurseries[s].to = generations[0].steps[0].to;
 217   }
 218 #endif
 219
 220   /* The oldest generation has one step. */
 221   if (RtsFlags.GcFlags.compact) {
 222       if (RtsFlags.GcFlags.generations == 1) {
 223           errorBelch("WARNING: compaction is incompatible with -G1; disabled");
 224       } else {
 225           oldest_gen->steps[0].is_compacted = 1;
 226       }
 227   }
 228
 229 #ifdef THREADED_RTS
 230   if (RtsFlags.GcFlags.generations == 1) {
 231       errorBelch("-G1 is incompatible with -threaded");
 232       stg_exit(EXIT_FAILURE);
 233   }
 234 #endif
 235
 236   /* generation 0 is special: that's the nursery */
 237   generations[0].max_blocks = 0;
 238
 239   /* G0S0: the allocation area.  Policy: keep the allocation area
 240    * small to begin with, even if we have a large suggested heap
 241    * size.  Reason: we're going to do a major collection first, and we
 242    * don't want it to be a big one.  This vague idea is borne out by
 243    * rigorous experimental evidence.
 244    */
 245   g0s0 = &generations[0].steps[0];
 246
 247   allocNurseries();
 248
 249   weak_ptr_list = NULL;
 250   caf_list = NULL;
 251   revertible_caf_list = NULL;
 252
 253   /* initialise the allocate() interface */
 254   small_alloc_list = NULL;
 255   alloc_blocks = 0;
 256   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 257
 258   /* Tell GNU multi-precision pkg about our custom alloc functions */
 259   mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
 260
 261   IF_DEBUG(gc, statDescribeGens());
 262
 263   RELEASE_SM_LOCK;
 264 }
 265
 266 void
 267 exitStorage (void)
 268 {
 269     stat_exit(calcAllocated());
 270 }
 271
 272 void
 273 freeStorage (void)
 274 {
 275     freeAllMBlocks();
 276 }
 277
 278 /* -----------------------------------------------------------------------------
 279    CAF management.
 280
 281    The entry code for every CAF does the following:
 282
 283       - builds a CAF_BLACKHOLE in the heap
 284       - pushes an update frame pointing to the CAF_BLACKHOLE
 285       - invokes UPD_CAF(), which:
 286           - calls newCaf, below
 287           - updates the CAF with a static indirection to the CAF_BLACKHOLE
 288
 289    Why do we build a BLACKHOLE in the heap rather than just updating
 290    the thunk directly?  It's so that we only need one kind of update
 291    frame - otherwise we'd need a static version of the update frame too.
 292
 293    newCaf() does the following:
 294
 295       - it puts the CAF on the oldest generation's mut-once list.
 296         This is so that we can treat the CAF as a root when collecting
 297         younger generations.
 298
 299    For GHCI, we have additional requirements when dealing with CAFs:
 300
 301       - we must *retain* all dynamically-loaded CAFs ever entered,
 302         just in case we need them again.
 303       - we must be able to *revert* CAFs that have been evaluated, to
 304         their pre-evaluated form.
 305
 306       To do this, we use an additional CAF list.  When newCaf() is
 307       called on a dynamically-loaded CAF, we add it to the CAF list
 308       instead of the old-generation mutable list, and save away its
 309       old info pointer (in caf->saved_info) for later reversion.
 310
 311       To revert all the CAFs, we traverse the CAF list and reset the
 312       info pointer to caf->saved_info, then throw away the CAF list.
 313       (see GC.c:revertCAFs()).
 314
 315       -- SDM 29/1/01
 316
 317    -------------------------------------------------------------------------- */
 318
 319 void
 320 newCAF(StgClosure* caf)
 321 {
 322   ACQUIRE_SM_LOCK;
 323
 324   if(keepCAFs)
 325   {
 326     // HACK:
 327     // If we are in GHCi _and_ we are using dynamic libraries,
 328     // then we can't redirect newCAF calls to newDynCAF (see below),
 329     // so we make newCAF behave almost like newDynCAF.
 330     // The dynamic libraries might be used by both the interpreted
 331     // program and GHCi itself, so they must not be reverted.
 332     // This also means that in GHCi with dynamic libraries, CAFs are not
 333     // garbage collected. If this turns out to be a problem, we could
 334     // do another hack here and do an address range test on caf to figure
 335     // out whether it is from a dynamic library.
 336     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 337     ((StgIndStatic *)caf)->static_link = caf_list;
 338     caf_list = caf;
 339   }
 340   else
 341   {
 342     /* Put this CAF on the mutable list for the old generation.
 343     * This is a HACK - the IND_STATIC closure doesn't really have
 344     * a mut_link field, but we pretend it has - in fact we re-use
 345     * the STATIC_LINK field for the time being, because when we
 346     * come to do a major GC we won't need the mut_link field
 347     * any more and can use it as a STATIC_LINK.
 348     */
 349     ((StgIndStatic *)caf)->saved_info = NULL;
 350     recordMutableGen(caf, oldest_gen);
 351   }
 352
 353   RELEASE_SM_LOCK;
 354
 355 #ifdef PAR
 356   /* If we are PAR or DIST then  we never forget a CAF */
 357   { globalAddr *newGA;
 358     //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
 359     newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
 360     ASSERT(newGA);
 361   }
 362 #endif /* PAR */
 363 }
 364
 365 // An alternate version of newCaf which is used for dynamically loaded
 366 // object code in GHCi.  In this case we want to retain *all* CAFs in
 367 // the object code, because they might be demanded at any time from an
 368 // expression evaluated on the command line.
 369 // Also, GHCi might want to revert CAFs, so we add these to the
 370 // revertible_caf_list.
 371 //
 372 // The linker hackily arranges that references to newCaf from dynamic
 373 // code end up pointing to newDynCAF.
 374 void
 375 newDynCAF(StgClosure *caf)
 376 {
 377     ACQUIRE_SM_LOCK;
 378
 379     ((StgIndStatic *)caf)->saved_info  = (StgInfoTable *)caf->header.info;
 380     ((StgIndStatic *)caf)->static_link = revertible_caf_list;
 381     revertible_caf_list = caf;
 382
 383     RELEASE_SM_LOCK;
 384 }
 385
 386 /* -----------------------------------------------------------------------------
 387    Nursery management.
 388    -------------------------------------------------------------------------- */
 389
 390 static bdescr *
 391 allocNursery (step *stp, bdescr *tail, nat blocks)
 392 {
 393     bdescr *bd;
 394     nat i;
 395
 396     // Allocate a nursery: we allocate fresh blocks one at a time and
 397     // cons them on to the front of the list, not forgetting to update
 398     // the back pointer on the tail of the list to point to the new block.
 399     for (i=0; i < blocks; i++) {
 400         // @LDV profiling
 401         /*
 402           processNursery() in LdvProfile.c assumes that every block group in
 403           the nursery contains only a single block. So, if a block group is
 404           given multiple blocks, change processNursery() accordingly.
 405         */
 406         bd = allocBlock();
 407         bd->link = tail;
 408         // double-link the nursery: we might need to insert blocks
 409         if (tail != NULL) {
 410             tail->u.back = bd;
 411         }
 412         bd->step = stp;
 413         bd->gen_no = 0;
 414         bd->flags = 0;
 415         bd->free = bd->start;
 416         tail = bd;
 417     }
 418     tail->u.back = NULL;
 419     return tail;
 420 }
 421
 422 static void
 423 assignNurseriesToCapabilities (void)
 424 {
 425 #ifdef THREADED_RTS
 426     nat i;
 427
 428     for (i = 0; i < n_nurseries; i++) {
 429         capabilities[i].r.rNursery        = &nurseries[i];
 430         capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
 431         capabilities[i].r.rCurrentAlloc   = NULL;
 432     }
 433 #else /* THREADED_RTS */
 434     MainCapability.r.rNursery        = &nurseries[0];
 435     MainCapability.r.rCurrentNursery = nurseries[0].blocks;
 436     MainCapability.r.rCurrentAlloc   = NULL;
 437 #endif
 438 }
 439
 440 void
 441 allocNurseries( void )
 442 {
 443     nat i;
 444
 445     for (i = 0; i < n_nurseries; i++) {
 446         nurseries[i].blocks =
 447             allocNursery(&nurseries[i], NULL,
 448                          RtsFlags.GcFlags.minAllocAreaSize);
 449         nurseries[i].n_blocks    = RtsFlags.GcFlags.minAllocAreaSize;
 450         nurseries[i].old_blocks   = NULL;
 451         nurseries[i].n_old_blocks = 0;
 452         /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
 453     }
 454     assignNurseriesToCapabilities();
 455 }
 456
 457 void
 458 resetNurseries( void )
 459 {
 460     nat i;
 461     bdescr *bd;
 462     step *stp;
 463
 464     for (i = 0; i < n_nurseries; i++) {
 465         stp = &nurseries[i];
 466         for (bd = stp->blocks; bd; bd = bd->link) {
 467             bd->free = bd->start;
 468             ASSERT(bd->gen_no == 0);
 469             ASSERT(bd->step == stp);
 470             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
 471         }
 472     }
 473     assignNurseriesToCapabilities();
 474 }
 475
 476 lnat
 477 countNurseryBlocks (void)
 478 {
 479     nat i;
 480     lnat blocks = 0;
 481
 482     for (i = 0; i < n_nurseries; i++) {
 483         blocks += nurseries[i].n_blocks;
 484     }
 485     return blocks;
 486 }
 487
 488 static void
 489 resizeNursery ( step *stp, nat blocks )
 490 {
 491   bdescr *bd;
 492   nat nursery_blocks;
 493
 494   nursery_blocks = stp->n_blocks;
 495   if (nursery_blocks == blocks) return;
 496
 497   if (nursery_blocks < blocks) {
 498     IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
 499                          blocks));
 500     stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 501   }
 502   else {
 503     bdescr *next_bd;
 504
 505     IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
 506                          blocks));
 507
 508     bd = stp->blocks;
 509     while (nursery_blocks > blocks) {
 510         next_bd = bd->link;
 511         next_bd->u.back = NULL;
 512         nursery_blocks -= bd->blocks; // might be a large block
 513         freeGroup(bd);
 514         bd = next_bd;
 515     }
 516     stp->blocks = bd;
 517     // might have gone just under, by freeing a large block, so make
 518     // up the difference.
 519     if (nursery_blocks < blocks) {
 520         stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
 521     }
 522   }
 523
 524   stp->n_blocks = blocks;
 525   ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 526 }
 527
 528 //
 529 // Resize each of the nurseries to the specified size.
 530 //
 531 void
 532 resizeNurseriesFixed (nat blocks)
 533 {
 534     nat i;
 535     for (i = 0; i < n_nurseries; i++) {
 536         resizeNursery(&nurseries[i], blocks);
 537     }
 538 }
 539
 540 //
 541 // Resize the nurseries to the total specified size.
 542 //
 543 void
 544 resizeNurseries (nat blocks)
 545 {
 546     // If there are multiple nurseries, then we just divide the number
 547     // of available blocks between them.
 548     resizeNurseriesFixed(blocks / n_nurseries);
 549 }
 550
 551 /* -----------------------------------------------------------------------------
 552    The allocate() interface
 553
 554    allocate(n) always succeeds, and returns a chunk of memory n words
 555    long.  n can be larger than the size of a block if necessary, in
 556    which case a contiguous block group will be allocated.
 557    -------------------------------------------------------------------------- */
 558
 559 StgPtr
 560 allocate( nat n )
 561 {
 562     bdescr *bd;
 563     StgPtr p;
 564
 565     ACQUIRE_SM_LOCK;
 566
 567     TICK_ALLOC_HEAP_NOCTR(n);
 568     CCS_ALLOC(CCCS,n);
 569
 570     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 571     /* ToDo: allocate directly into generation 1 */
 572     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 573         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 574         bd = allocGroup(req_blocks);
 575         dbl_link_onto(bd, &g0s0->large_objects);
 576         g0s0->n_large_blocks += req_blocks;
 577         bd->gen_no  = 0;
 578         bd->step = g0s0;
 579         bd->flags = BF_LARGE;
 580         bd->free = bd->start + n;
 581         alloc_blocks += req_blocks;
 582         RELEASE_SM_LOCK;
 583         return bd->start;
 584
 585         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 586     } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
 587         if (small_alloc_list) {
 588             small_alloc_list->free = alloc_Hp;
 589         }
 590         bd = allocBlock();
 591         bd->link = small_alloc_list;
 592         small_alloc_list = bd;
 593         bd->gen_no = 0;
 594         bd->step = g0s0;
 595         bd->flags = 0;
 596         alloc_Hp = bd->start;
 597         alloc_HpLim = bd->start + BLOCK_SIZE_W;
 598         alloc_blocks++;
 599     }
 600
 601     p = alloc_Hp;
 602     alloc_Hp += n;
 603     RELEASE_SM_LOCK;
 604     return p;
 605 }
 606
 607 lnat
 608 allocated_bytes( void )
 609 {
 610     lnat allocated;
 611
 612     allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
 613     if (pinned_object_block != NULL) {
 614         allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
 615             pinned_object_block->free;
 616     }
 617
 618     return allocated;
 619 }
 620
 621 void
 622 tidyAllocateLists (void)
 623 {
 624     if (small_alloc_list != NULL) {
 625         ASSERT(alloc_Hp >= small_alloc_list->start &&
 626                alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
 627         small_alloc_list->free = alloc_Hp;
 628     }
 629 }
 630
 631 /* -----------------------------------------------------------------------------
 632    allocateLocal()
 633
 634    This allocates memory in the current thread - it is intended for
 635    use primarily from STG-land where we have a Capability.  It is
 636    better than allocate() because it doesn't require taking the
 637    sm_mutex lock in the common case.
 638
 639    Memory is allocated directly from the nursery if possible (but not
 640    from the current nursery block, so as not to interfere with
 641    Hp/HpLim).
 642    -------------------------------------------------------------------------- */
 643
 644 StgPtr
 645 allocateLocal (Capability *cap, nat n)
 646 {
 647     bdescr *bd;
 648     StgPtr p;
 649
 650     TICK_ALLOC_HEAP_NOCTR(n);
 651     CCS_ALLOC(CCCS,n);
 652
 653     /* big allocation (>LARGE_OBJECT_THRESHOLD) */
 654     /* ToDo: allocate directly into generation 1 */
 655     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 656         nat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
 657         ACQUIRE_SM_LOCK;
 658         bd = allocGroup(req_blocks);
 659         dbl_link_onto(bd, &g0s0->large_objects);
 660         g0s0->n_large_blocks += req_blocks;
 661         bd->gen_no  = 0;
 662         bd->step = g0s0;
 663         bd->flags = BF_LARGE;
 664         bd->free = bd->start + n;
 665         alloc_blocks += req_blocks;
 666         RELEASE_SM_LOCK;
 667         return bd->start;
 668
 669         /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 670     } else {
 671
 672         bd = cap->r.rCurrentAlloc;
 673         if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 674
 675             // The CurrentAlloc block is full, we need to find another
 676             // one.  First, we try taking the next block from the
 677             // nursery:
 678             bd = cap->r.rCurrentNursery->link;
 679
 680             if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 681                 // The nursery is empty, or the next block is already
 682                 // full: allocate a fresh block (we can't fail here).
 683                 ACQUIRE_SM_LOCK;
 684                 bd = allocBlock();
 685                 cap->r.rNursery->n_blocks++;
 686                 RELEASE_SM_LOCK;
 687                 bd->gen_no = 0;
 688                 bd->step = cap->r.rNursery;
 689                 bd->flags = 0;
 690             } else {
 691                 // we have a block in the nursery: take it and put
 692                 // it at the *front* of the nursery list, and use it
 693                 // to allocate() from.
 694                 cap->r.rCurrentNursery->link = bd->link;
 695                 if (bd->link != NULL) {
 696                     bd->link->u.back = cap->r.rCurrentNursery;
 697                 }
 698             }
 699             dbl_link_onto(bd, &cap->r.rNursery->blocks);
 700             cap->r.rCurrentAlloc = bd;
 701             IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
 702         }
 703     }
 704     p = bd->free;
 705     bd->free += n;
 706     return p;
 707 }
 708
 709 /* ---------------------------------------------------------------------------
 710    Allocate a fixed/pinned object.
 711
 712    We allocate small pinned objects into a single block, allocating a
 713    new block when the current one overflows.  The block is chained
 714    onto the large_object_list of generation 0 step 0.
 715
 716    NOTE: The GC can't in general handle pinned objects.  This
 717    interface is only safe to use for ByteArrays, which have no
 718    pointers and don't require scavenging.  It works because the
 719    block's descriptor has the BF_LARGE flag set, so the block is
 720    treated as a large object and chained onto various lists, rather
 721    than the individual objects being copied.  However, when it comes
 722    to scavenge the block, the GC will only scavenge the first object.
 723    The reason is that the GC can't linearly scan a block of pinned
 724    objects at the moment (doing so would require using the
 725    mostly-copying techniques).  But since we're restricting ourselves
 726    to pinned ByteArrays, not scavenging is ok.
 727
 728    This function is called by newPinnedByteArray# which immediately
 729    fills the allocated memory with a MutableByteArray#.
 730    ------------------------------------------------------------------------- */
 731
 732 StgPtr
 733 allocatePinned( nat n )
 734 {
 735     StgPtr p;
 736     bdescr *bd = pinned_object_block;
 737
 738     // If the request is for a large object, then allocate()
 739     // will give us a pinned object anyway.
 740     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
 741         return allocate(n);
 742     }
 743
 744     ACQUIRE_SM_LOCK;
 745
 746     TICK_ALLOC_HEAP_NOCTR(n);
 747     CCS_ALLOC(CCCS,n);
 748
 749     // we always return 8-byte aligned memory.  bd->free must be
 750     // 8-byte aligned to begin with, so we just round up n to
 751     // the nearest multiple of 8 bytes.
 752     if (sizeof(StgWord) == 4) {
 753         n = (n+1) & ~1;
 754     }
 755
 756     // If we don't have a block of pinned objects yet, or the current
 757     // one isn't large enough to hold the new object, allocate a new one.
 758     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
 759         pinned_object_block = bd = allocBlock();
 760         dbl_link_onto(bd, &g0s0->large_objects);
 761         bd->gen_no = 0;
 762         bd->step   = g0s0;
 763         bd->flags  = BF_PINNED | BF_LARGE;
 764         bd->free   = bd->start;
 765         alloc_blocks++;
 766     }
 767
 768     p = bd->free;
 769     bd->free += n;
 770     RELEASE_SM_LOCK;
 771     return p;
 772 }
 773
 774 /* -----------------------------------------------------------------------------
 775    This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
 776    MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
 777    is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
 778    and is put on the mutable list.
 779    -------------------------------------------------------------------------- */
 780
 781 void
 782 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 783 {
 784     Capability *cap = regTableToCapability(reg);
 785     bdescr *bd;
 786     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
 787         p->header.info = &stg_MUT_VAR_DIRTY_info;
 788         bd = Bdescr((StgPtr)p);
 789         if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
 790     }
 791 }
 792
 793 /* -----------------------------------------------------------------------------
 794    Allocation functions for GMP.
 795
 796    These all use the allocate() interface - we can't have any garbage
 797    collection going on during a gmp operation, so we use allocate()
 798    which always succeeds.  The gmp operations which might need to
 799    allocate will ask the storage manager (via doYouWantToGC()) whether
 800    a garbage collection is required, in case we get into a loop doing
 801    only allocate() style allocation.
 802    -------------------------------------------------------------------------- */
 803
 804 static void *
 805 stgAllocForGMP (size_t size_in_bytes)
 806 {
 807   StgArrWords* arr;
 808   nat data_size_in_words, total_size_in_words;
 809
 810   /* round up to a whole number of words */
 811   data_size_in_words  = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
 812   total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
 813
 814   /* allocate and fill it in. */
 815 #if defined(THREADED_RTS)
 816   arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
 817 #else
 818   arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
 819 #endif
 820   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
 821
 822   /* and return a ptr to the goods inside the array */
 823   return arr->payload;
 824 }
 825
 826 static void *
 827 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
 828 {
 829     void *new_stuff_ptr = stgAllocForGMP(new_size);
 830     nat i = 0;
 831     char *p = (char *) ptr;
 832     char *q = (char *) new_stuff_ptr;
 833
 834     for (; i < old_size; i++, p++, q++) {
 835         *q = *p;
 836     }
 837
 838     return(new_stuff_ptr);
 839 }
 840
 841 static void
 842 stgDeallocForGMP (void *ptr STG_UNUSED,
 843                   size_t size STG_UNUSED)
 844 {
 845     /* easy for us: the garbage collector does the dealloc'n */
 846 }
 847
 848 /* -----------------------------------------------------------------------------
 849  * Stats and stuff
 850  * -------------------------------------------------------------------------- */
 851
 852 /* -----------------------------------------------------------------------------
 853  * calcAllocated()
 854  *
 855  * Approximate how much we've allocated: number of blocks in the
 856  * nursery + blocks allocated via allocate() - unused nusery blocks.
 857  * This leaves a little slop at the end of each block, and doesn't
 858  * take into account large objects (ToDo).
 859  * -------------------------------------------------------------------------- */
 860
 861 lnat
 862 calcAllocated( void )
 863 {
 864   nat allocated;
 865   bdescr *bd;
 866
 867   allocated = allocated_bytes();
 868   allocated += countNurseryBlocks() * BLOCK_SIZE_W;
 869
 870   {
 871 #ifdef THREADED_RTS
 872   nat i;
 873   for (i = 0; i < n_nurseries; i++) {
 874       Capability *cap;
 875       for ( bd = capabilities[i].r.rCurrentNursery->link;
 876             bd != NULL; bd = bd->link ) {
 877           allocated -= BLOCK_SIZE_W;
 878       }
 879       cap = &capabilities[i];
 880       if (cap->r.rCurrentNursery->free <
 881           cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
 882           allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
 883               - cap->r.rCurrentNursery->free;
 884       }
 885   }
 886 #else
 887   bdescr *current_nursery = MainCapability.r.rCurrentNursery;
 888
 889   for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
 890       allocated -= BLOCK_SIZE_W;
 891   }
 892   if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
 893       allocated -= (current_nursery->start + BLOCK_SIZE_W)
 894           - current_nursery->free;
 895   }
 896 #endif
 897   }
 898
 899   total_allocated += allocated;
 900   return allocated;
 901 }
 902
 903 /* Approximate the amount of live data in the heap.  To be called just
 904  * after garbage collection (see GarbageCollect()).
 905  */
 906 extern lnat
 907 calcLive(void)
 908 {
 909   nat g, s;
 910   lnat live = 0;
 911   step *stp;
 912
 913   if (RtsFlags.GcFlags.generations == 1) {
 914     live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
 915       ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
 916     return live;
 917   }
 918
 919   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 920     for (s = 0; s < generations[g].n_steps; s++) {
 921       /* approximate amount of live data (doesn't take into account slop
 922        * at end of each block).
 923        */
 924       if (g == 0 && s == 0) {
 925           continue;
 926       }
 927       stp = &generations[g].steps[s];
 928       live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
 929       if (stp->hp_bd != NULL) {
 930           live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
 931               / sizeof(W_);
 932       }
 933       if (stp->scavd_hp != NULL) {
 934           live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
 935       }
 936     }
 937   }
 938   return live;
 939 }
 940
 941 /* Approximate the number of blocks that will be needed at the next
 942  * garbage collection.
 943  *
 944  * Assume: all data currently live will remain live.  Steps that will
 945  * be collected next time will therefore need twice as many blocks
 946  * since all the data will be copied.
 947  */
 948 extern lnat
 949 calcNeeded(void)
 950 {
 951     lnat needed = 0;
 952     nat g, s;
 953     step *stp;
 954
 955     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 956         for (s = 0; s < generations[g].n_steps; s++) {
 957             if (g == 0 && s == 0) { continue; }
 958             stp = &generations[g].steps[s];
 959             if (generations[g].steps[0].n_blocks +
 960                 generations[g].steps[0].n_large_blocks
 961                 > generations[g].max_blocks
 962                 && stp->is_compacted == 0) {
 963                 needed += 2 * stp->n_blocks;
 964             } else {
 965                 needed += stp->n_blocks;
 966             }
 967         }
 968     }
 969     return needed;
 970 }
 971
 972 /* ----------------------------------------------------------------------------
 973    Executable memory
 974
 975    Executable memory must be managed separately from non-executable
 976    memory.  Most OSs these days require you to jump through hoops to
 977    dynamically allocate executable memory, due to various security
 978    measures.
 979
 980    Here we provide a small memory allocator for executable memory.
 981    Memory is managed with a page granularity; we allocate linearly
 982    in the page, and when the page is emptied (all objects on the page
 983    are free) we free the page again, not forgetting to make it
 984    non-executable.
 985    ------------------------------------------------------------------------- */
 986
 987 static bdescr *exec_block;
 988
 989 void *allocateExec (nat bytes)
 990 {
 991     void *ret;
 992     nat n;
 993
 994     ACQUIRE_SM_LOCK;
 995
 996     // round up to words.
 997     n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
 998
 999     if (n+1 > BLOCK_SIZE_W) {
1000         barf("allocateExec: can't handle large objects");
1001     }
1002
1003     if (exec_block == NULL ||
1004         exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1005         bdescr *bd;
1006         lnat pagesize = getPageSize();
1007         bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1008         IF_DEBUG(gc, debugBelch("allocate exec block %p\n", bd->start));
1009         bd->gen_no = 0;
1010         bd->flags = BF_EXEC;
1011         bd->link = exec_block;
1012         if (exec_block != NULL) {
1013             exec_block->u.back = bd;
1014         }
1015         bd->u.back = NULL;
1016         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1017         exec_block = bd;
1018     }
1019     *(exec_block->free) = n;  // store the size of this chunk
1020     exec_block->gen_no += n;  // gen_no stores the number of words allocated
1021     ret = exec_block->free + 1;
1022     exec_block->free += n + 1;
1023
1024     RELEASE_SM_LOCK
1025     return ret;
1026 }
1027
1028 void freeExec (void *addr)
1029 {
1030     StgPtr p = (StgPtr)addr - 1;
1031     bdescr *bd = Bdescr((StgPtr)p);
1032
1033     if ((bd->flags & BF_EXEC) == 0) {
1034         barf("freeExec: not executable");
1035     }
1036
1037     if (*(StgPtr)p == 0) {
1038         barf("freeExec: already free?");
1039     }
1040
1041     ACQUIRE_SM_LOCK;
1042
1043     bd->gen_no -= *(StgPtr)p;
1044     *(StgPtr)p = 0;
1045
1046     // Free the block if it is empty, but not if it is the block at
1047     // the head of the queue.
1048     if (bd->gen_no == 0 && bd != exec_block) {
1049         IF_DEBUG(gc, debugBelch("free exec block %p\n", bd->start));
1050         if (bd->u.back) {
1051             bd->u.back->link = bd->link;
1052         } else {
1053             exec_block = bd->link;
1054         }
1055         if (bd->link) {
1056             bd->link->u.back = bd->u.back;
1057         }
1058         setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1059         freeGroup(bd);
1060     }
1061
1062     RELEASE_SM_LOCK
1063 }
1064
1065 /* -----------------------------------------------------------------------------
1066    Debugging
1067
1068    memInventory() checks for memory leaks by counting up all the
1069    blocks we know about and comparing that to the number of blocks
1070    allegedly floating around in the system.
1071    -------------------------------------------------------------------------- */
1072
1073 #ifdef DEBUG
1074
1075 static lnat
1076 stepBlocks (step *stp)
1077 {
1078     lnat total_blocks;
1079     bdescr *bd;
1080
1081     total_blocks = stp->n_blocks;
1082     total_blocks += stp->n_old_blocks;
1083     for (bd = stp->large_objects; bd; bd = bd->link) {
1084         total_blocks += bd->blocks;
1085         /* hack for megablock groups: they have an extra block or two in
1086            the second and subsequent megablocks where the block
1087            descriptors would normally go.
1088         */
1089         if (bd->blocks > BLOCKS_PER_MBLOCK) {
1090             total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1091                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1092         }
1093     }
1094     return total_blocks;
1095 }
1096
1097 void
1098 memInventory(void)
1099 {
1100   nat g, s, i;
1101   step *stp;
1102   bdescr *bd;
1103   lnat total_blocks = 0, free_blocks = 0;
1104
1105   /* count the blocks we current have */
1106
1107   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1108       for (i = 0; i < n_capabilities; i++) {
1109           for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
1110               total_blocks += bd->blocks;
1111           }
1112       }
1113       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1114           total_blocks += bd->blocks;
1115       }
1116       for (s = 0; s < generations[g].n_steps; s++) {
1117           if (g==0 && s==0) continue;
1118           stp = &generations[g].steps[s];
1119           total_blocks += stepBlocks(stp);
1120       }
1121   }
1122
1123   for (i = 0; i < n_nurseries; i++) {
1124       total_blocks += stepBlocks(&nurseries[i]);
1125   }
1126 #ifdef THREADED_RTS
1127   // We put pinned object blocks in g0s0, so better count blocks there too.
1128   total_blocks += stepBlocks(g0s0);
1129 #endif
1130
1131   /* any blocks held by allocate() */
1132   for (bd = small_alloc_list; bd; bd = bd->link) {
1133     total_blocks += bd->blocks;
1134   }
1135
1136 #ifdef PROFILING
1137   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1138       total_blocks += retainerStackBlocks();
1139   }
1140 #endif
1141
1142   // count the blocks allocated by the arena allocator
1143   total_blocks += arenaBlocks();
1144
1145   // count the blocks containing executable memory
1146   for (bd = exec_block; bd; bd = bd->link) {
1147     total_blocks += bd->blocks;
1148   }
1149
1150   /* count the blocks on the free list */
1151   free_blocks = countFreeList();
1152
1153   if (total_blocks + free_blocks != mblocks_allocated *
1154       BLOCKS_PER_MBLOCK) {
1155     debugBelch("Blocks: %ld live + %ld free  = %ld total (%ld around)\n",
1156             total_blocks, free_blocks, total_blocks + free_blocks,
1157             mblocks_allocated * BLOCKS_PER_MBLOCK);
1158   }
1159
1160   ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
1161 }
1162
1163
1164 nat
1165 countBlocks(bdescr *bd)
1166 {
1167     nat n;
1168     for (n=0; bd != NULL; bd=bd->link) {
1169         n += bd->blocks;
1170     }
1171     return n;
1172 }
1173
1174 /* Full heap sanity check. */
1175 void
1176 checkSanity( void )
1177 {
1178     nat g, s;
1179
1180     if (RtsFlags.GcFlags.generations == 1) {
1181         checkHeap(g0s0->blocks);
1182         checkChain(g0s0->large_objects);
1183     } else {
1184
1185         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1186             for (s = 0; s < generations[g].n_steps; s++) {
1187                 if (g == 0 && s == 0) { continue; }
1188                 ASSERT(countBlocks(generations[g].steps[s].blocks)
1189                        == generations[g].steps[s].n_blocks);
1190                 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1191                        == generations[g].steps[s].n_large_blocks);
1192                 checkHeap(generations[g].steps[s].blocks);
1193                 checkChain(generations[g].steps[s].large_objects);
1194                 if (g > 0) {
1195                     checkMutableList(generations[g].mut_list, g);
1196                 }
1197             }
1198         }
1199
1200         for (s = 0; s < n_nurseries; s++) {
1201             ASSERT(countBlocks(nurseries[s].blocks)
1202                    == nurseries[s].n_blocks);
1203             ASSERT(countBlocks(nurseries[s].large_objects)
1204                    == nurseries[s].n_large_blocks);
1205         }
1206
1207         checkFreeListSanity();
1208     }
1209 }
1210
1211 /* Nursery sanity check */
1212 void
1213 checkNurserySanity( step *stp )
1214 {
1215     bdescr *bd, *prev;
1216     nat blocks = 0;
1217
1218     prev = NULL;
1219     for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1220         ASSERT(bd->u.back == prev);
1221         prev = bd;
1222         blocks += bd->blocks;
1223     }
1224     ASSERT(blocks == stp->n_blocks);
1225 }
1226
1227 // handy function for use in gdb, because Bdescr() is inlined.
1228 extern bdescr *_bdescr( StgPtr p );
1229
1230 bdescr *
1231 _bdescr( StgPtr p )
1232 {
1233     return Bdescr(p);
1234 }
1235
1236 #endif