/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * The block allocator and free list manager.
 *
 * This is the architecture independent part of the block allocator.
 * It requires only the following support from the operating system:
 *
 *    void *getMBlock(nat n);
 *
 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
 * an MBLOCK_SIZE boundary.  There are no other restrictions on the
 * addresses of memory returned by getMBlock().
 *
 * ---------------------------------------------------------------------------*/
#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "RtsUtils.h"
#include "BlockAlloc.h"
#include "OSMem.h"

#include <string.h>

static void initMBlock(void *mblock);

/* -----------------------------------------------------------------------------

  Implementation notes
  ~~~~~~~~~~~~~~~~~~~~

  Terminology:
    - bdescr = block descriptor
    - bgroup = block group (1 or more adjacent blocks)
    - mblock = mega block
    - mgroup = mega group (1 or more adjacent mblocks)

  Invariants on block descriptors
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  bd->start always points to the start of the block.

  bd->free is either
     - zero for a non-group-head; bd->link points to the head
     - (-1) for the head of a free block group
     - or it points within the block

  bd->blocks is either
     - zero for a non-group-head; bd->link points to the head
     - number of blocks in this group otherwise

  bd->link either points to a block descriptor or is NULL

  The following fields are not used by the allocator:
    bd->flags
    bd->gen_no
    bd->step

  Exceptions: we don't maintain invariants for all the blocks within a
  group on the free list, because it is expensive to modify every
  bdescr in a group when coalescing.  Just the head and last bdescrs
  will be correct for a group on the free list.

  Free lists
  ~~~~~~~~~~

  Preliminaries:
    - most allocations are for a small number of blocks
    - sometimes the OS gives us new memory backwards in the address
      space, sometimes forwards, so we should not be biased towards
      any particular layout in the address space
    - We want to avoid fragmentation
    - We want allocation and freeing to be O(1) or close.

  Coalescing trick: when a bgroup is freed (freeGroup()), we can check
  whether it can be coalesced with other free bgroups by checking the
  bdescrs for the blocks on either side of it.  This means that we can
  coalesce in O(1) time.  Every free bgroup must have its head and tail
  bdescrs initialised; the rest don't matter.

  We keep the free list in buckets, using a heap-sort strategy.
  Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
  blocks in each bucket is doubly-linked, so that if a block is
  coalesced we can easily remove it from its current free list.

  To allocate a new block of size S, grab a block from bucket
  log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
  least as big as S, and split it if necessary.  If there are no
  blocks in that bucket, look at bigger buckets until a block is found.
  Allocation is therefore O(logN) time.

  To free a block:
    - coalesce it with neighbours.
    - remove coalesced neighbour(s) from free list(s)
    - add the new (coalesced) block to the front of the appropriate
      bucket, given by log2(S) where S is the size of the block.

  We cannot play this coalescing trick with mblocks, because there is
  no requirement that the bdescrs in the second and subsequent mblock
  of an mgroup are initialised (the mgroup might be filled with a
  large array, overwriting the bdescrs for example).

  So there is a separate free list for megablocks, sorted in *address*
  order, so that we can coalesce.  Allocation in this list is best-fit
  by traversing the whole list: we don't expect this list to be long,
  and allocation/freeing of large blocks is rare; avoiding
  fragmentation is more important than performance here.

  freeGroup() might end up moving a block from free_list to
  free_mblock_list, if after coalescing we end up with a full mblock.

  checkFreeListSanity() checks all the invariants on the free lists.

  --------------------------------------------------------------------------- */
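
/* A worked example of the bucket scheme above, using illustrative sizes
 * (the concrete numbers are assumptions for this example, not taken from
 * any particular build): to allocate S = 13 blocks we start at bucket
 * log2ceiling(13) = 4, which holds groups of 16..31 blocks, so any group
 * found there is big enough.  A 16-block group would be split into the
 * 13 blocks we return and a 3-block remainder, which goes back on bucket
 * log2(3) = 1.  Freeing those 13 blocks later re-inserts them at bucket
 * log2(13) = 3, after any possible coalescing with their neighbours.
 */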

#define MAX_FREE_LIST 9

// In THREADED_RTS mode, the free list is protected by sm_mutex.

static bdescr *free_list[MAX_FREE_LIST];
static bdescr *free_mblock_list;

// free_list[i] contains blocks that are at least size 2^i, and at
// most size 2^(i+1) - 1.
//
// To find the free list in which to place a block, use log_2(size).
// To find a free block of the right size, use log_2_ceil(size).
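//
// For example (an illustration, nothing special about these sizes): a
// free group of 6 blocks is placed on free_list[2], which holds sizes
// 4..7, because log_2(6) = 2; a request for 6 blocks searches from
// free_list[3] upwards, because log_2_ceil(6) = 3 and every group on
// free_list[3] is guaranteed to hold at least 8 >= 6 blocks.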

lnat n_alloc_blocks;   // currently allocated blocks
lnat hw_alloc_blocks;  // high-water allocated blocks

/* -----------------------------------------------------------------------------
   Initialisation
   -------------------------------------------------------------------------- */

void initBlockAllocator(void)
{
    nat i;
    for (i=0; i < MAX_FREE_LIST; i++) {
        free_list[i] = NULL;
    }
    free_mblock_list = NULL;
    n_alloc_blocks = 0;
    hw_alloc_blocks = 0;
}

/* -----------------------------------------------------------------------------
   Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE void
initGroup(bdescr *head)
{
    bdescr *bd;
    nat i, n;

    n = head->blocks;
    head->free = head->start;
    head->link = NULL;
    // every other bdescr in the group points back to the head
    for (i=1, bd = head+1; i < n; i++, bd++) {
        bd->free = 0;
        bd->blocks = 0;
        bd->link = head;
    }
}

// There are quicker non-loopy ways to do log_2, but we expect n to be
// usually small, and MAX_FREE_LIST is also small, so the loop version
// might well be the best choice here.
STATIC_INLINE nat
log_2_ceil(nat n)
{
    nat i, x;
    x = 1;
    for (i=0; i < MAX_FREE_LIST; i++) {
        if (x >= n) return i;
        x = x << 1;
    }
    return MAX_FREE_LIST;
}

STATIC_INLINE nat
log_2(nat n)
{
    nat i, x;
    x = n;
    for (i=0; i < MAX_FREE_LIST; i++) {
        x = x >> 1;
        if (x == 0) return i;
    }
    return MAX_FREE_LIST;
}

STATIC_INLINE void
free_list_insert (bdescr *bd)
{
    nat ln;
    ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
    ln = log_2(bd->blocks);

    dbl_link_onto(bd, &free_list[ln]);
}

STATIC_INLINE bdescr *
tail_of (bdescr *bd)
{
    return bd + bd->blocks - 1;
}

// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// coalescing.
STATIC_INLINE void
setup_tail (bdescr *bd)
{
    bdescr *tail;
    tail = tail_of(bd);
    if (tail != bd) {
        tail->blocks = 0;
        tail->free = 0;
        tail->link = bd;
    }
}

// Take a free block group bd, and split off a group of size n from
// it.  Adjust the free list as necessary, and return the new group.
static bdescr *
split_free_block (bdescr *bd, nat n, nat ln)
{
    bdescr *fg; // free group

    ASSERT(bd->blocks > n);
    dbl_link_remove(bd, &free_list[ln]);
    fg = bd + bd->blocks - n; // take n blocks off the end
    fg->blocks = n;
    bd->blocks -= n;
    setup_tail(bd);
    ln = log_2(bd->blocks);
    dbl_link_onto(bd, &free_list[ln]);
    return fg;
}
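
// A worked example of split_free_block, with illustrative sizes: suppose
// bd is a free group of 16 blocks sitting on free_list[4] (sizes 16..31)
// and the caller wants n = 5 blocks.  We unlink bd from bucket 4, carve
// fg = bd + 11 off the end with fg->blocks = 5, shrink bd to 11 blocks,
// rebuild its tail, and re-insert it on free_list[3] since log_2(11) = 3.
// Only the head and tail bdescrs of the remaining free group are touched,
// preserving the O(1) coalescing invariant.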

static bdescr *
alloc_mega_group (nat mblocks)
{
    bdescr *best, *bd, *prev;
    nat n;

    n = MBLOCK_GROUP_BLOCKS(mblocks);

    // best-fit search of the address-ordered megablock free list
    best = NULL;
    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        if (bd->blocks == n)          // exact fit: unlink and return it
        {
            if (prev) {
                prev->link = bd->link;
            } else {
                free_mblock_list = bd->link;
            }
            return bd;
        }
        else if (bd->blocks > n)
        {
            if (!best || bd->blocks < best->blocks)
            {
                best = bd;
            }
        }
    }

    if (best)
    {
        // we take our chunk off the end here.
        StgWord best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
        bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
                          (best_mblocks-mblocks)*MBLOCK_SIZE);

        best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
        initMBlock(MBLOCK_ROUND_DOWN(bd));
    }
    else
    {
        void *mblock = getMBlocks(mblocks);
        initMBlock(mblock); // only need to init the 1st one
        bd = FIRST_BDESCR(mblock);
    }
    bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    return bd;
}
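
// An illustration of the best-fit path above: if free_mblock_list holds
// mgroups of 3 and 2 mblocks (in address order) and the caller asks for
// 1 mblock, neither is an exact fit, so best ends up as the 2-mblock
// group.  The single mblock we return is carved off the *end* of that
// group, so the group simply shrinks in place and the list stays sorted
// by address without any relinking.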

bdescr *
allocGroup (nat n)
{
    bdescr *bd, *rem;
    nat ln;

    if (n == 0) barf("allocGroup: requested zero blocks");

    if (n >= BLOCKS_PER_MBLOCK)
    {
        nat mblocks;

        mblocks = BLOCKS_TO_MBLOCKS(n);

        // n_alloc_blocks doesn't count the extra blocks we get in a
        // megablock group.
        n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
        if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;

        bd = alloc_mega_group(mblocks);
        // only the bdescrs of the first MB are required to be initialised
        initGroup(bd);

        IF_DEBUG(sanity, checkFreeListSanity());
        return bd;
    }

    n_alloc_blocks += n;
    if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;

    ln = log_2_ceil(n);

    while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
        ln++;
    }

    if (ln == MAX_FREE_LIST) {
#if 0
        if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
            debugBelch("Fragmentation, wanted %d blocks:", n);
            RtsFlags.DebugFlags.block_alloc = 1;
            checkFreeListSanity();
        }
#endif

        bd = alloc_mega_group(1);
        bd->blocks = n;
        initGroup(bd);                   // we know the group will fit
        rem = bd + n;
        rem->blocks = BLOCKS_PER_MBLOCK-n;
        initGroup(rem);                  // init the slop
        n_alloc_blocks += rem->blocks;
        freeGroup(rem);                  // add the slop on to the free list
        IF_DEBUG(sanity, checkFreeListSanity());
        ASSERT(bd->blocks == n);
        return bd;
    }

    bd = free_list[ln];

    if (bd->blocks == n)                 // exactly the right size!
    {
        dbl_link_remove(bd, &free_list[ln]);
    }
    else if (bd->blocks > n)             // block too big...
    {
        bd = split_free_block(bd, n, ln);
    }
    else
    {
        barf("allocGroup: free list corrupted");
    }
    initGroup(bd);      // initialise it
    IF_DEBUG(sanity, checkFreeListSanity());
    ASSERT(bd->blocks == n);
    return bd;
}
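
#if 0
/* A usage sketch (illustrative only, not part of the allocator): RTS
 * clients allocate a group, use the memory between bd->start and the
 * end of the group, and hand it back with freeGroup().  The _lock
 * variants below wrap the same calls in ACQUIRE_SM_LOCK/RELEASE_SM_LOCK
 * for THREADED_RTS callers; the hypothetical example_client here is
 * only meant to show the calling convention. */
static void example_client (void)
{
    bdescr *bd = allocGroup(4);      // 4 contiguous blocks
    memset(bd->start, 0, 4 * BLOCK_SIZE);
    freeGroup(bd);                   // back onto the free lists
}
#endif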

bdescr *
allocGroup_lock(nat n)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroup(n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlock(void)
{
    return allocGroup(1);
}

bdescr *
allocBlock_lock(void)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlock();
    RELEASE_SM_LOCK;
    return bd;
}

/* -----------------------------------------------------------------------------
   De-Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE bdescr *
coalesce_mblocks (bdescr *p)
{
    bdescr *q;

    q = p->link;
    if (q != NULL &&
        MBLOCK_ROUND_DOWN(q) ==
        (StgWord8*)MBLOCK_ROUND_DOWN(p) +
        BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
        // can coalesce
        p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                        BLOCKS_TO_MBLOCKS(q->blocks));
        p->link = q->link;
        return p;
    }
    return q;
}
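
// In other words, p and its successor q coalesce exactly when q's
// megablock starts where p's mgroup ends: with an assumed 1MB
// MBLOCK_SIZE, a 2-mblock group p at address 0x400000 ends at 0x600000,
// so a q whose mblock rounds down to 0x600000 is adjacent and the two
// merge into a single 3-mblock group.  (The addresses and sizes here
// are illustrative, not from any real heap.)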

static void
free_mega_group (bdescr *mg)
{
    bdescr *bd, *prev;

    // Find the right place in the free list.  free_mblock_list is
    // sorted by *address*, not by size as the free_list is.
    prev = NULL;
    bd = free_mblock_list;
    while (bd && bd->start < mg->start) {
        prev = bd;
        bd = bd->link;
    }

    // coalesce backwards
    if (prev)
    {
        mg->link = prev->link;
        prev->link = mg;
        mg = coalesce_mblocks(prev);
    }
    else
    {
        mg->link = free_mblock_list;
        free_mblock_list = mg;
    }
    // coalesce forwards
    coalesce_mblocks(mg);

    IF_DEBUG(sanity, checkFreeListSanity());
}

void
freeGroup(bdescr *p)
{
  nat ln;

  // Todo: not true in multithreaded GC
  //  ASSERT_SM_LOCK();

  ASSERT(p->free != (P_)-1);

  p->free = (void *)-1;  /* indicates that this block is free */
  p->step = NULL;
  p->gen_no = 0;
  /* fill the block group with garbage if sanity checking is on */
  IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));

  if (p->blocks == 0) barf("freeGroup: block size is zero");

  if (p->blocks >= BLOCKS_PER_MBLOCK)
  {
      nat mblocks;

      mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
      // If this is an mgroup, make sure it has the right number of blocks
      ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));

      n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;

      free_mega_group(p);
      return;
  }

  ASSERT(n_alloc_blocks >= p->blocks);
  n_alloc_blocks -= p->blocks;

  // coalesce forwards
  {
      bdescr *next;
      next = p + p->blocks;
      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
      {
          p->blocks += next->blocks;
          ln = log_2(next->blocks);
          dbl_link_remove(next, &free_list[ln]);
          if (p->blocks == BLOCKS_PER_MBLOCK)
          {
              free_mega_group(p);
              return;
          }
          setup_tail(p);
      }
  }

  // coalesce backwards
  if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
  {
      bdescr *prev;
      prev = p - 1;
      if (prev->blocks == 0) prev = prev->link; // find the head

      if (prev->free == (P_)-1)
      {
          ln = log_2(prev->blocks);
          dbl_link_remove(prev, &free_list[ln]);
          prev->blocks += p->blocks;
          if (prev->blocks >= BLOCKS_PER_MBLOCK)
          {
              free_mega_group(prev);
              return;
          }
          p = prev;
      }
  }

  setup_tail(p);
  free_list_insert(p);

  IF_DEBUG(sanity, checkFreeListSanity());
}
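
// A trace of the coalescing above, with illustrative sizes: freeing a
// 10-block group whose forward neighbour is already a free 20-block
// group removes the neighbour from free_list[4] (log_2(20) = 4), grows
// p to 30 blocks, rebuilds p's tail, and finally inserts p back on
// free_list[4], since log_2(30) = 4.  If the merged group ever reaches
// BLOCKS_PER_MBLOCK, it graduates to free_mega_group() instead.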

void
freeGroup_lock(bdescr *p)
{
    ACQUIRE_SM_LOCK;
    freeGroup(p);
    RELEASE_SM_LOCK;
}

void
freeChain(bdescr *bd)
{
    bdescr *next_bd;
    while (bd != NULL) {
        next_bd = bd->link;
        freeGroup(bd);
        bd = next_bd;
    }
}

void
freeChain_lock(bdescr *bd)
{
    ACQUIRE_SM_LOCK;
    freeChain(bd);
    RELEASE_SM_LOCK;
}

// splitBlockGroup(bd,B) splits bd in two.  Afterward, bd will have B
// blocks, and a new block descriptor pointing to the remainder is
// returned.
bdescr *
splitBlockGroup (bdescr *bd, nat blocks)
{
    bdescr *new_bd;

    if (bd->blocks <= blocks) {
        barf("splitLargeBlock: too small");
    }

    if (bd->blocks > BLOCKS_PER_MBLOCK) {
        nat low_mblocks, high_mblocks;
        void *new_mblock;
        if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
            barf("splitLargeBlock: not a multiple of a megablock");
        }
        low_mblocks  = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
        high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);

        new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + low_mblocks * MBLOCK_SIZE_W);
        initMBlock(new_mblock);
        new_bd = FIRST_BDESCR(new_mblock);
        new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);

        ASSERT(blocks + new_bd->blocks ==
               bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
    }
    else
    {
        // NB. we're not updating all the bdescrs in the split groups to
        // point to the new heads, so this can only be used for large
        // objects which do not start in the non-head block.
        new_bd = bd + blocks;
        new_bd->blocks = bd->blocks - blocks;
    }
    bd->blocks = blocks;

    return new_bd;
}
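
// The megablock arithmetic above, worked through with assumed sizes
// (1MB megablocks and 4KB blocks, so MBLOCK_SIZE/BLOCK_SIZE = 256 and,
// say, BLOCKS_PER_MBLOCK = 252 after the block descriptors): splitting
// a 3-mblock group of 252 + 2*256 = 764 blocks at blocks = 508 gives
// low_mblocks = 1 + (508-252)/256 = 2 and high_mblocks = (764-508)/256
// = 1, so new_bd describes a 1-mblock group of 252 blocks, and indeed
// 508 + 252 == 764 + 252 - 256, as the ASSERT demands.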

static void
initMBlock(void *mblock)
{
    bdescr *bd;
    StgWord8 *block;

    /* the first few Bdescr's in a block are unused, so we don't want to
     * put them all on the free list.
     */
    block = FIRST_BLOCK(mblock);
    bd    = FIRST_BDESCR(mblock);

    /* Initialise the start field of each block descriptor
     */
    for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
             block += BLOCK_SIZE) {
        bd->start = (void*)block;
    }
}

/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */

nat
countBlocks(bdescr *bd)
{
    nat n;
    for (n=0; bd != NULL; bd=bd->link) {
        n += bd->blocks;
    }
    return n;
}

// (*1) Just like countBlocks, except that we adjust the count for a
// megablock group so that it doesn't include the extra few blocks
// that would be taken up by block descriptors in the second and
// subsequent megablock.  This is so we can tally the count with the
// number of blocks allocated in the system, for memInventory().
nat
countAllocdBlocks(bdescr *bd)
{
    nat n;
    for (n=0; bd != NULL; bd=bd->link) {
        n += bd->blocks;
        // hack for megablock groups: see (*1) above
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
            n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
        }
    }
    return n;
}
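
// With the same assumed geometry as the splitBlockGroup example above
// (256 blocks per megablock, BLOCKS_PER_MBLOCK = 252), a 2-mblock group
// has bd->blocks = 252 + 256 = 508; the adjustment subtracts
// (256 - 252) * (508/256) = 4 * 1 = 4, counting the group as
// 504 = 2 * 252 blocks, i.e. as if every megablock in the group lost
// the space for its block descriptors.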

/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */

#ifdef DEBUG
static void
check_tail (bdescr *bd)
{
    bdescr *tail = tail_of(bd);

    if (tail != bd)
    {
        ASSERT(tail->blocks == 0);
        ASSERT(tail->free == 0);
        ASSERT(tail->link == bd);
    }
}

void
checkFreeListSanity(void)
{
    bdescr *bd, *prev;
    nat ln, min;

    min = 1;
    for (ln = 0; ln < MAX_FREE_LIST; ln++) {
        IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));

        prev = NULL;
        for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
        {
            IF_DEBUG(block_alloc,
                     debugBelch("group at %p, length %ld blocks\n",
                                bd->start, (long)bd->blocks));
            ASSERT(bd->free == (P_)-1);
            ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
            ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
            ASSERT(bd->link != bd); // catch easy loops

            check_tail(bd);

            if (prev)
                ASSERT(bd->u.back == prev);
            else
                ASSERT(bd->u.back == NULL);

            {
                bdescr *next;
                next = bd + bd->blocks;
                if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                {
                    ASSERT(next->free != (P_)-1);
                }
            }
        }
        min = min << 1;
    }

    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        IF_DEBUG(block_alloc,
                 debugBelch("mega group at %p, length %ld blocks\n",
                            bd->start, (long)bd->blocks));

        ASSERT(bd->link != bd); // catch easy loops

        if (bd->link != NULL)
        {
            // make sure the list is sorted
            ASSERT(bd->start < bd->link->start);
        }

        ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
        ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
               == bd->blocks);

        // make sure we're fully coalesced
        if (bd->link != NULL)
        {
            ASSERT(MBLOCK_ROUND_DOWN(bd->link) !=
                   (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
                   BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
        }
    }
}

nat /* BLOCKS */
countFreeList(void)
{
    bdescr *bd;
    lnat total_blocks = 0;
    nat ln;

    for (ln=0; ln < MAX_FREE_LIST; ln++) {
        for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
            total_blocks += bd->blocks;
        }
    }
    for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
        total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
        // The caller of this function, memInventory(), expects to match
        // the total number of blocks in the system against mblocks *
        // BLOCKS_PER_MBLOCK, so we must subtract the space for the
        // block descriptors from *every* mblock.
    }
    return total_blocks;
}

void
markBlocks (bdescr *bd)
{
    for (; bd != NULL; bd = bd->link) {
        bd->flags |= BF_KNOWN;
    }
}

void
reportUnmarkedBlocks (void)
{
    void *mblock;
    bdescr *bd;

    debugBelch("Unreachable blocks:\n");
    for (mblock = getFirstMBlock(); mblock != NULL;
         mblock = getNextMBlock(mblock)) {
        for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
            if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
                debugBelch("  %p\n",bd);
            }
            if (bd->blocks >= BLOCKS_PER_MBLOCK) {
                // hop over the rest of this mgroup; the outer loop
                // then advances to the next mblock after it
                mblock = (StgWord8*)mblock +
                    (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
                break;
            } else {
                bd += bd->blocks;
            }
        }
    }
}

#endif