1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * The block allocator and free list manager.
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
10 * void *getMBlock(nat n);
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlock().
16 * ---------------------------------------------------------------------------*/
18 #include "PosixSource.h"
23 #include "BlockAlloc.h"
27 static void initMBlock(void *mblock);
29 // The free_list is kept sorted by size, smallest first.
30 // In THREADED_RTS mode, the free list is protected by sm_mutex.
32 /* -----------------------------------------------------------------------------
38 - bdescr = block descriptor
39 - bgroup = block group (1 or more adjacent blocks)
41 - mgroup = mega group (1 or more adjacent mblocks)
43 Invariants on block descriptors
44 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
45 bd->start always points to the start of the block.
48 - zero for a non-group-head; bd->link points to the head
49 - (-1) for the head of a free block group
50 - or it points within the block
53 - zero for a non-group-head; bd->link points to the head
54 - number of blocks in this group otherwise
56 bd->link either points to a block descriptor or is NULL
58 The following fields are not used by the allocator:
64 Exceptions: we don't maintain invariants for all the blocks within a
65 group on the free list, because it is expensive to modify every
66 bdescr in a group when coalescing. Just the head and last bdescrs
67 will be correct for a group on the free list.
74 - most allocations are for small blocks
75 - sometimes the OS gives us new memory backwards in the address
76 space, sometimes forwards, so we should not be biased towards
77 any particular layout in the address space
78 - We want to avoid fragmentation
79 - We want allocation and freeing to be O(1) or close.
81 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
82 whether it can be coalesced with other free bgroups by checking the
83 bdescrs for the blocks on either side of it. This means that we can
84 coalesce in O(1) time. Every free bgroup must have its head and tail
85 bdescrs initialised, the rest don't matter.
87 We keep the free list in buckets, using a heap-sort strategy.
88 Bucket N contains blocks with sizes in the range 2^N to 2^(N+1)-1. The list of
89 blocks in each bucket is doubly-linked, so that if a block is
90 coalesced we can easily remove it from its current free list.
92 To allocate a new block of size S, grab a block from bucket
93 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
94 least as big as S, and split it if necessary. If there are no
95 blocks in that bucket, look at bigger buckets until a block is found.
96 Allocation is therefore O(logN) time.
99 - coalesce it with neighbours.
100 - remove coalesced neighbour(s) from free list(s)
101 - add the new (coalesced) block to the front of the appropriate
102 bucket, given by log2(S) where S is the size of the block.
106 We cannot play this coalescing trick with mblocks, because there is
107 no requirement that the bdescrs in the second and subsequent mblock
108 of an mgroup are initialised (the mgroup might be filled with a
109 large array, overwriting the bdescrs for example).
111 So there is a separate free list for megablocks, sorted in *address*
112 order, so that we can coalesce. Allocation in this list is best-fit
113 by traversing the whole list: we don't expect this list to be long,
114 and allocation/freeing of large blocks is rare; avoiding
115 fragmentation is more important than performance here.
117 freeGroup() might end up moving a block from free_list to
118 free_mblock_list, if after coalescing we end up with a full mblock.
120 checkFreeListSanity() checks all the invariants on the free lists.
122 --------------------------------------------------------------------------- */
// Number of size-class buckets for the block free list (buckets are
// powers of two, so this covers groups up to 2^MAX_FREE_LIST - 1 blocks).
124 #define MAX_FREE_LIST 9
// Bucketed, doubly-linked free lists for sub-megablock groups, and a
// separate address-sorted, singly-linked list for whole megablock groups.
126 static bdescr *free_list[MAX_FREE_LIST];
127 static bdescr *free_mblock_list;
129 // free_list[i] contains blocks that are at least size 2^i, and at
130 // most size 2^(i+1) - 1.
132 // To find the free list in which to place a block, use log_2(size).
133 // To find a free block of the right size, use log_2_ceil(size).
135 lnat n_alloc_blocks; // currently allocated blocks
136 lnat hw_alloc_blocks; // high-water allocated blocks
138 /* -----------------------------------------------------------------------------
140 -------------------------------------------------------------------------- */
/* Initialise the block allocator: empty every size-class free list
 * and the megablock free list.  (NOTE(review): this listing is elided;
 * the loop body and closing braces are not visible here.) */
142 void initBlockAllocator(void)
145 for (i=0; i < MAX_FREE_LIST; i++) {
148 free_mblock_list = NULL;
153 /* -----------------------------------------------------------------------------
155 -------------------------------------------------------------------------- */
// Initialise the bdescrs of a freshly-allocated group: the head's free
// pointer starts at the start of its block, and (per the invariants at
// the top of this file) each non-head bdescr is linked back to the head.
// NOTE(review): listing elided — setup of n, bd->blocks etc. not visible.
158 initGroup(bdescr *head)
164 head->free = head->start;
166 for (i=1, bd = head+1; i < n; i++, bd++) {
173 // There are quicker non-loopy ways to do log_2, but we expect n to be
174 // usually small, and MAX_FREE_LIST is also small, so the loop version
175 // might well be the best choice here.
// log_2_ceil(n): the smallest bucket index i such that every group in
// bucket i is at least n blocks — the bucket to *search* when
// allocating.  Returns MAX_FREE_LIST when n exceeds every bucket.
// (Function signature elided from this listing.)
181 for (i=0; i < MAX_FREE_LIST; i++) {
182 if (x >= n) return i;
185 return MAX_FREE_LIST;
// log_2(n): floor(log2(n)) — the bucket in which to *place* a free
// group of n blocks.  Returns MAX_FREE_LIST when out of range.
// (Function signature elided from this listing.)
193 for (i=0; i < MAX_FREE_LIST; i++) {
195 if (x == 0) return i;
197 return MAX_FREE_LIST;
// Insert free group bd onto the doubly-linked free list for its size
// class, bucket log_2(bd->blocks).  Only valid for groups smaller than
// a megablock (megablock groups live on free_mblock_list instead).
201 free_list_insert (bdescr *bd)
205 ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
206 ln = log_2(bd->blocks);
208 dbl_link_onto(bd, &free_list[ln]);
// tail_of(bd): the bdescr of the last block in bd's group.
212 STATIC_INLINE bdescr *
215 return bd + bd->blocks - 1;
218 // After splitting a group, the last block of each group must have a
219 // tail that points to the head block, to keep our invariants for
// the O(1) coalescing trick (see notes at the top of this file).
222 setup_tail (bdescr *bd)
234 // Take a free block group bd, and split off a group of size n from
235 // it. Adjust the free list as necessary, and return the new group.
237 split_free_block (bdescr *bd, nat n, nat ln)
239 bdescr *fg; // free group
241 ASSERT(bd->blocks > n);
// bd currently lives on free_list[ln]: unlink it, carve the n blocks
// off the *high* end, then re-bucket the (now smaller) remainder.
242 dbl_link_remove(bd, &free_list[ln]);
243 fg = bd + bd->blocks - n; // take n blocks off the end
247 ln = log_2(bd->blocks);
248 dbl_link_onto(bd, &free_list[ln]);
// Allocate a group of `mblocks` contiguous megablocks.  Strategy (per
// the notes above): best-fit over the address-sorted free_mblock_list;
// an exact fit is unlinked and returned; otherwise the best-fitting
// larger group is remembered in `best`.  If nothing fits, fall through
// to the OS via getMBlocks().
253 alloc_mega_group (nat mblocks)
255 bdescr *best, *bd, *prev;
258 n = MBLOCK_GROUP_BLOCKS(mblocks);
262 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
// exact fit: unlink bd from the singly-linked megablock list
267 prev->link = bd->link;
269 free_mblock_list = bd->link;
274 else if (bd->blocks > n)
276 if (!best || bd->blocks < best->blocks)
// Best-fit case: take our chunk from the high-address end of `best`,
// shrinking it in place (it stays on the list, address unchanged).
285 // we take our chunk off the end here.
286 nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
287 bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
288 (best_mblocks-mblocks)*MBLOCK_SIZE);
290 best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
291 initMBlock(MBLOCK_ROUND_DOWN(bd));
// No free megablock group is big enough: get fresh ones from the OS.
295 void *mblock = getMBlocks(mblocks);
296 initMBlock(mblock); // only need to init the 1st one
297 bd = FIRST_BDESCR(mblock);
299 bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
// allocGroup(n): allocate a contiguous group of n blocks.
// (NOTE(review): the function signature and several lines are elided
// from this listing.)
309 if (n == 0) barf("allocGroup: requested zero blocks");
// Requests of a megablock or more go to the megablock allocator.
311 if (n >= BLOCKS_PER_MBLOCK)
315 mblocks = BLOCKS_TO_MBLOCKS(n);
317 // n_alloc_blocks doesn't count the extra blocks we get in a
319 n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
320 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
322 bd = alloc_mega_group(mblocks);
323 // only the bdescrs of the first MB are required to be initialised
326 IF_DEBUG(sanity, checkFreeListSanity());
// Small request: update the high-water mark, then search buckets from
// log_2_ceil(n) upwards for a non-empty free list.
331 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
335 while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
339 if (ln == MAX_FREE_LIST) {
// All buckets empty.  If we are wasting more than ~1MB to
// fragmentation, dump the free lists as a debugging aid.
341 if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
342 debugBelch("Fragmentation, wanted %d blocks:", n);
343 RtsFlags.DebugFlags.block_alloc = 1;
344 checkFreeListSanity();
// Fall back: grab a fresh megablock, take n blocks off it, and free
// the remainder (the "slop") back onto the free lists.
348 bd = alloc_mega_group(1);
350 initGroup(bd); // we know the group will fit
352 rem->blocks = BLOCKS_PER_MBLOCK-n;
353 initGroup(rem); // init the slop
354 n_alloc_blocks += rem->blocks;
355 freeGroup(rem); // add the slop on to the free list
356 IF_DEBUG(sanity, checkFreeListSanity());
// Found a free group in bucket ln: take it whole, or split off n blocks.
362 if (bd->blocks == n) // exactly the right size!
364 dbl_link_remove(bd, &free_list[ln]);
366 else if (bd->blocks > n) // block too big...
368 bd = split_free_block(bd, n, ln);
// a smaller group in this bucket would violate the bucket invariant
372 barf("allocGroup: free list corrupted");
374 initGroup(bd); // initialise it
375 IF_DEBUG(sanity, checkFreeListSanity());
376 ASSERT(bd->blocks == n);
// Locked variants for THREADED_RTS — presumably these take sm_mutex
// around the unlocked allocators (bodies elided in this listing;
// confirm against the full source).  allocBlock is allocGroup(1).
381 allocGroup_lock(nat n)
393 return allocGroup(1);
397 allocBlock_lock(void)
406 /* -----------------------------------------------------------------------------
408 -------------------------------------------------------------------------- */
// If the next megablock group on the list (q = p->link, presumably —
// elided here) is exactly adjacent to p in the address space, merge it
// into p.  Keeps free_mblock_list fully coalesced.
410 STATIC_INLINE bdescr *
411 coalesce_mblocks (bdescr *p)
417 MBLOCK_ROUND_DOWN(q) ==
418 (StgWord8*)MBLOCK_ROUND_DOWN(p) +
419 BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
// adjacent: absorb q's megablocks into p
421 p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
422 BLOCKS_TO_MBLOCKS(q->blocks));
// Free a megablock group mg: insert it into the address-sorted
// free_mblock_list, then coalesce with the neighbour on each side.
430 free_mega_group (bdescr *mg)
434 // Find the right place in the free list. free_mblock_list is
435 // sorted by *address*, not by size as the free_list is.
437 bd = free_mblock_list;
438 while (bd && bd->start < mg->start) {
443 // coalesce backwards
446 mg->link = prev->link;
448 mg = coalesce_mblocks(prev);
// no predecessor: mg becomes the new head of the list
452 mg->link = free_mblock_list;
453 free_mblock_list = mg;
// coalesce forwards with the successor, if adjacent
456 coalesce_mblocks(mg);
458 IF_DEBUG(sanity, checkFreeListSanity());
// freeGroup(p): free a block group, coalescing with free neighbours in
// O(1) via their head/tail bdescrs (see notes at the top of the file).
// If coalescing yields a whole megablock, it is promoted to the
// megablock free list.  (Function signature elided in this listing.)
467 // Todo: not true in multithreaded GC
470 ASSERT(p->free != (P_)-1);
472 p->free = (void *)-1; /* indicates that this block is free */
475 /* fill the block group with garbage if sanity checking is on */
476 IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));
478 if (p->blocks == 0) barf("freeGroup: block size is zero");
// megablock-sized groups go straight to the megablock free list
480 if (p->blocks >= BLOCKS_PER_MBLOCK)
484 mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
485 // If this is an mgroup, make sure it has the right number of blocks
486 ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
488 n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;
494 ASSERT(n_alloc_blocks >= p->blocks);
495 n_alloc_blocks -= p->blocks;
// Coalesce forwards: the head bdescr of the following group sits
// immediately after p's group; absorb it if it is free (free == -1).
500 next = p + p->blocks;
501 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
503 p->blocks += next->blocks;
504 ln = log_2(next->blocks);
505 dbl_link_remove(next, &free_list[ln]);
506 if (p->blocks == BLOCKS_PER_MBLOCK)
515 // coalesce backwards
516 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
// The bdescr just before p is the *tail* of the previous group
// (blocks == 0 marks a non-head); follow its link to reach the head.
520 if (prev->blocks == 0) prev = prev->link; // find the head
522 if (prev->free == (P_)-1)
524 ln = log_2(prev->blocks);
525 dbl_link_remove(prev, &free_list[ln]);
526 prev->blocks += p->blocks;
527 if (prev->blocks >= BLOCKS_PER_MBLOCK)
529 free_mega_group(prev);
539 IF_DEBUG(sanity, checkFreeListSanity());
// Locked and chain variants (bodies elided in this listing):
// freeGroup_lock presumably takes sm_mutex around freeGroup;
// freeChain frees a whole bd->link chain — confirm in full source.
543 freeGroup_lock(bdescr *p)
551 freeChain(bdescr *bd)
562 freeChain_lock(bdescr *bd)
569 // splitBlockGroup(bd,B) splits bd in two. Afterward, bd will have B
570 // blocks, and a new block descriptor pointing to the remainder is
// returned.
573 splitBlockGroup (bdescr *bd, nat blocks)
577 if (bd->blocks <= blocks) {
578 barf("splitLargeBlock: too small");
// Megablock groups may only be split at megablock boundaries: the low
// part keeps `blocks` blocks, the high part becomes a fresh mgroup.
581 if (bd->blocks > BLOCKS_PER_MBLOCK) {
582 nat low_mblocks, high_mblocks;
584 if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
585 barf("splitLargeBlock: not a multiple of a megablock");
587 low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
588 high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);
590 new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + low_mblocks * MBLOCK_SIZE_W);
591 initMBlock(new_mblock);
592 new_bd = FIRST_BDESCR(new_mblock);
593 new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);
595 ASSERT(blocks + new_bd->blocks ==
596 bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
600 // NB. we're not updating all the bdescrs in the split groups to
601 // point to the new heads, so this can only be used for large
602 // objects which do not start in the non-head block.
603 new_bd = bd + blocks;
604 new_bd->blocks = bd->blocks - blocks;
// Initialise a fresh megablock: point each block descriptor's start
// field at its corresponding block.
612 initMBlock(void *mblock)
617 /* the first few Bdescr's in a block are unused, so we don't want to
618 * put them all on the free list.
620 block = FIRST_BLOCK(mblock);
621 bd = FIRST_BDESCR(mblock);
623 /* Initialise the start field of each block descriptor
625 for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
626 block += BLOCK_SIZE) {
627 bd->start = (void*)block;
631 /* -----------------------------------------------------------------------------
633 -------------------------------------------------------------------------- */
// Count the total number of blocks in a chain of groups linked by
// bd->link.  (Loop body elided in this listing.)
636 countBlocks(bdescr *bd)
639 for (n=0; bd != NULL; bd=bd->link) {
645 // (*1) Just like countBlocks, except that we adjust the count for a
646 // megablock group so that it doesn't include the extra few blocks
647 // that would be taken up by block descriptors in the second and
648 // subsequent megablock. This is so we can tally the count with the
649 // number of blocks allocated in the system, for memInventory().
651 countAllocdBlocks(bdescr *bd)
654 for (n=0; bd != NULL; bd=bd->link) {
656 // hack for megablock groups: see (*1) above
657 if (bd->blocks > BLOCKS_PER_MBLOCK) {
// subtract the descriptor-reserved blocks of each extra megablock
658 n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
659 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
665 /* -----------------------------------------------------------------------------
667 -------------------------------------------------------------------------- */
// Sanity check the tail bdescr of a free group: a tail has blocks == 0
// and free == 0, and its link points back at the group's head.
671 check_tail (bdescr *bd)
673 bdescr *tail = tail_of(bd);
677 ASSERT(tail->blocks == 0);
678 ASSERT(tail->free == 0);
679 ASSERT(tail->link == bd);
// Walk both free lists and assert every invariant described at the top
// of this file: bucket size ranges, double-link integrity, full
// coalescing, and address ordering of the megablock list.
684 checkFreeListSanity(void)
691 for (ln = 0; ln < MAX_FREE_LIST; ln++) {
692 IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));
695 for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
697 IF_DEBUG(block_alloc,
698 debugBelch("group at %p, length %ld blocks\n",
699 bd->start, (long)bd->blocks));
// every group on free_list is marked free and fits bucket ln's range
700 ASSERT(bd->free == (P_)-1);
701 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
702 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
703 ASSERT(bd->link != bd); // catch easy loops
// doubly-linked list integrity: back pointer matches predecessor
708 ASSERT(bd->u.back == prev);
710 ASSERT(bd->u.back == NULL);
// the group following a free group must not itself be free,
// otherwise the two should have been coalesced
714 next = bd + bd->blocks;
715 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
717 ASSERT(next->free != (P_)-1);
// now the megablock free list:
725 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
727 IF_DEBUG(block_alloc,
728 debugBelch("mega group at %p, length %ld blocks\n",
729 bd->start, (long)bd->blocks));
731 ASSERT(bd->link != bd); // catch easy loops
733 if (bd->link != NULL)
735 // make sure the list is sorted
736 ASSERT(bd->start < bd->link->start);
739 ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
740 ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
743 // make sure we're fully coalesced
744 if (bd->link != NULL)
746 ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
747 (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
748 BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
// Sum the blocks on both free lists (the enclosing function's
// signature is elided from this listing; presumably countFreeList,
// used by memInventory() — confirm in full source).
757 lnat total_blocks = 0;
760 for (ln=0; ln < MAX_FREE_LIST; ln++) {
761 for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
762 total_blocks += bd->blocks;
765 for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
766 total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
767 // The caller of this function, memInventory(), expects to match
768 // the total number of blocks in the system against mblocks *
769 // BLOCKS_PER_MBLOCK, so we must subtract the space for the
770 // block descriptors from *every* mblock.
// Mark every group in the chain as known/reachable (BF_KNOWN), for use
// by the unreachable-block report below.
776 markBlocks (bdescr *bd)
778 for (; bd != NULL; bd = bd->link) {
779 bd->flags |= BF_KNOWN;
// Walk every megablock in the system and report each block group that
// is neither marked BF_KNOWN nor free — i.e. apparently leaked.
// (The loop continues past the end of this elided listing.)
784 reportUnmarkedBlocks (void)
789 debugBelch("Unreachable blocks:\n");
790 for (mblock = getFirstMBlock(); mblock != NULL;
791 mblock = getNextMBlock(mblock)) {
792 for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
793 if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
794 debugBelch(" %p\n",bd);
// a megablock group spans several mblocks: skip the trailing ones
796 if (bd->blocks >= BLOCKS_PER_MBLOCK) {
797 mblock = (StgWord8*)mblock +
798 (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;