1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * The block allocator and free list manager.
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
10 * void *getMBlock(nat n);
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlock().
16 * ---------------------------------------------------------------------------*/
18 #include "PosixSource.h"
23 #include "BlockAlloc.h"
28 static void initMBlock(void *mblock);
30 /* -----------------------------------------------------------------------------
36 - bdescr = block descriptor
37 - bgroup = block group (1 or more adjacent blocks)
39 - mgroup = mega group (1 or more adjacent mblocks)
41 Invariants on block descriptors
42 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43 bd->start always points to the start of the block.
46 - zero for a non-group-head; bd->link points to the head
47 - (-1) for the head of a free block group
48 - or it points within the block
51 - zero for a non-group-head; bd->link points to the head
52 - number of blocks in this group otherwise
54 bd->link either points to a block descriptor or is NULL
56 The following fields are not used by the allocator:
62 Exceptions: we don't maintain invariants for all the blocks within a
63 group on the free list, because it is expensive to modify every
64 bdescr in a group when coalescing. Just the head and last bdescrs
65 will be correct for a group on the free list.
72 - most allocations are for a small number of blocks
73 - sometimes the OS gives us new memory backwards in the address
74 space, sometimes forwards, so we should not be biased towards
75 any particular layout in the address space
76 - We want to avoid fragmentation
77 - We want allocation and freeing to be O(1) or close.
79 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
80 whether it can be coalesced with other free bgroups by checking the
81 bdescrs for the blocks on either side of it. This means that we can
82 coalesce in O(1) time. Every free bgroup must have its head and tail
83 bdescrs initialised, the rest don't matter.
85 We keep the free list in buckets, using a heap-sort strategy.
86 Bucket N contains blocks with sizes 2^N - 2^(N+1)-1. The list of
87 blocks in each bucket is doubly-linked, so that if a block is
88 coalesced we can easily remove it from its current free list.
90 To allocate a new block of size S, grab a block from bucket
91 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
92 least as big as S, and split it if necessary. If there are no
93 blocks in that bucket, look at bigger buckets until a block is found
94 Allocation is therefore O(logN) time.
97 - coalesce it with neighbours.
98 - remove coalesced neighbour(s) from free list(s)
99 - add the new (coalesced) block to the front of the appropriate
100 bucket, given by log2(S) where S is the size of the block.
104 We cannot play this coalescing trick with mblocks, because there is
105 no requirement that the bdescrs in the second and subsequent mblock
106 of an mgroup are initialised (the mgroup might be filled with a
107 large array, overwriting the bdescrs for example).
109 So there is a separate free list for megablocks, sorted in *address*
110 order, so that we can coalesce. Allocation in this list is best-fit
111 by traversing the whole list: we don't expect this list to be long,
112 and allocation/freeing of large blocks is rare; avoiding
113 fragmentation is more important than performance here.
115 freeGroup() might end up moving a block from free_list to
116 free_mblock_list, if after coalescing we end up with a full mblock.
118 checkFreeListSanity() checks all the invariants on the free lists.
120 --------------------------------------------------------------------------- */
122 /* ---------------------------------------------------------------------------
123 WATCH OUT FOR OVERFLOW
125 Be very careful with integer overflow here. If you have an
126 expression like (n_blocks * BLOCK_SIZE), and n_blocks is an int or
127 a nat, then it will very likely overflow on a 64-bit platform.
128 Always cast to StgWord (or W_ for short) first: ((W_)n_blocks * BLOCK_SIZE).
130 --------------------------------------------------------------------------- */
132 #define MAX_FREE_LIST 9
134 // In THREADED_RTS mode, the free list is protected by sm_mutex.
136 static bdescr *free_list[MAX_FREE_LIST];
137 static bdescr *free_mblock_list;
139 // free_list[i] contains blocks that are at least size 2^i, and at
140 // most size 2^(i+1) - 1.
142 // To find the free list in which to place a block, use log_2(size).
143 // To find a free block of the right size, use log_2_ceil(size).
145 lnat n_alloc_blocks; // currently allocated blocks
146 lnat hw_alloc_blocks; // high-water allocated blocks
148 /* -----------------------------------------------------------------------------
150 -------------------------------------------------------------------------- */
152 void initBlockAllocator(void)
155 for (i=0; i < MAX_FREE_LIST; i++) {
158 free_mblock_list = NULL;
163 /* -----------------------------------------------------------------------------
165 -------------------------------------------------------------------------- */
168 initGroup(bdescr *head)
174 head->free = head->start;
176 for (i=1, bd = head+1; i < n; i++, bd++) {
183 // There are quicker non-loopy ways to do log_2, but we expect n to be
184 // usually small, and MAX_FREE_LIST is also small, so the loop version
185 // might well be the best choice here.
191 for (i=0; i < MAX_FREE_LIST; i++) {
192 if (x >= n) return i;
195 return MAX_FREE_LIST;
203 for (i=0; i < MAX_FREE_LIST; i++) {
205 if (x == 0) return i;
207 return MAX_FREE_LIST;
211 free_list_insert (bdescr *bd)
215 ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
216 ln = log_2(bd->blocks);
218 dbl_link_onto(bd, &free_list[ln]);
222 STATIC_INLINE bdescr *
225 return bd + bd->blocks - 1;
228 // After splitting a group, the last block of each group must have a
229 // tail that points to the head block, to keep our invariants for
232 setup_tail (bdescr *bd)
244 // Take a free block group bd, and split off a group of size n from
245 // it. Adjust the free list as necessary, and return the new group.
247 split_free_block (bdescr *bd, nat n, nat ln)
249 bdescr *fg; // free group
251 ASSERT(bd->blocks > n);
252 dbl_link_remove(bd, &free_list[ln]);
253 fg = bd + bd->blocks - n; // take n blocks off the end
257 ln = log_2(bd->blocks);
258 dbl_link_onto(bd, &free_list[ln]);
263 alloc_mega_group (nat mblocks)
265 bdescr *best, *bd, *prev;
268 n = MBLOCK_GROUP_BLOCKS(mblocks);
272 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
277 prev->link = bd->link;
279 free_mblock_list = bd->link;
284 else if (bd->blocks > n)
286 if (!best || bd->blocks < best->blocks)
295 // we take our chunk off the end here.
296 StgWord best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
297 bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
298 (best_mblocks-mblocks)*MBLOCK_SIZE);
300 best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
301 initMBlock(MBLOCK_ROUND_DOWN(bd));
305 void *mblock = getMBlocks(mblocks);
306 initMBlock(mblock); // only need to init the 1st one
307 bd = FIRST_BDESCR(mblock);
309 bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
319 if (n == 0) barf("allocGroup: requested zero blocks");
321 if (n >= BLOCKS_PER_MBLOCK)
325 mblocks = BLOCKS_TO_MBLOCKS(n);
327 // n_alloc_blocks doesn't count the extra blocks we get in a
329 n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
330 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
332 bd = alloc_mega_group(mblocks);
333 // only the bdescrs of the first MB are required to be initialised
336 IF_DEBUG(sanity,memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
337 IF_DEBUG(sanity, checkFreeListSanity());
342 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
346 while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
350 if (ln == MAX_FREE_LIST) {
352 if (((W_)mblocks_allocated * MBLOCK_SIZE_W - (W_)n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
353 debugBelch("Fragmentation, wanted %d blocks:", n);
354 RtsFlags.DebugFlags.block_alloc = 1;
355 checkFreeListSanity();
359 bd = alloc_mega_group(1);
361 initGroup(bd); // we know the group will fit
363 rem->blocks = BLOCKS_PER_MBLOCK-n;
364 initGroup(rem); // init the slop
365 n_alloc_blocks += rem->blocks;
366 freeGroup(rem); // add the slop on to the free list
367 IF_DEBUG(sanity, checkFreeListSanity());
373 if (bd->blocks == n) // exactly the right size!
375 dbl_link_remove(bd, &free_list[ln]);
377 else if (bd->blocks > n) // block too big...
379 bd = split_free_block(bd, n, ln);
383 barf("allocGroup: free list corrupted");
385 initGroup(bd); // initialise it
386 IF_DEBUG(sanity,memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
387 IF_DEBUG(sanity, checkFreeListSanity());
388 ASSERT(bd->blocks == n);
393 allocGroup_lock(nat n)
405 return allocGroup(1);
409 allocBlock_lock(void)
418 /* -----------------------------------------------------------------------------
420 -------------------------------------------------------------------------- */
422 STATIC_INLINE bdescr *
423 coalesce_mblocks (bdescr *p)
429 MBLOCK_ROUND_DOWN(q) ==
430 (StgWord8*)MBLOCK_ROUND_DOWN(p) +
431 BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
433 p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
434 BLOCKS_TO_MBLOCKS(q->blocks));
442 free_mega_group (bdescr *mg)
446 // Find the right place in the free list. free_mblock_list is
447 // sorted by *address*, not by size as the free_list is.
449 bd = free_mblock_list;
450 while (bd && bd->start < mg->start) {
455 // coalesce backwards
458 mg->link = prev->link;
460 mg = coalesce_mblocks(prev);
464 mg->link = free_mblock_list;
465 free_mblock_list = mg;
468 coalesce_mblocks(mg);
470 IF_DEBUG(sanity, checkFreeListSanity());
479 // Todo: not true in multithreaded GC
482 ASSERT(p->free != (P_)-1);
484 p->free = (void *)-1; /* indicates that this block is free */
487 /* fill the block group with garbage if sanity checking is on */
488 IF_DEBUG(sanity,memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE));
490 if (p->blocks == 0) barf("freeGroup: block size is zero");
492 if (p->blocks >= BLOCKS_PER_MBLOCK)
496 mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
497 // If this is an mgroup, make sure it has the right number of blocks
498 ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
500 n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;
506 ASSERT(n_alloc_blocks >= p->blocks);
507 n_alloc_blocks -= p->blocks;
512 next = p + p->blocks;
513 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
515 p->blocks += next->blocks;
516 ln = log_2(next->blocks);
517 dbl_link_remove(next, &free_list[ln]);
518 if (p->blocks == BLOCKS_PER_MBLOCK)
527 // coalesce backwards
528 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
532 if (prev->blocks == 0) prev = prev->link; // find the head
534 if (prev->free == (P_)-1)
536 ln = log_2(prev->blocks);
537 dbl_link_remove(prev, &free_list[ln]);
538 prev->blocks += p->blocks;
539 if (prev->blocks >= BLOCKS_PER_MBLOCK)
541 free_mega_group(prev);
551 IF_DEBUG(sanity, checkFreeListSanity());
555 freeGroup_lock(bdescr *p)
563 freeChain(bdescr *bd)
574 freeChain_lock(bdescr *bd)
581 // splitBlockGroup(bd,B) splits bd in two. Afterward, bd will have B
582 // blocks, and a new block descriptor pointing to the remainder is
585 splitBlockGroup (bdescr *bd, nat blocks)
589 if (bd->blocks <= blocks) {
590 barf("splitLargeBlock: too small");
593 if (bd->blocks > BLOCKS_PER_MBLOCK) {
594 nat low_mblocks, high_mblocks;
596 if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
597 barf("splitLargeBlock: not a multiple of a megablock");
599 low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
600 high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);
602 new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + (W_)low_mblocks * MBLOCK_SIZE_W);
603 initMBlock(new_mblock);
604 new_bd = FIRST_BDESCR(new_mblock);
605 new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);
607 ASSERT(blocks + new_bd->blocks ==
608 bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
612 // NB. we're not updating all the bdescrs in the split groups to
613 // point to the new heads, so this can only be used for large
614 // objects which do not start in the non-head block.
615 new_bd = bd + blocks;
616 new_bd->blocks = bd->blocks - blocks;
624 initMBlock(void *mblock)
629 /* the first few Bdescr's in a block are unused, so we don't want to
630 * put them all on the free list.
632 block = FIRST_BLOCK(mblock);
633 bd = FIRST_BDESCR(mblock);
635 /* Initialise the start field of each block descriptor
637 for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
638 block += BLOCK_SIZE) {
639 bd->start = (void*)block;
643 /* -----------------------------------------------------------------------------
645 -------------------------------------------------------------------------- */
648 countBlocks(bdescr *bd)
651 for (n=0; bd != NULL; bd=bd->link) {
657 // (*1) Just like countBlocks, except that we adjust the count for a
658 // megablock group so that it doesn't include the extra few blocks
659 // that would be taken up by block descriptors in the second and
660 // subsequent megablock. This is so we can tally the count with the
661 // number of blocks allocated in the system, for memInventory().
663 countAllocdBlocks(bdescr *bd)
666 for (n=0; bd != NULL; bd=bd->link) {
668 // hack for megablock groups: see (*1) above
669 if (bd->blocks > BLOCKS_PER_MBLOCK) {
670 n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
671 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
677 void returnMemoryToOS(nat n /* megablocks */)
682 bd = free_mblock_list;
683 while ((n > 0) && (bd != NULL)) {
684 size = BLOCKS_TO_MBLOCKS(bd->blocks);
686 nat newSize = size - n;
687 char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
688 freeAddr += newSize * MBLOCK_SIZE;
689 bd->blocks = MBLOCK_GROUP_BLOCKS(newSize);
690 freeMBlocks(freeAddr, n);
694 char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
697 freeMBlocks(freeAddr, size);
700 free_mblock_list = bd;
702 osReleaseFreeMemory();
706 debugBelch("Wanted to free %d more MBlocks than are freeable\n",
712 /* -----------------------------------------------------------------------------
714 -------------------------------------------------------------------------- */
718 check_tail (bdescr *bd)
720 bdescr *tail = tail_of(bd);
724 ASSERT(tail->blocks == 0);
725 ASSERT(tail->free == 0);
726 ASSERT(tail->link == bd);
731 checkFreeListSanity(void)
738 for (ln = 0; ln < MAX_FREE_LIST; ln++) {
739 IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));
742 for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
744 IF_DEBUG(block_alloc,
745 debugBelch("group at %p, length %ld blocks\n",
746 bd->start, (long)bd->blocks));
747 ASSERT(bd->free == (P_)-1);
748 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
749 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
750 ASSERT(bd->link != bd); // catch easy loops
755 ASSERT(bd->u.back == prev);
757 ASSERT(bd->u.back == NULL);
761 next = bd + bd->blocks;
762 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
764 ASSERT(next->free != (P_)-1);
772 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
774 IF_DEBUG(block_alloc,
775 debugBelch("mega group at %p, length %ld blocks\n",
776 bd->start, (long)bd->blocks));
778 ASSERT(bd->link != bd); // catch easy loops
780 if (bd->link != NULL)
782 // make sure the list is sorted
783 ASSERT(bd->start < bd->link->start);
786 ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
787 ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
790 // make sure we're fully coalesced
791 if (bd->link != NULL)
793 ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
794 (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
795 BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
804 lnat total_blocks = 0;
807 for (ln=0; ln < MAX_FREE_LIST; ln++) {
808 for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
809 total_blocks += bd->blocks;
812 for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
813 total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
814 // The caller of this function, memInventory(), expects to match
815 // the total number of blocks in the system against mblocks *
816 // BLOCKS_PER_MBLOCK, so we must subtract the space for the
817 // block descriptors from *every* mblock.
823 markBlocks (bdescr *bd)
825 for (; bd != NULL; bd = bd->link) {
826 bd->flags |= BF_KNOWN;
831 reportUnmarkedBlocks (void)
836 debugBelch("Unreachable blocks:\n");
837 for (mblock = getFirstMBlock(); mblock != NULL;
838 mblock = getNextMBlock(mblock)) {
839 for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
840 if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
841 debugBelch(" %p\n",bd);
843 if (bd->blocks >= BLOCKS_PER_MBLOCK) {
844 mblock = (StgWord8*)mblock +
845 (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;