1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * The block allocator and free list manager.
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
10 * void *getMBlock(nat n);
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlock().
16 * ---------------------------------------------------------------------------*/
18 #include "PosixSource.h"
23 #include "BlockAlloc.h"
27 static void initMBlock(void *mblock);
29 /* -----------------------------------------------------------------------------
35 - bdescr = block descriptor
36 - bgroup = block group (1 or more adjacent blocks)
38 - mgroup = mega group (1 or more adjacent mblocks)
40 Invariants on block descriptors
41 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
42 bd->start always points to the start of the block.
45 - zero for a non-group-head; bd->link points to the head
46 - (-1) for the head of a free block group
47 - or it points within the block
50 - zero for a non-group-head; bd->link points to the head
51 - number of blocks in this group otherwise
53 bd->link either points to a block descriptor or is NULL
55 The following fields are not used by the allocator:
61 Exceptions: we don't maintain invariants for all the blocks within a
62 group on the free list, because it is expensive to modify every
63 bdescr in a group when coalescing. Just the head and last bdescrs
64 will be correct for a group on the free list.
71 - most allocations are for a small number of blocks
72 - sometimes the OS gives us new memory backwards in the address
73 space, sometimes forwards, so we should not be biased towards
74 any particular layout in the address space
75 - We want to avoid fragmentation
76 - We want allocation and freeing to be O(1) or close.
78 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
79 whether it can be coalesced with other free bgroups by checking the
80 bdescrs for the blocks on either side of it. This means that we can
81 coalesce in O(1) time. Every free bgroup must have its head and tail
82 bdescrs initialised, the rest don't matter.
84 We keep the free list in buckets, using a heap-sort strategy.
85    Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
86 blocks in each bucket is doubly-linked, so that if a block is
87 coalesced we can easily remove it from its current free list.
89 To allocate a new block of size S, grab a block from bucket
90 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
91 least as big as S, and split it if necessary. If there are no
92    blocks in that bucket, look at bigger buckets until a block is found.
93 Allocation is therefore O(logN) time.
96 - coalesce it with neighbours.
97 - remove coalesced neighbour(s) from free list(s)
98 - add the new (coalesced) block to the front of the appropriate
99 bucket, given by log2(S) where S is the size of the block.
103 We cannot play this coalescing trick with mblocks, because there is
104 no requirement that the bdescrs in the second and subsequent mblock
105 of an mgroup are initialised (the mgroup might be filled with a
106 large array, overwriting the bdescrs for example).
108 So there is a separate free list for megablocks, sorted in *address*
109 order, so that we can coalesce. Allocation in this list is best-fit
110 by traversing the whole list: we don't expect this list to be long,
111 and allocation/freeing of large blocks is rare; avoiding
112 fragmentation is more important than performance here.
114 freeGroup() might end up moving a block from free_list to
115 free_mblock_list, if after coalescing we end up with a full mblock.
117 checkFreeListSanity() checks all the invariants on the free lists.
119 --------------------------------------------------------------------------- */
121 /* ---------------------------------------------------------------------------
122 WATCH OUT FOR OVERFLOW
124 Be very careful with integer overflow here. If you have an
125 expression like (n_blocks * BLOCK_SIZE), and n_blocks is an int or
126 a nat, then it will very likely overflow on a 64-bit platform.
127 Always cast to StgWord (or W_ for short) first: ((W_)n_blocks * BLOCK_SIZE).
129 --------------------------------------------------------------------------- */
131 #define MAX_FREE_LIST 9
133 // In THREADED_RTS mode, the free list is protected by sm_mutex.
135 static bdescr *free_list[MAX_FREE_LIST];
136 static bdescr *free_mblock_list;
138 // free_list[i] contains blocks that are at least size 2^i, and at
139 // most size 2^(i+1) - 1.
141 // To find the free list in which to place a block, use log_2(size).
142 // To find a free block of the right size, use log_2_ceil(size).
144 lnat n_alloc_blocks; // currently allocated blocks
145 lnat hw_alloc_blocks; // high-water allocated blocks
147 /* -----------------------------------------------------------------------------
149 -------------------------------------------------------------------------- */
151 void initBlockAllocator(void)
// Called once at RTS startup: empty every size-bucketed free list
// (the loop body resetting free_list[i] falls between the lines
// visible here) and the megablock free list.
154     for (i=0; i < MAX_FREE_LIST; i++) {
157     free_mblock_list = NULL;
162 /* -----------------------------------------------------------------------------
164 -------------------------------------------------------------------------- */
167 initGroup(bdescr *head)
// Initialise the bdescrs of a freshly allocated group: the head's
// free pointer starts at the beginning of its block; the loop below
// sets up the non-head bdescrs (its body is not visible here --
// per the invariants above, non-heads presumably get blocks = 0 and
// link pointing at the head; TODO confirm against full source).
173     head->free   = head->start;
175     for (i=1, bd = head+1; i < n; i++, bd++) {
182 // There are quicker non-loopy ways to do log_2, but we expect n to be
183 // usually small, and MAX_FREE_LIST is also small, so the loop version
184 // might well be the best choice here.
// log_2_ceil(n): smallest i such that 2^i >= n, saturating at
// MAX_FREE_LIST.  Used to pick a bucket in which every resident group
// is guaranteed to be at least n blocks.  (The signature and the
// update of x -- presumably doubling from 1 -- fall on lines not
// visible here; TODO confirm.)
190     for (i=0; i < MAX_FREE_LIST; i++) {
191         if (x >= n) return i;
194     return MAX_FREE_LIST;
// log_2(n): floor(log2(n)), saturating at MAX_FREE_LIST.  Used to pick
// the bucket in which a free group of known size is stored.  (The
// signature and the update of x -- presumably shifting n down each
// iteration -- fall on lines not visible here; TODO confirm.)
202     for (i=0; i < MAX_FREE_LIST; i++) {
204         if (x == 0) return i;
206     return MAX_FREE_LIST;
210 free_list_insert (bdescr *bd)
// Insert a free block group (strictly smaller than a full megablock)
// onto the doubly-linked bucket chosen by floor(log2(size)).
214     ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
215     ln = log_2(bd->blocks);
217     dbl_link_onto(bd, &free_list[ln]);
221 STATIC_INLINE bdescr *
// tail_of(bd): bdescr of the last block in bd's group (the name line
// of the definition falls between the two lines visible here).
224     return bd + bd->blocks - 1;
227 // After splitting a group, the last block of each group must have a
228 // tail that points to the head block, to keep our invariants for
// coalescing: freeGroup() finds a neighbouring group's head in O(1)
// via the tail's link field.  (The body of setup_tail is not visible
// here.)
231 setup_tail (bdescr *bd)
243 // Take a free block group bd, and split off a group of size n from
244 // it.  Adjust the free list as necessary, and return the new group.
246 split_free_block (bdescr *bd, nat n, nat ln)
// ln is the bucket bd currently lives on (log_2 of its current size).
248     bdescr *fg; // free group
250     ASSERT(bd->blocks > n);
// Unlink bd from its current bucket before shrinking it.
251     dbl_link_remove(bd, &free_list[ln]);
252     fg = bd + bd->blocks - n; // take n blocks off the end
// Re-bucket the shrunken remainder by its new (smaller) size; fg is
// presumably returned after its tail/head are set up on lines not
// visible here.
256     ln = log_2(bd->blocks);
257     dbl_link_onto(bd, &free_list[ln]);
262 alloc_mega_group (nat mblocks)
// Allocate a group of `mblocks` contiguous megablocks.  Best-fit
// search over the address-sorted free_mblock_list; falls back to
// asking the OS via getMBlocks().
264     bdescr *best, *bd, *prev;
267     n = MBLOCK_GROUP_BLOCKS(mblocks);
// Walk the whole list: unlink and take an exact fit immediately,
// otherwise remember the smallest group that is big enough.
271     for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
// exact fit somewhere in the middle of the (singly-linked) list
276                 prev->link = bd->link;
// exact fit at the front of the list
278                 free_mblock_list = bd->link;
283         else if (bd->blocks > n)
285             if (!best || bd->blocks < best->blocks)
// No exact fit but a bigger group exists:
294         // we take our chunk off the end here.
295         StgWord best_mblocks  = BLOCKS_TO_MBLOCKS(best->blocks);
296         bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
297                           (best_mblocks-mblocks)*MBLOCK_SIZE);
// Shrink best in place (it stays on the list, which remains sorted
// since addresses are unchanged), then initialise the first mblock of
// the carved-off chunk.
299         best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
300         initMBlock(MBLOCK_ROUND_DOWN(bd));
// Nothing suitable on the free list: get fresh memory from the OS.
304         void *mblock = getMBlocks(mblocks);
305         initMBlock(mblock);		// only need to init the 1st one
306         bd = FIRST_BDESCR(mblock);
308     bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
// allocGroup(n): allocate a group of n blocks (the signature line is
// not visible here).  Requests of a megablock or more are handled as
// megablock groups; smaller requests come from the bucketed free
// lists, splitting a larger group when necessary.
318 if (n == 0) barf("allocGroup: requested zero blocks");
320 if (n >= BLOCKS_PER_MBLOCK)
// --- megablock-sized request ---
324     mblocks = BLOCKS_TO_MBLOCKS(n);
326     // n_alloc_blocks doesn't count the extra blocks we get in a
// ...megablock group, so account whole megablocks' worth here.
328     n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
329     if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
331     bd = alloc_mega_group(mblocks);
332     // only the bdescrs of the first MB are required to be initialised
335     IF_DEBUG(sanity, checkFreeListSanity());
// --- ordinary request: n < BLOCKS_PER_MBLOCK ---
340 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
// Start at the bucket given by log_2_ceil(n) (presumably computed on
// a line not visible here) and scan upward for a non-empty bucket.
344 while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
348 if (ln == MAX_FREE_LIST) {
// No free block anywhere.  Debug aid: if more than roughly 1MB of the
// memory we hold from the OS is sitting free, report fragmentation
// before grabbing yet another megablock.
350     if (((W_)mblocks_allocated * MBLOCK_SIZE_W - (W_)n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
351         debugBelch("Fragmentation, wanted %d blocks:", n);
352         RtsFlags.DebugFlags.block_alloc = 1;
353         checkFreeListSanity();
357     bd = alloc_mega_group(1);
// Take n blocks from the front of the fresh megablock; rem (set up on
// a line not visible here) is the leftover slop.
359     initGroup(bd); // we know the group will fit
361     rem->blocks = BLOCKS_PER_MBLOCK-n;
362     initGroup(rem); // init the slop
// freeGroup() below decrements n_alloc_blocks by rem->blocks, so
// pre-add it to keep the counter balanced.
363     n_alloc_blocks += rem->blocks;
364     freeGroup(rem); // add the slop on to the free list
365     IF_DEBUG(sanity, checkFreeListSanity());
// Found a non-empty bucket: bd is presumably its head group.
371 if (bd->blocks == n) // exactly the right size!
373     dbl_link_remove(bd, &free_list[ln]);
375 else if (bd->blocks > n) // block too big...
377     bd = split_free_block(bd, n, ln);
// Bucket ln only holds groups of at least 2^ln >= n blocks, so a
// smaller group here means the free list is corrupt.
381     barf("allocGroup: free list corrupted");
383 initGroup(bd); // initialise it
384 IF_DEBUG(sanity, checkFreeListSanity());
385 ASSERT(bd->blocks == n);
390 allocGroup_lock(nat n)
// Lock-taking wrappers around allocGroup()/allocBlock(); their bodies
// fall on lines not visible here.  The `return allocGroup(1);` below
// is presumably the body of allocBlock() -- TODO confirm against the
// full source.
402     return allocGroup(1);
406 allocBlock_lock(void)
415 /* -----------------------------------------------------------------------------
417 -------------------------------------------------------------------------- */
419 STATIC_INLINE bdescr *
420 coalesce_mblocks (bdescr *p)
// If p's successor on the megablock free list (presumably q =
// p->link, assigned on a line not visible here) begins exactly where
// p's megablock group ends, merge q into p.  The list is sorted by
// address, which makes this adjacency test sufficient.
426         MBLOCK_ROUND_DOWN(q) ==
427         (StgWord8*)MBLOCK_ROUND_DOWN(p) +
428         BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
// Grow p by q's megablocks; MBLOCK_GROUP_BLOCKS re-derives the block
// count for the combined group.
430         p->blocks  = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
431                                          BLOCKS_TO_MBLOCKS(q->blocks));
439 free_mega_group (bdescr *mg)
// Return a megablock group to free_mblock_list, keeping the list
// sorted by address and coalescing with both neighbours.
443     // Find the right place in the free list.  free_mblock_list is
444     // sorted by *address*, not by size as the free_list is.
446     bd = free_mblock_list;
447     while (bd && bd->start < mg->start) {
452     // coalesce backwards
// Insert mg after prev, then try to merge prev+mg;
// coalesce_mblocks() returns the surviving (possibly merged) group.
455         mg->link = prev->link;
457         mg = coalesce_mblocks(prev);
// List was empty or mg belongs at the front.
461         mg->link = free_mblock_list;
462         free_mblock_list = mg;
// coalesce forwards with the successor, if address-adjacent
465     coalesce_mblocks(mg);
467     IF_DEBUG(sanity, checkFreeListSanity());
476 // Todo: not true in multithreaded GC
// freeGroup(p): return block group p to the appropriate free list,
// coalescing with free neighbours (the signature line is not visible
// here).
479 ASSERT(p->free != (P_)-1);
// free == -1 marks a free group; the ASSERT above catches double-free.
481 p->free = (void *)-1;  /* indicates that this block is free */
484 /* fill the block group with garbage if sanity checking is on */
485 IF_DEBUG(sanity,memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE));
487 if (p->blocks == 0) barf("freeGroup: block size is zero");
489 if (p->blocks >= BLOCKS_PER_MBLOCK)
// Megablock group: belongs on the megablock free list (the call to
// free_mega_group presumably falls on a line not visible here).
493     mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
494     // If this is an mgroup, make sure it has the right number of blocks
495     ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
497     n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;
// Ordinary group: adjust accounting, then coalesce.
503 ASSERT(n_alloc_blocks >= p->blocks);
504 n_alloc_blocks -= p->blocks;
// coalesce forwards: if the group immediately after p (within the
// same megablock) is also free, absorb it into p.
509 next = p + p->blocks;
510 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
512     p->blocks += next->blocks;
513     ln = log_2(next->blocks);
514     dbl_link_remove(next, &free_list[ln]);
// Coalescing may have produced a full megablock, which belongs on the
// megablock list instead (handled on lines not visible here).
515     if (p->blocks == BLOCKS_PER_MBLOCK)
524 // coalesce backwards
525 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
// The bdescr just before p may be a group tail (blocks == 0, link
// pointing at the head) -- see the invariants at the top of the file.
529     if (prev->blocks == 0) prev = prev->link; // find the head
531     if (prev->free == (P_)-1)
// Pull the predecessor off its bucket, grow it by p, and if the
// merge fills a whole megablock hand it to free_mega_group().
533         ln = log_2(prev->blocks);
534         dbl_link_remove(prev, &free_list[ln]);
535         prev->blocks += p->blocks;
536         if (prev->blocks >= BLOCKS_PER_MBLOCK)
538             free_mega_group(prev);
548 IF_DEBUG(sanity, checkFreeListSanity());
552 freeGroup_lock(bdescr *p)
// Lock-taking wrapper and chain-freeing helpers; their bodies fall on
// lines not visible here.  freeChain presumably walks bd->link
// freeing each group -- TODO confirm against the full source.
560 freeChain(bdescr *bd)
571 freeChain_lock(bdescr *bd)
578 // splitBlockGroup(bd,B) splits bd in two.  Afterward, bd will have B
579 // blocks, and a new block descriptor pointing to the remainder is
// returned (presumably; the return statement is not visible here).
582 splitBlockGroup (bdescr *bd, nat blocks)
// NOTE(review): the barf messages below still say "splitLargeBlock",
// which looks like an older name for this function.
586     if (bd->blocks <= blocks) {
587         barf("splitLargeBlock: too small");
// Megablock group: the split point must land on a megablock
// boundary, because only the head mblock of an mgroup is guaranteed
// to have initialised bdescrs (see the commentary at the top).
590     if (bd->blocks > BLOCKS_PER_MBLOCK) {
591         nat low_mblocks, high_mblocks;
593         if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
594             barf("splitLargeBlock: not a multiple of a megablock");
596         low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
597         high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);
// Initialise the bdescrs of the remainder's first mblock and size it.
599         new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + (W_)low_mblocks * MBLOCK_SIZE_W);
600         initMBlock(new_mblock);
601         new_bd = FIRST_BDESCR(new_mblock);
602         new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);
// Sanity: the two halves must add up, modulo the descriptor overhead
// of one megablock.
604         ASSERT(blocks + new_bd->blocks ==
605                bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
// Ordinary (sub-megablock) group: split in place.
609     // NB. we're not updating all the bdescrs in the split groups to
610     // point to the new heads, so this can only be used for large
611     // objects which do not start in the non-head block.
612     new_bd = bd + blocks;
613     new_bd->blocks = bd->blocks - blocks;
621 initMBlock(void *mblock)
// Set up a fresh megablock: give each usable block's bdescr its start
// pointer.  FIRST_BLOCK/FIRST_BDESCR skip the descriptor area at the
// front of the megablock.
626     /* the first few Bdescr's in a block are unused, so we don't want to
627      * put them all on the free list.
629     block = FIRST_BLOCK(mblock);
630     bd    = FIRST_BDESCR(mblock);
632     /* Initialise the start field of each block descriptor
634     for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
635              block += BLOCK_SIZE) {
636         bd->start = (void*)block;
640 /* -----------------------------------------------------------------------------
642 -------------------------------------------------------------------------- */
645 countBlocks(bdescr *bd)
// Total blocks in a chain of groups linked by bd->link (the
// accumulation line of the loop body is not visible here).
648     for (n=0; bd != NULL; bd=bd->link) {
654 // (*1) Just like countBlocks, except that we adjust the count for a
655 // megablock group so that it doesn't include the extra few blocks
656 // that would be taken up by block descriptors in the second and
657 // subsequent megablock.  This is so we can tally the count with the
658 // number of blocks allocated in the system, for memInventory().
660 countAllocdBlocks(bdescr *bd)
663     for (n=0; bd != NULL; bd=bd->link) {
665         // hack for megablock groups: see (*1) above
666         if (bd->blocks > BLOCKS_PER_MBLOCK) {
// Subtract the would-be descriptor blocks of every mblock after the
// first in this group.
667             n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
668                 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
674 /* -----------------------------------------------------------------------------
676 -------------------------------------------------------------------------- */
680 check_tail (bdescr *bd)
// Debug check: a free group's tail bdescr must carry the non-head
// invariants documented at the top of the file (blocks == 0,
// free == 0, link pointing back at the head).
682     bdescr *tail = tail_of(bd);
686         ASSERT(tail->blocks == 0);
687         ASSERT(tail->free == 0);
688         ASSERT(tail->link == bd);
693 checkFreeListSanity(void)
// Walk every free list and assert the invariants documented at the
// top of the file; with block_alloc debugging on, also print each
// group.
700     for (ln = 0; ln < MAX_FREE_LIST; ln++) {
701         IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));
704         for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
706             IF_DEBUG(block_alloc,
707                      debugBelch("group at %p, length %ld blocks\n",
708                                 bd->start, (long)bd->blocks));
// Free marker set, and size within this bucket's range
// [2^ln, 2^(ln+1)-1] (min presumably == 2^ln, set on a line not
// visible here).
709             ASSERT(bd->free == (P_)-1);
710             ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
711             ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
712             ASSERT(bd->link != bd); // catch easy loops
// back-pointers of the doubly-linked bucket must be consistent
717                 ASSERT(bd->u.back == prev);
719                 ASSERT(bd->u.back == NULL);
// the group right after a free group must not itself be free,
// otherwise the two should have been coalesced
723             next = bd + bd->blocks;
724             if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
726                 ASSERT(next->free != (P_)-1);
// --- megablock free list: sorted by address, fully coalesced ---
734     for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
736         IF_DEBUG(block_alloc,
737                  debugBelch("mega group at %p, length %ld blocks\n",
738                             bd->start, (long)bd->blocks));
740         ASSERT(bd->link != bd); // catch easy loops
742         if (bd->link != NULL)
744             // make sure the list is sorted
745             ASSERT(bd->start < bd->link->start);
748         ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
749         ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
752         // make sure we're fully coalesced
753         if (bd->link != NULL)
755             ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
756                     (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
757                     BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
// Total up the blocks on all free lists (presumably countFreeList();
// the signature line is not visible here).
766     lnat total_blocks = 0;
769     for (ln=0; ln < MAX_FREE_LIST; ln++) {
770         for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
771             total_blocks += bd->blocks;
774     for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
// Count only BLOCKS_PER_MBLOCK per megablock -- i.e. exclude the
// descriptor space of every mblock -- to match memInventory():
775         total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
776         // The caller of this function, memInventory(), expects to match
777         // the total number of blocks in the system against mblocks *
778         // BLOCKS_PER_MBLOCK, so we must subtract the space for the
779         // block descriptors from *every* mblock.
785 markBlocks (bdescr *bd)
// Set BF_KNOWN on every group in the chain; used together with the
// unreachable-block report below to find leaked blocks.
787     for (; bd != NULL; bd = bd->link) {
788         bd->flags |= BF_KNOWN;
793 reportUnmarkedBlocks (void)
798 debugBelch("Unreachable blocks:\n");
799 for (mblock = getFirstMBlock(); mblock != NULL;
800 mblock = getNextMBlock(mblock)) {
801 for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
802 if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
803 debugBelch(" %p\n",bd);
805 if (bd->blocks >= BLOCKS_PER_MBLOCK) {
806 mblock = (StgWord8*)mblock +
807 (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;