1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * The block allocator and free list manager.
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
10 * void *getMBlock(nat n);
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlock().
16 * ---------------------------------------------------------------------------*/
18 #include "PosixSource.h"
23 #include "BlockAlloc.h"
27 static void initMBlock(void *mblock);
29 // The free_list is kept sorted by size, smallest first.
30 // In THREADED_RTS mode, the free list is protected by sm_mutex.
32 /* -----------------------------------------------------------------------------
38 - bdescr = block descriptor
39 - bgroup = block group (1 or more adjacent blocks)
41 - mgroup = mega group (1 or more adjacent mblocks)
43 Invariants on block descriptors
44 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
45 bd->start always points to the start of the block.
48 - zero for a non-group-head; bd->link points to the head
49 - (-1) for the head of a free block group
50 - or it points within the block
53 - zero for a non-group-head; bd->link points to the head
54 - number of blocks in this group otherwise
56 bd->link either points to a block descriptor or is NULL
58 The following fields are not used by the allocator:
63 Exceptions: we don't maintain invariants for all the blocks within a
64 group on the free list, because it is expensive to modify every
65 bdescr in a group when coalescing. Just the head and last bdescrs
66 will be correct for a group on the free list.
73 - most allocations are for small blocks
74 - sometimes the OS gives us new memory backwards in the address
75 space, sometimes forwards, so we should not be biased towards
76 any particular layout in the address space
77 - We want to avoid fragmentation
78 - We want allocation and freeing to be O(1) or close.
80 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
81 whether it can be coalesced with other free bgroups by checking the
82 bdescrs for the blocks on either side of it. This means that we can
83 coalesce in O(1) time. Every free bgroup must have its head and tail
84 bdescrs initialised, the rest don't matter.
86 We keep the free list in buckets, using a heap-sort strategy.
87 Bucket N contains blocks with sizes in the range 2^N to 2^(N+1)-1.  The list of
88 blocks in each bucket is doubly-linked, so that if a block is
89 coalesced we can easily remove it from its current free list.
91 To allocate a new block of size S, grab a block from bucket
92 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
93 least as big as S, and split it if necessary. If there are no
94 blocks in that bucket, look at bigger buckets until a block is found.
95 Allocation is therefore O(logN) time.
98 - coalesce it with neighbours.
99 - remove coalesced neighbour(s) from free list(s)
100 - add the new (coalesced) block to the front of the appropriate
101 bucket, given by log2(S) where S is the size of the block.
105 We cannot play this coalescing trick with mblocks, because there is
106 no requirement that the bdescrs in the second and subsequent mblock
107 of an mgroup are initialised (the mgroup might be filled with a
108 large array, overwriting the bdescrs for example).
110 So there is a separate free list for megablocks, sorted in *address*
111 order, so that we can coalesce. Allocation in this list is best-fit
112 by traversing the whole list: we don't expect this list to be long,
113 and allocation/freeing of large blocks is rare; avoiding
114 fragmentation is more important than performance here.
116 freeGroup() might end up moving a block from free_list to
117 free_mblock_list, if after coalescing we end up with a full mblock.
119 checkFreeListSanity() checks all the invariants on the free lists.
121 --------------------------------------------------------------------------- */
123 #define MAX_FREE_LIST 9
125 static bdescr *free_list[MAX_FREE_LIST];
126 static bdescr *free_mblock_list;
128 // free_list[i] contains blocks that are at least size 2^i, and at
129 // most size 2^(i+1) - 1.
131 // To find the free list in which to place a block, use log_2(size).
132 // To find a free block of the right size, use log_2_ceil(size).
134 lnat n_alloc_blocks; // currently allocated blocks
135 lnat hw_alloc_blocks; // high-water allocated blocks
137 /* -----------------------------------------------------------------------------
139 -------------------------------------------------------------------------- */
141 void initBlockAllocator(void)
144 for (i=0; i < MAX_FREE_LIST; i++) {
147 free_mblock_list = NULL;
152 /* -----------------------------------------------------------------------------
154 -------------------------------------------------------------------------- */
157 initGroup(bdescr *head)
163 head->free = head->start;
165 for (i=1, bd = head+1; i < n; i++, bd++) {
172 // There are quicker non-loopy ways to do log_2, but we expect n to be
173 // usually small, and MAX_FREE_LIST is also small, so the loop version
174 // might well be the best choice here.
180 for (i=0; i < MAX_FREE_LIST; i++) {
181 if (x >= n) return i;
184 return MAX_FREE_LIST;
192 for (i=0; i < MAX_FREE_LIST; i++) {
194 if (x == 0) return i;
196 return MAX_FREE_LIST;
200 free_list_insert (bdescr *bd)
204 ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
205 ln = log_2(bd->blocks);
207 dbl_link_onto(bd, &free_list[ln]);
211 STATIC_INLINE bdescr *
214 return bd + bd->blocks - 1;
217 // After splitting a group, the last block of each group must have a
218 // tail that points to the head block, to keep our invariants for
221 setup_tail (bdescr *bd)
233 // Take a free block group bd, and split off a group of size n from
234 // it. Adjust the free list as necessary, and return the new group.
236 split_free_block (bdescr *bd, nat n, nat ln)
238 bdescr *fg; // free group
240 ASSERT(bd->blocks > n);
241 dbl_link_remove(bd, &free_list[ln]);
242 fg = bd + bd->blocks - n; // take n blocks off the end
246 ln = log_2(bd->blocks);
247 dbl_link_onto(bd, &free_list[ln]);
252 alloc_mega_group (nat mblocks)
254 bdescr *best, *bd, *prev;
257 n = MBLOCK_GROUP_BLOCKS(mblocks);
261 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
266 prev->link = bd->link;
268 free_mblock_list = bd->link;
273 else if (bd->blocks > n)
275 if (!best || bd->blocks < best->blocks)
284 // we take our chunk off the end here.
285 nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
286 bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
287 (best_mblocks-mblocks)*MBLOCK_SIZE);
289 best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
290 initMBlock(MBLOCK_ROUND_DOWN(bd));
294 void *mblock = getMBlocks(mblocks);
295 initMBlock(mblock); // only need to init the 1st one
296 bd = FIRST_BDESCR(mblock);
298 bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
308 if (n == 0) barf("allocGroup: requested zero blocks");
310 if (n >= BLOCKS_PER_MBLOCK)
314 mblocks = BLOCKS_TO_MBLOCKS(n);
316 // n_alloc_blocks doesn't count the extra blocks we get in a
318 n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
319 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
321 bd = alloc_mega_group(mblocks);
322 // only the bdescrs of the first MB are required to be initialised
325 IF_DEBUG(sanity, checkFreeListSanity());
330 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
334 while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
338 if (ln == MAX_FREE_LIST) {
340 if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
341 debugBelch("Fragmentation, wanted %d blocks:", n);
342 RtsFlags.DebugFlags.block_alloc = 1;
343 checkFreeListSanity();
347 bd = alloc_mega_group(1);
349 initGroup(bd); // we know the group will fit
351 rem->blocks = BLOCKS_PER_MBLOCK-n;
352 initGroup(rem); // init the slop
353 n_alloc_blocks += rem->blocks;
354 freeGroup(rem); // add the slop on to the free list
355 IF_DEBUG(sanity, checkFreeListSanity());
361 if (bd->blocks == n) // exactly the right size!
363 dbl_link_remove(bd, &free_list[ln]);
365 else if (bd->blocks > n) // block too big...
367 bd = split_free_block(bd, n, ln);
371 barf("allocGroup: free list corrupted");
373 initGroup(bd); // initialise it
374 IF_DEBUG(sanity, checkFreeListSanity());
375 ASSERT(bd->blocks == n);
380 allocGroup_lock(nat n)
392 return allocGroup(1);
396 allocBlock_lock(void)
405 /* -----------------------------------------------------------------------------
407 -------------------------------------------------------------------------- */
409 STATIC_INLINE bdescr *
410 coalesce_mblocks (bdescr *p)
416 MBLOCK_ROUND_DOWN(q) ==
417 (StgWord8*)MBLOCK_ROUND_DOWN(p) +
418 BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
420 p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
421 BLOCKS_TO_MBLOCKS(q->blocks));
429 free_mega_group (bdescr *mg)
433 // Find the right place in the free list. free_mblock_list is
434 // sorted by *address*, not by size as the free_list is.
436 bd = free_mblock_list;
437 while (bd && bd->start < mg->start) {
442 // coalesce backwards
445 mg->link = prev->link;
447 mg = coalesce_mblocks(prev);
451 mg->link = free_mblock_list;
452 free_mblock_list = mg;
455 coalesce_mblocks(mg);
457 IF_DEBUG(sanity, checkFreeListSanity());
466 // Todo: not true in multithreaded GC
469 ASSERT(p->free != (P_)-1);
471 p->free = (void *)-1; /* indicates that this block is free */
474 /* fill the block group with garbage if sanity checking is on */
475 IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));
477 if (p->blocks == 0) barf("freeGroup: block size is zero");
479 if (p->blocks >= BLOCKS_PER_MBLOCK)
483 mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
484 // If this is an mgroup, make sure it has the right number of blocks
485 ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
487 n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;
493 ASSERT(n_alloc_blocks >= p->blocks);
494 n_alloc_blocks -= p->blocks;
499 next = p + p->blocks;
500 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
502 p->blocks += next->blocks;
503 ln = log_2(next->blocks);
504 dbl_link_remove(next, &free_list[ln]);
505 if (p->blocks == BLOCKS_PER_MBLOCK)
514 // coalesce backwards
515 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
519 if (prev->blocks == 0) prev = prev->link; // find the head
521 if (prev->free == (P_)-1)
523 ln = log_2(prev->blocks);
524 dbl_link_remove(prev, &free_list[ln]);
525 prev->blocks += p->blocks;
526 if (prev->blocks >= BLOCKS_PER_MBLOCK)
528 free_mega_group(prev);
538 IF_DEBUG(sanity, checkFreeListSanity());
542 freeGroup_lock(bdescr *p)
550 freeChain(bdescr *bd)
561 freeChain_lock(bdescr *bd)
568 // splitBlockGroup(bd,B) splits bd in two. Afterward, bd will have B
569 // blocks, and a new block descriptor pointing to the remainder is
572 splitBlockGroup (bdescr *bd, nat blocks)
576 if (bd->blocks <= blocks) {
577 barf("splitLargeBlock: too small");
580 if (bd->blocks > BLOCKS_PER_MBLOCK) {
581 nat low_mblocks, high_mblocks;
583 if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
584 barf("splitLargeBlock: not a multiple of a megablock");
586 low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
587 high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE);
589 new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + low_mblocks * MBLOCK_SIZE_W);
590 initMBlock(new_mblock);
591 new_bd = FIRST_BDESCR(new_mblock);
592 new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks);
594 ASSERT(blocks + new_bd->blocks ==
595 bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE);
599 // NB. we're not updating all the bdescrs in the split groups to
600 // point to the new heads, so this can only be used for large
601 // objects which do not start in the non-head block.
602 new_bd = bd + blocks;
603 new_bd->blocks = bd->blocks - blocks;
611 initMBlock(void *mblock)
616 /* the first few Bdescr's in a block are unused, so we don't want to
617 * put them all on the free list.
619 block = FIRST_BLOCK(mblock);
620 bd = FIRST_BDESCR(mblock);
622 /* Initialise the start field of each block descriptor
624 for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
625 block += BLOCK_SIZE) {
626 bd->start = (void*)block;
630 /* -----------------------------------------------------------------------------
632 -------------------------------------------------------------------------- */
636 check_tail (bdescr *bd)
638 bdescr *tail = tail_of(bd);
642 ASSERT(tail->blocks == 0);
643 ASSERT(tail->free == 0);
644 ASSERT(tail->link == bd);
649 checkFreeListSanity(void)
656 for (ln = 0; ln < MAX_FREE_LIST; ln++) {
657 IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));
660 for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
662 IF_DEBUG(block_alloc,
663 debugBelch("group at %p, length %ld blocks\n",
664 bd->start, (long)bd->blocks));
665 ASSERT(bd->free == (P_)-1);
666 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
667 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
668 ASSERT(bd->link != bd); // catch easy loops
673 ASSERT(bd->u.back == prev);
675 ASSERT(bd->u.back == NULL);
679 next = bd + bd->blocks;
680 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
682 ASSERT(next->free != (P_)-1);
690 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
692 IF_DEBUG(block_alloc,
693 debugBelch("mega group at %p, length %ld blocks\n",
694 bd->start, (long)bd->blocks));
696 ASSERT(bd->link != bd); // catch easy loops
698 if (bd->link != NULL)
700 // make sure the list is sorted
701 ASSERT(bd->start < bd->link->start);
704 ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
705 ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
708 // make sure we're fully coalesced
709 if (bd->link != NULL)
711 ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
712 (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
713 BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
722 lnat total_blocks = 0;
725 for (ln=0; ln < MAX_FREE_LIST; ln++) {
726 for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
727 total_blocks += bd->blocks;
730 for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
731 total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
732 // The caller of this function, memInventory(), expects to match
733 // the total number of blocks in the system against mblocks *
734 // BLOCKS_PER_MBLOCK, so we must subtract the space for the
735 // block descriptors from *every* mblock.
741 markBlocks (bdescr *bd)
743 for (; bd != NULL; bd = bd->link) {
744 bd->flags |= BF_KNOWN;
749 reportUnmarkedBlocks (void)
754 debugBelch("Unreachable blocks:\n");
755 for (mblock = getFirstMBlock(); mblock != NULL;
756 mblock = getNextMBlock(mblock)) {
757 for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
758 if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
759 debugBelch(" %p\n",bd);
761 if (bd->blocks >= BLOCKS_PER_MBLOCK) {
762 mblock = (StgWord8*)mblock +
763 (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;