/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * The block allocator and free list manager.
 *
 * This is the architecture independent part of the block allocator.
 * It requires only the following support from the operating system:
 *
 *    void *getMBlocks(nat n);
 *
 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
 * an MBLOCK_SIZE boundary.  There are no other restrictions on the
 * addresses of memory returned by getMBlocks().
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"
#include "RtsFlags.h"
#include "RtsUtils.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "Storage.h"

#include <string.h>

static void initMBlock(void *mblock);

// The free_list is kept in buckets by size class: bucket i holds free
// groups of 2^i to 2^(i+1)-1 blocks (see the implementation notes below).
// In THREADED_RTS mode, the free list is protected by sm_mutex.

/* -----------------------------------------------------------------------------

   Implementation notes
   ~~~~~~~~~~~~~~~~~~~~

   Terminology:
     - bdescr = block descriptor
     - bgroup = block group (1 or more adjacent blocks)
     - mblock = mega block
     - mgroup = mega group (1 or more adjacent mblocks)

   Invariants on block descriptors
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   bd->start always points to the start of the block.

   bd->free is either:
      - zero for a non-group-head; bd->link points to the head
      - (-1) for the head of a free block group
      - or it points within the block

   bd->blocks is either:
      - zero for a non-group-head; bd->link points to the head
      - the number of blocks in this group otherwise

   bd->link either points to a block descriptor or is NULL

   The following fields are not used by the allocator:
     bd->flags
     bd->gen_no
     bd->step

   Exceptions: we don't maintain invariants for all the blocks within a
   group on the free list, because it is expensive to modify every
   bdescr in a group when coalescing.  Just the head and last bdescrs
   will be correct for a group on the free list.

   Free lists
   ~~~~~~~~~~

   Preliminaries:
     - most allocations are for small blocks
     - sometimes the OS gives us new memory backwards in the address
       space, sometimes forwards, so we should not be biased towards
       any particular layout in the address space
     - We want to avoid fragmentation
     - We want allocation and freeing to be O(1) or close.

   Coalescing trick: when a bgroup is freed (freeGroup()), we can check
   whether it can be coalesced with other free bgroups by checking the
   bdescrs for the blocks on either side of it.  This means that we can
   coalesce in O(1) time.  Every free bgroup must have its head and tail
   bdescrs initialised; the rest don't matter.

   We keep the free list in buckets, using a heap-sort strategy.
   Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
   blocks in each bucket is doubly-linked, so that if a block is
   coalesced we can easily remove it from its current free list.

   To allocate a new block of size S, grab a block from bucket
   log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
   least as big as S, and split it if necessary.  If there are no
   blocks in that bucket, look at bigger buckets until a block is
   found.  Allocation is therefore O(log N) time.

   To free a block:
     - coalesce it with neighbours.
     - remove coalesced neighbour(s) from free list(s)
     - add the new (coalesced) block to the front of the appropriate
       bucket, given by log2(S) where S is the size of the block.

   Freeing is therefore O(1) (modulo coalescing).

   Megablock groups
   ~~~~~~~~~~~~~~~~

   We cannot play this coalescing trick with mblocks, because there is
   no requirement that the bdescrs in the second and subsequent mblock
   of an mgroup are initialised (the mgroup might be filled with a
   large array, overwriting the bdescrs, for example).

   So there is a separate free list for megablocks, sorted in *address*
   order, so that we can coalesce.  Allocation in this list is best-fit
   by traversing the whole list: we don't expect this list to be long,
   and allocation/freeing of large blocks is rare; avoiding
   fragmentation is more important than performance here.

   freeGroup() might end up moving a block from free_list to
   free_mblock_list, if after coalescing we end up with a full mblock.

   checkFreeListSanity() checks all the invariants on the free lists.

   --------------------------------------------------------------------------- */
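
/* A worked example of the bucket arithmetic above (an illustrative
 * sketch, not part of the allocator): a free group of 6 blocks lives in
 * bucket log2(6) = 2, which holds sizes 4-7, whereas an allocation of 6
 * blocks starts looking in bucket log2ceiling(6) = 3 (sizes 8-15),
 * where every group is guaranteed to be big enough.  The helpers below
 * are hypothetical stand-ins for the real log_2/log_2_ceil defined
 * later in this file.
 */
#if 0
static nat example_log2 (nat n)       // floor(log2(n))
{
    nat i = 0;
    while ((n >>= 1) != 0) i++;
    return i;                         // example_log2(6) == 2
}

static nat example_log2_ceil (nat n)  // ceiling(log2(n))
{
    nat i = 0, x = 1;
    while (x < n) { x <<= 1; i++; }
    return i;                         // example_log2_ceil(6) == 3
}
#endif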

#define MAX_FREE_LIST 9

static bdescr *free_list[MAX_FREE_LIST];
static bdescr *free_mblock_list;

// free_list[i] contains blocks that are at least size 2^i, and at
// most size 2^(i+1) - 1.
//
// To find the free list in which to place a block, use log_2(size).
// To find a free block of the right size, use log_2_ceil(size).

lnat n_alloc_blocks;   // currently allocated blocks
lnat hw_alloc_blocks;  // high-water allocated blocks

/* -----------------------------------------------------------------------------
   Initialisation
   -------------------------------------------------------------------------- */

void initBlockAllocator(void)
{
    nat i;
    for (i=0; i < MAX_FREE_LIST; i++) {
        free_list[i] = NULL;
    }
    free_mblock_list = NULL;
    n_alloc_blocks = 0;
    hw_alloc_blocks = 0;
}

/* -----------------------------------------------------------------------------
   Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE void
initGroup(bdescr *head)
{
    bdescr *bd;
    nat i, n;

    n = head->blocks;
    head->free = head->start;
    head->link = NULL;
    // the other blocks in the group are non-heads: their bdescrs have
    // blocks == 0, free == 0, and link pointing back to the head
    for (i=1, bd = head+1; i < n; i++, bd++) {
        bd->free   = 0;
        bd->blocks = 0;
        bd->link   = head;
    }
}

// There are quicker non-loopy ways to do log_2, but we expect n to be
// usually small, and MAX_FREE_LIST is also small, so the loop version
// might well be the best choice here.
STATIC_INLINE nat
log_2_ceil(nat n)
{
    nat i, x;
    x = 1;
    for (i=0; i < MAX_FREE_LIST; i++) {
        if (x >= n) return i;
        x = x << 1;
    }
    return MAX_FREE_LIST;
}

STATIC_INLINE nat
log_2(nat n)
{
    nat i, x;
    x = n;
    for (i=0; i < MAX_FREE_LIST; i++) {
        x = x >> 1;
        if (x == 0) return i;
    }
    return MAX_FREE_LIST;
}

STATIC_INLINE void
free_list_insert (bdescr *bd)
{
    nat ln;

    ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
    ln = log_2(bd->blocks);

    dbl_link_onto(bd, &free_list[ln]);
}

STATIC_INLINE bdescr *
tail_of (bdescr *bd)
{
    return bd + bd->blocks - 1;
}

// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// coalescing.
STATIC_INLINE void
setup_tail (bdescr *bd)
{
    bdescr *tail;
    tail = tail_of(bd);
    if (tail != bd) {
        tail->blocks = 0;
        tail->free = 0;
        tail->link = bd;
    }
}

// Take a free block group bd, and split off a group of size n from
// it.  Adjust the free list as necessary, and return the new group.
static bdescr *
split_free_block (bdescr *bd, nat n, nat ln)
{
    bdescr *fg; // free group

    ASSERT(bd->blocks > n);
    dbl_link_remove(bd, &free_list[ln]);
    fg = bd + bd->blocks - n; // take n blocks off the end
    fg->blocks = n;
    bd->blocks -= n;
    setup_tail(bd);
    ln = log_2(bd->blocks);
    dbl_link_onto(bd, &free_list[ln]);
    return fg;
}
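
/* Worked example (illustrative): splitting n = 5 blocks off a free
 * 12-block group takes the 5 blocks from the *end*; the remaining
 * 7-block head moves from bucket log_2(12) = 3 down to bucket
 * log_2(7) = 2, and setup_tail() re-initialises its new tail bdescr so
 * the coalescing trick in freeGroup() keeps working.
 */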

static bdescr *
alloc_mega_group (nat mblocks)
{
    bdescr *best, *bd, *prev;
    nat n;

    n = MBLOCK_GROUP_BLOCKS(mblocks);

    best = NULL;
    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        if (bd->blocks == n)
        {
            // an exact fit: unlink it and return it
            if (prev) {
                prev->link = bd->link;
            } else {
                free_mblock_list = bd->link;
            }
            return bd;
        }
        else if (bd->blocks > n)
        {
            // remember the smallest group that is big enough (best-fit)
            if (!best || bd->blocks < best->blocks)
            {
                best = bd;
            }
        }
    }

    if (best)
    {
        // we take our chunk off the end here.
        nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
        bd = FIRST_BDESCR(MBLOCK_ROUND_DOWN(best) +
                          (best_mblocks-mblocks)*MBLOCK_SIZE);

        best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
        initMBlock(MBLOCK_ROUND_DOWN(bd));
    }
    else
    {
        void *mblock = getMBlocks(mblocks);
        initMBlock(mblock); // only need to init the 1st one
        bd = FIRST_BDESCR(mblock);
    }
    bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    return bd;
}
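
/* Worked example (illustrative): with mgroups of 5 and 2 mblocks on
 * free_mblock_list, alloc_mega_group(2) unlinks and returns the
 * 2-mblock group (an exact fit), while alloc_mega_group(3) carves the
 * last 3 mblocks off the 5-mblock group, leaving a 2-mblock group at
 * the original address so the list needs no re-sorting.
 */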

bdescr *
allocGroup (nat n)
{
    bdescr *bd, *rem;
    nat ln;

    if (n == 0) barf("allocGroup: requested zero blocks");

    if (n >= BLOCKS_PER_MBLOCK)
    {
        nat mblocks;

        mblocks = BLOCKS_TO_MBLOCKS(n);

        // n_alloc_blocks doesn't count the extra blocks we get in a
        // megablock group.
        n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK;
        if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;

        bd = alloc_mega_group(mblocks);
        // only the bdescrs of the first MB are required to be initialised
        initGroup(bd);

        IF_DEBUG(sanity, checkFreeListSanity());
        return bd;
    }

    n_alloc_blocks += n;
    if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;

    ln = log_2_ceil(n);

    while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
        ln++;
    }

    if (ln == MAX_FREE_LIST) {
#if 0
        if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
            debugBelch("Fragmentation, wanted %d blocks:", n);
            RtsFlags.DebugFlags.block_alloc = 1;
            checkFreeListSanity();
        }
#endif

        bd = alloc_mega_group(1);
        bd->blocks = n;
        initGroup(bd);                   // we know the group will fit
        rem = bd + n;
        rem->blocks = BLOCKS_PER_MBLOCK-n;
        initGroup(rem);                  // init the slop
        n_alloc_blocks += rem->blocks;
        freeGroup(rem);                  // add the slop on to the free list
        IF_DEBUG(sanity, checkFreeListSanity());
        ASSERT(bd->blocks == n);
        return bd;
    }

    bd = free_list[ln];

    if (bd->blocks == n)                // exactly the right size!
    {
        dbl_link_remove(bd, &free_list[ln]);
    }
    else if (bd->blocks > n)            // block too big...
    {
        bd = split_free_block(bd, n, ln);
    }
    else
    {
        barf("allocGroup: free list corrupted");
    }
    initGroup(bd);                      // initialise it
    IF_DEBUG(sanity, checkFreeListSanity());
    ASSERT(bd->blocks == n);
    return bd;
}
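
/* Usage sketch (illustrative only, not part of this file): a client
 * wanting a contiguous 4-block region.  bd->start points at the usable
 * memory; by convention bd->free tracks the fill point.
 */
#if 0
    bdescr *bd = allocGroup_lock(4);   // 4 contiguous blocks, one head bdescr
    ASSERT(bd->blocks == 4);
    *(bd->free++) = 42;                // bump-allocate one word into the group
    freeGroup_lock(bd);                // return all 4 blocks to the free list
#endif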

bdescr *
allocGroup_lock(nat n)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroup(n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlock(void)
{
    return allocGroup(1);
}

bdescr *
allocBlock_lock(void)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlock();
    RELEASE_SM_LOCK;
    return bd;
}

/* -----------------------------------------------------------------------------
   De-Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE bdescr *
coalesce_mblocks (bdescr *p)
{
    bdescr *q;

    q = p->link;
    if (q != NULL &&
        MBLOCK_ROUND_DOWN(q) ==
        MBLOCK_ROUND_DOWN(p) + BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
        // can coalesce
        p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                        BLOCKS_TO_MBLOCKS(q->blocks));
        p->link = q->link;
        return p;
    }
    return q;
}

static void
free_mega_group (bdescr *mg)
{
    bdescr *bd, *prev;

    // Find the right place in the free list.  free_mblock_list is
    // sorted by *address*, not by size as the free_list is.
    prev = NULL;
    bd = free_mblock_list;
    while (bd && bd->start < mg->start) {
        prev = bd;
        bd = bd->link;
    }

    // coalesce backwards
    if (prev)
    {
        mg->link = prev->link;
        prev->link = mg;
        mg = coalesce_mblocks(prev);
    }
    else
    {
        mg->link = free_mblock_list;
        free_mblock_list = mg;
    }
    // coalesce forwards
    coalesce_mblocks(mg);

    IF_DEBUG(sanity, checkFreeListSanity());
}
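
/* Worked example (illustrative): with mgroups at addresses A < C on
 * free_mblock_list, freeing an mgroup B with A < B < C links it in
 * between; if B starts exactly where A ends, coalesce_mblocks(A)
 * merges them, and the subsequent forward call tries to merge the
 * result with C as well.
 */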

void
freeGroup(bdescr *p)
{
    nat ln;

    // Todo: not true in multithreaded GC
    //  ASSERT_SM_LOCK();

    ASSERT(p->free != (P_)-1);

    p->free = (void *)-1;  /* indicates that this block is free */
    p->step = NULL;
    p->gen_no = 0;
    /* fill the block group with garbage if sanity checking is on */
    IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));

    if (p->blocks == 0) barf("freeGroup: block size is zero");

    if (p->blocks >= BLOCKS_PER_MBLOCK)
    {
        nat mblocks;

        mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
        // If this is an mgroup, make sure it has the right number of blocks
        ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));

        n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK;

        free_mega_group(p);
        return;
    }

    ASSERT(n_alloc_blocks >= p->blocks);
    n_alloc_blocks -= p->blocks;

    // coalesce forwards
    {
        bdescr *next;
        next = p + p->blocks;
        if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
        {
            p->blocks += next->blocks;
            ln = log_2(next->blocks);
            dbl_link_remove(next, &free_list[ln]);
            if (p->blocks == BLOCKS_PER_MBLOCK)
            {
                free_mega_group(p);
                return;
            }
            setup_tail(p);
        }
    }

    // coalesce backwards
    if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
    {
        bdescr *prev;
        prev = p - 1;
        if (prev->blocks == 0) prev = prev->link; // find the head

        if (prev->free == (P_)-1)
        {
            ln = log_2(prev->blocks);
            dbl_link_remove(prev, &free_list[ln]);
            prev->blocks += p->blocks;
            if (prev->blocks >= BLOCKS_PER_MBLOCK)
            {
                free_mega_group(prev);
                return;
            }
            p = prev;
        }
    }

    setup_tail(p);
    free_list_insert(p);

    IF_DEBUG(sanity, checkFreeListSanity());
}
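
/* Worked example (illustrative): within one mblock, suppose groups
 * [0..3] and [10..15] are free and we free the group [4..9].  The
 * forward check inspects the bdescr of block 10 (a free head,
 * free == -1); the backward check inspects the bdescr of block 3 (a
 * tail with blocks == 0, whose link leads to the head at block 0).
 * Both neighbours are unlinked from their buckets and merged, giving
 * one free group [0..15], all in O(1) without scanning a free list.
 */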

void
freeGroup_lock(bdescr *p)
{
    ACQUIRE_SM_LOCK;
    freeGroup(p);
    RELEASE_SM_LOCK;
}

void
freeChain(bdescr *bd)
{
    bdescr *next_bd;
    while (bd != NULL) {
        next_bd = bd->link;
        freeGroup(bd);
        bd = next_bd;
    }
}

void
freeChain_lock(bdescr *bd)
{
    ACQUIRE_SM_LOCK;
    freeChain(bd);
    RELEASE_SM_LOCK;
}

// splitBlockGroup(bd, blocks) splits bd in two: bd keeps the first
// 'blocks' blocks, and a bdescr for the remainder is returned.
bdescr *
splitBlockGroup (bdescr *bd, nat blocks)
{
    bdescr *new_bd;

    if (bd->blocks <= blocks) {
        barf("splitLargeBlock: too small");
    }

    if (bd->blocks > BLOCKS_PER_MBLOCK) {
        nat mblocks;
        void *new_mblock;
        if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
            barf("splitLargeBlock: not a multiple of a megablock");
        }
        mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
        new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + mblocks * MBLOCK_SIZE_W);
        initMBlock(new_mblock);
        new_bd = FIRST_BDESCR(new_mblock);
        // the remainder is the original mgroup minus the mblocks we kept
        new_bd->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks) - mblocks);
    }
    else
    {
        // NB. we're not updating all the bdescrs in the split groups to
        // point to the new heads, so this can only be used for large
        // objects which do not start in the non-head block.
        new_bd = bd + blocks;
        new_bd->blocks = bd->blocks - blocks;
    }
    bd->blocks = blocks;

    return new_bd;
}
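
/* Usage sketch (illustrative only): splitting a 3-mblock group so the
 * first part keeps 2 mblocks.  When bd spans more than one mblock,
 * 'blocks' must correspond to a whole number of megablocks.
 */
#if 0
    bdescr *bd   = allocGroup_lock(MBLOCK_GROUP_BLOCKS(3));
    bdescr *rest = splitBlockGroup(bd, MBLOCK_GROUP_BLOCKS(2));
    // bd now has MBLOCK_GROUP_BLOCKS(2) blocks; rest heads a fresh
    // 1-mblock group with MBLOCK_GROUP_BLOCKS(1) blocks.
#endif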

static void
initMBlock(void *mblock)
{
    bdescr *bd;
    void *block;

    /* the first few Bdescr's in a block are unused, so we don't want to
     * put them all on the free list.
     */
    block = FIRST_BLOCK(mblock);
    bd    = FIRST_BDESCR(mblock);

    /* Initialise the start field of each block descriptor
     */
    for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
        bd->start = block;
    }
}
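
/* Layout reminder (illustrative): each mblock begins with its array of
 * block descriptors, so the first usable block is FIRST_BLOCK(mblock)
 * and its descriptor is FIRST_BDESCR(mblock); Bdescr(p) recovers the
 * descriptor of any address p by address arithmetic alone, which is
 * what makes the neighbour checks in freeGroup() cheap.
 */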

/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */

#ifdef DEBUG
static void
check_tail (bdescr *bd)
{
    bdescr *tail = tail_of(bd);

    if (tail != bd)
    {
        ASSERT(tail->blocks == 0);
        ASSERT(tail->free == 0);
        ASSERT(tail->link == bd);
    }
}

void
checkFreeListSanity(void)
{
    bdescr *bd, *prev;
    nat ln, min;

    min = 1;
    for (ln = 0; ln < MAX_FREE_LIST; ln++) {
        IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));

        prev = NULL;
        for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
        {
            IF_DEBUG(block_alloc,
                     debugBelch("group at %p, length %ld blocks\n",
                                bd->start, (long)bd->blocks));
            ASSERT(bd->free == (P_)-1);
            ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
            ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
            ASSERT(bd->link != bd); // catch easy loops

            check_tail(bd);

            if (prev)
                ASSERT(bd->u.back == prev);
            else
                ASSERT(bd->u.back == NULL);

            {
                bdescr *next;
                next = bd + bd->blocks;
                if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                {
                    ASSERT(next->free != (P_)-1);
                }
            }
        }
        min = min << 1;
    }

    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        IF_DEBUG(block_alloc,
                 debugBelch("mega group at %p, length %ld blocks\n",
                            bd->start, (long)bd->blocks));

        ASSERT(bd->link != bd); // catch easy loops

        if (bd->link != NULL)
        {
            // make sure the list is sorted
            ASSERT(bd->start < bd->link->start);
        }

        ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
        ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
               == bd->blocks);

        // make sure we're fully coalesced
        if (bd->link != NULL)
        {
            ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
                    MBLOCK_ROUND_DOWN(bd) +
                    BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
        }
    }
}

nat /* BLOCKS */
countFreeList(void)
{
    bdescr *bd;
    lnat total_blocks = 0;
    nat ln;

    for (ln=0; ln < MAX_FREE_LIST; ln++) {
        for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
            total_blocks += bd->blocks;
        }
    }
    for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
        total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
        // The caller of this function, memInventory(), expects to match
        // the total number of blocks in the system against mblocks *
        // BLOCKS_PER_MBLOCK, so we must subtract the space for the
        // block descriptors from *every* mblock.
    }
    return total_blocks;
}
#endif /* DEBUG */