1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-2008
5 * The block allocator and free list manager.
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
10 * void *getMBlock(nat n);
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlock().
16 * ---------------------------------------------------------------------------*/
18 #include "PosixSource.h"
22 #include "BlockAlloc.h"
28 static void initMBlock(void *mblock);
30 // The free_list is kept sorted by size, smallest first.
31 // In THREADED_RTS mode, the free list is protected by sm_mutex.
33 /* -----------------------------------------------------------------------------
39 - bdescr = block descriptor
40 - bgroup = block group (1 or more adjacent blocks)
42 - mgroup = mega group (1 or more adjacent mblocks)
44 Invariants on block descriptors
45 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
46 bd->start always points to the start of the block.
49 - zero for a non-group-head; bd->link points to the head
50 - (-1) for the head of a free block group
51 - or it points within the block
54 - zero for a non-group-head; bd->link points to the head
55 - number of blocks in this group otherwise
57 bd->link either points to a block descriptor or is NULL
59 The following fields are not used by the allocator:
64 Exceptions: we don't maintain invariants for all the blocks within a
65 group on the free list, because it is expensive to modify every
66 bdescr in a group when coalescing. Just the head and last bdescrs
67 will be correct for a group on the free list.
74 - most allocations are for small blocks
75 - sometimes the OS gives us new memory backwards in the address
76 space, sometimes forwards, so we should not be biased towards
77 any particular layout in the address space
78 - We want to avoid fragmentation
79 - We want allocation and freeing to be O(1) or close.
81 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
82 whether it can be coalesced with other free bgroups by checking the
83 bdescrs for the blocks on either side of it. This means that we can
84 coalesce in O(1) time. Every free bgroup must have its head and tail
85 bdescrs initialised, the rest don't matter.
87 We keep the free list in buckets, using a heap-sort strategy.
88 Bucket N contains blocks with sizes 2^N - 2^(N+1)-1. The list of
89 blocks in each bucket is doubly-linked, so that if a block is
90 coalesced we can easily remove it from its current free list.
92 To allocate a new block of size S, grab a block from bucket
93 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
94 least as big as S, and split it if necessary. If there are no
95 blocks in that bucket, look at bigger buckets until a block is found
96 Allocation is therefore O(logN) time.
99 - coalesce it with neighbours.
100 - remove coalesced neighbour(s) from free list(s)
101 - add the new (coalesced) block to the front of the appropriate
102 bucket, given by log2(S) where S is the size of the block.
106 We cannot play this coalescing trick with mblocks, because there is
107 no requirement that the bdescrs in the second and subsequent mblock
108 of an mgroup are initialised (the mgroup might be filled with a
109 large array, overwriting the bdescrs for example).
111 So there is a separate free list for megablocks, sorted in *address*
112 order, so that we can coalesce. Allocation in this list is best-fit
113 by traversing the whole list: we don't expect this list to be long,
114 and allocation/freeing of large blocks is rare; avoiding
115 fragmentation is more important than performance here.
117 freeGroup() might end up moving a block from free_list to
118 free_mblock_list, if after coalescing we end up with a full mblock.
120 checkFreeListSanity() checks all the invariants on the free lists.
122 --------------------------------------------------------------------------- */
124 #define MAX_FREE_LIST 9
126 static bdescr *free_list[MAX_FREE_LIST];
127 static bdescr *free_mblock_list;
129 // free_list[i] contains blocks that are at least size 2^i, and at
130 // most size 2^(i+1) - 1.
132 // To find the free list in which to place a block, use log_2(size).
133 // To find a free block of the right size, use log_2_ceil(size).
135 lnat n_alloc_blocks; // currently allocated blocks
136 lnat hw_alloc_blocks; // high-water allocated blocks
138 /* -----------------------------------------------------------------------------
140 -------------------------------------------------------------------------- */
142 void initBlockAllocator(void)
145 for (i=0; i < MAX_FREE_LIST; i++) {
148 free_mblock_list = NULL;
153 /* -----------------------------------------------------------------------------
155 -------------------------------------------------------------------------- */
158 initGroup(nat n, bdescr *head)
164 head->free = head->start;
166 for (i=1, bd = head+1; i < n; i++, bd++) {
174 // There are quicker non-loopy ways to do log_2, but we expect n to be
175 // usually small, and MAX_FREE_LIST is also small, so the loop version
176 // might well be the best choice here.
182 for (i=0; i < MAX_FREE_LIST; i++) {
183 if (x >= n) return i;
186 return MAX_FREE_LIST;
194 for (i=0; i < MAX_FREE_LIST; i++) {
196 if (x == 0) return i;
198 return MAX_FREE_LIST;
202 free_list_insert (bdescr *bd)
206 ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
207 ln = log_2(bd->blocks);
209 dbl_link_onto(bd, &free_list[ln]);
213 STATIC_INLINE bdescr *
216 return bd + bd->blocks - 1;
219 // After splitting a group, the last block of each group must have a
220 // tail that points to the head block, to keep our invariants for
223 setup_tail (bdescr *bd)
235 // Take a free block group bd, and split off a group of size n from
236 // it. Adjust the free list as necessary, and return the new group.
238 split_free_block (bdescr *bd, nat n, nat ln)
240 bdescr *fg; // free group
242 ASSERT(bd->blocks > n);
243 dbl_link_remove(bd, &free_list[ln]);
244 fg = bd + bd->blocks - n; // take n blocks off the end
248 ln = log_2(bd->blocks);
249 dbl_link_onto(bd, &free_list[ln]);
254 alloc_mega_group (nat mblocks)
256 bdescr *best, *bd, *prev;
259 n = MBLOCK_GROUP_BLOCKS(mblocks);
263 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
268 prev->link = bd->link;
270 free_mblock_list = bd->link;
275 else if (bd->blocks > n)
277 if (!best || bd->blocks < best->blocks)
286 // we take our chunk off the end here.
287 nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
288 bd = FIRST_BDESCR(MBLOCK_ROUND_DOWN(best) +
289 (best_mblocks-mblocks)*MBLOCK_SIZE);
291 best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
292 initMBlock(MBLOCK_ROUND_DOWN(bd));
296 void *mblock = getMBlocks(mblocks);
297 initMBlock(mblock); // only need to init the 1st one
298 bd = FIRST_BDESCR(mblock);
300 bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
310 // Todo: not true in multithreaded GC, where we use allocBlock_sync().
313 if (n == 0) barf("allocGroup: requested zero blocks");
316 if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;
318 if (n >= BLOCKS_PER_MBLOCK)
320 bd = alloc_mega_group(BLOCKS_TO_MBLOCKS(n));
321 // only the bdescrs of the first MB are required to be initialised
322 initGroup(BLOCKS_PER_MBLOCK, bd);
323 IF_DEBUG(sanity, checkFreeListSanity());
329 while (free_list[ln] == NULL && ln < MAX_FREE_LIST) {
333 if (ln == MAX_FREE_LIST) {
334 bd = alloc_mega_group(1);
336 initGroup(n,bd); // we know the group will fit
338 rem->blocks = BLOCKS_PER_MBLOCK-n;
339 initGroup(BLOCKS_PER_MBLOCK-n, rem); // init the slop
340 n_alloc_blocks += rem->blocks;
341 freeGroup(rem); // add the slop on to the free list
342 IF_DEBUG(sanity, checkFreeListSanity());
348 if (bd->blocks == n) // exactly the right size!
350 dbl_link_remove(bd, &free_list[ln]);
352 else if (bd->blocks > n) // block too big...
354 bd = split_free_block(bd, n, ln);
358 barf("allocGroup: free list corrupted");
360 initGroup(n, bd); // initialise it
361 IF_DEBUG(sanity, checkFreeListSanity());
362 ASSERT(bd->blocks == n);
367 allocGroup_lock(nat n)
379 return allocGroup(1);
383 allocBlock_lock(void)
392 /* -----------------------------------------------------------------------------
394 -------------------------------------------------------------------------- */
396 STATIC_INLINE bdescr *
397 coalesce_mblocks (bdescr *p)
403 MBLOCK_ROUND_DOWN(q) ==
404 MBLOCK_ROUND_DOWN(p) + BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
406 p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
407 BLOCKS_TO_MBLOCKS(q->blocks));
415 free_mega_group (bdescr *mg)
419 // Find the right place in the free list. free_mblock_list is
420 // sorted by *address*, not by size as the free_list is.
422 bd = free_mblock_list;
423 while (bd && bd->start < mg->start) {
428 // coalesce backwards
431 mg->link = prev->link;
433 mg = coalesce_mblocks(prev);
437 mg->link = free_mblock_list;
438 free_mblock_list = mg;
441 coalesce_mblocks(mg);
443 IF_DEBUG(sanity, checkFreeListSanity());
452 // Todo: not true in multithreaded GC
455 ASSERT(p->free != (P_)-1);
457 n_alloc_blocks -= p->blocks;
459 p->free = (void *)-1; /* indicates that this block is free */
462 /* fill the block group with garbage if sanity checking is on */
463 IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));
465 if (p->blocks == 0) barf("freeGroup: block size is zero");
467 if (p->blocks >= BLOCKS_PER_MBLOCK)
469 // If this is an mgroup, make sure it has the right number of blocks
470 ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks)));
478 next = p + p->blocks;
479 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
481 p->blocks += next->blocks;
482 ln = log_2(next->blocks);
483 dbl_link_remove(next, &free_list[ln]);
484 if (p->blocks == BLOCKS_PER_MBLOCK)
493 // coalesce backwards
494 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
498 if (prev->blocks == 0) prev = prev->link; // find the head
500 if (prev->free == (P_)-1)
502 ln = log_2(prev->blocks);
503 dbl_link_remove(prev, &free_list[ln]);
504 prev->blocks += p->blocks;
505 if (prev->blocks >= BLOCKS_PER_MBLOCK)
507 free_mega_group(prev);
517 IF_DEBUG(sanity, checkFreeListSanity());
521 freeGroup_lock(bdescr *p)
529 freeChain(bdescr *bd)
540 freeChain_lock(bdescr *bd)
548 splitBlockGroup (bdescr *bd, nat blocks)
552 if (bd->blocks <= blocks) {
553 barf("splitLargeBlock: too small");
556 if (bd->blocks > BLOCKS_PER_MBLOCK) {
559 if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
560 barf("splitLargeBlock: not a multiple of a megablock");
562 mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
563 new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + mblocks * MBLOCK_SIZE_W);
564 initMBlock(new_mblock);
565 new_bd = FIRST_BDESCR(new_mblock);
566 new_bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
570 // NB. we're not updating all the bdescrs in the split groups to
571 // point to the new heads, so this can only be used for large
572 // objects which do not start in the non-head block.
573 new_bd = bd + blocks;
574 new_bd->blocks = bd->blocks - blocks;
582 initMBlock(void *mblock)
587 /* the first few Bdescr's in a block are unused, so we don't want to
588 * put them all on the free list.
590 block = FIRST_BLOCK(mblock);
591 bd = FIRST_BDESCR(mblock);
593 /* Initialise the start field of each block descriptor
595 for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
600 /* -----------------------------------------------------------------------------
602 -------------------------------------------------------------------------- */
606 check_tail (bdescr *bd)
608 bdescr *tail = tail_of(bd);
612 ASSERT(tail->blocks == 0);
613 ASSERT(tail->free == 0);
614 ASSERT(tail->link == bd);
619 checkFreeListSanity(void)
626 for (ln = 0; ln < MAX_FREE_LIST; ln++) {
627 IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));
630 for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
632 IF_DEBUG(block_alloc,
633 debugBelch("group at %p, length %ld blocks\n",
634 bd->start, (long)bd->blocks));
635 ASSERT(bd->free == (P_)-1);
636 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
637 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
638 ASSERT(bd->link != bd); // catch easy loops
643 ASSERT(bd->u.back == prev);
645 ASSERT(bd->u.back == NULL);
649 next = bd + bd->blocks;
650 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
652 ASSERT(next->free != (P_)-1);
660 for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
662 IF_DEBUG(block_alloc,
663 debugBelch("mega group at %p, length %ld blocks\n",
664 bd->start, (long)bd->blocks));
666 ASSERT(bd->link != bd); // catch easy loops
668 if (bd->link != NULL)
670 // make sure the list is sorted
671 ASSERT(bd->start < bd->link->start);
674 ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
675 ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
678 // make sure we're fully coalesced
679 if (bd->link != NULL)
681 ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
682 MBLOCK_ROUND_DOWN(bd) +
683 BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
692 lnat total_blocks = 0;
695 for (ln=0; ln < MAX_FREE_LIST; ln++) {
696 for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
697 total_blocks += bd->blocks;
700 for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
701 total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
702 // The caller of this function, memInventory(), expects to match
703 // the total number of blocks in the system against mblocks *
704 // BLOCKS_PER_MBLOCK, so we must subtract the space for the
705 // block descriptors from *every* mblock.