/* -----------------------------------------------------------------------------
 * (c) The GHC Team 1998-2008
 *
 * The block allocator and free list manager.
 *
 * This is the architecture independent part of the block allocator.
 * It requires only the following support from the operating system:
 *
 *    void *getMBlocks(nat n);
 *
 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
 * an MBLOCK_SIZE boundary.  There are no other restrictions on the
 * addresses of memory returned by getMBlocks().
 *
 * ---------------------------------------------------------------------------*/
#include "PosixSource.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "BlockAlloc.h"
#include "MBlock.h"

#include <string.h>
static void initMBlock(void *mblock);
// The free_list is an array of buckets, each holding free block groups
// in a particular size range (see the implementation notes below).
// In THREADED_RTS mode, the free lists are protected by sm_mutex.
/* -----------------------------------------------------------------------------

   Implementation notes
   ~~~~~~~~~~~~~~~~~~~~

   Terminology:
     - bdescr = block descriptor
     - bgroup = block group (1 or more adjacent blocks)
     - mblock = mega block
     - mgroup = mega group (1 or more adjacent mblocks)
   Invariants on block descriptors
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   bd->start always points to the start of the block.

   bd->free is either:
      - zero for a non-group-head; bd->link points to the head
      - (-1) for the head of a free block group
      - or it points within the block

   bd->blocks is either:
      - zero for a non-group-head; bd->link points to the head
      - number of blocks in this group otherwise

   bd->link either points to a block descriptor or is NULL
   The remaining fields of a block descriptor are not used by the
   allocator.
   Exceptions: we don't maintain invariants for all the blocks within a
   group on the free list, because it is expensive to modify every
   bdescr in a group when coalescing.  Just the head and last bdescrs
   will be correct for a group on the free list.
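
   For illustration, these encodings make "is this the head of a free
   group?" an O(1) test (a sketch, not code from this file):

       is_head      = (bd->blocks != 0);           // non-heads have blocks == 0
       is_free_head = is_head && (bd->free == (P_)-1);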
   Free lists
   ~~~~~~~~~~

   Preliminaries:
     - most allocations are for small blocks
     - sometimes the OS gives us new memory backwards in the address
       space, sometimes forwards, so we should not be biased towards
       any particular layout in the address space
     - We want to avoid fragmentation
     - We want allocation and freeing to be O(1) or close.
   Coalescing trick: when a bgroup is freed (freeGroup()), we can check
   whether it can be coalesced with other free bgroups by checking the
   bdescrs for the blocks on either side of it.  This means that we can
   coalesce in O(1) time.  Every free bgroup must have its head and tail
   bdescrs initialised, the rest don't matter.
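
   For illustration, the neighbour lookup behind this trick is plain
   pointer arithmetic on the bdescr array (a simplified sketch of what
   freeGroup() below does):

       bdescr *next = p + p->blocks;   // bdescr of the block after group p
       if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p))
           && next->free == (P_)-1) {
           // next is the head of a free bgroup: coalesce p with it
       }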
   We keep the free list in buckets, using a heap-sort strategy.
   Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
   blocks in each bucket is doubly-linked, so that if a block is
   coalesced we can easily remove it from its current free list.
   To allocate a new block of size S, grab a block from bucket
   log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
   least as big as S, and split it if necessary.  If there are no
   blocks in that bucket, look at bigger buckets until a block is
   found.  Allocation is therefore O(log N) time.
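
   For example (illustrative numbers): a free group of 5 blocks lives
   in bucket log2(5) = 2, which holds sizes 4-7.  A request for 5
   blocks starts at bucket log2ceiling(5) = 3, where every group has
   at least 8 blocks, and splits off the surplus.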
   To free a block (see the sketch after this list):
     - coalesce it with neighbours.
     - remove coalesced neighbour(s) from free list(s)
     - add the new (coalesced) block to the front of the appropriate
       bucket, given by log2(S) where S is the size of the block.
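
   A simplified sketch of that sequence (the real version, with all
   the cases, is freeGroup() below):

       p->blocks += next->blocks;                    // coalesce forwards
       dbl_link_remove(next, &free_list[log2(next->blocks)]);
       free_list_insert(p);                          // bucket log2(p->blocks)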
   We cannot play this coalescing trick with mblocks, because there is
   no requirement that the bdescrs in the second and subsequent mblock
   of an mgroup are initialised (for example, the mgroup might be
   filled with a large array, overwriting the bdescrs).
   So there is a separate free list for megablocks, sorted in *address*
   order, so that we can coalesce.  Allocation in this list is best-fit
   by traversing the whole list: we don't expect this list to be long,
   and allocation/freeing of large blocks is rare; avoiding
   fragmentation is more important than performance here.
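
   A sketch of that best-fit scan (the real version, which also splits
   the winning group, is alloc_mega_group() below):

       best = NULL;
       for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
           if (bd->blocks == n) return bd;                 // exact fit
           if (bd->blocks > n && (!best || bd->blocks < best->blocks))
               best = bd;                                  // smallest that fits
       }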
   freeGroup() might end up moving a block from free_list to
   free_mblock_list, if after coalescing we end up with a full mblock.

   checkFreeListSanity() checks all the invariants on the free lists.

   --------------------------------------------------------------------------- */
#define MAX_FREE_LIST 9

static bdescr *free_list[MAX_FREE_LIST];
static bdescr *free_mblock_list;
// free_list[i] contains blocks that are at least size 2^i, and at
// most size 2^(i+1) - 1.
//
// To find the free list in which to place a block, use log_2(size).
// To find a free block of the right size, use log_2_ceil(size).
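//
// For example (illustrative): log_2(5) == 2, so a 5-block group is
// placed on free_list[2] (sizes 4-7), while log_2_ceil(5) == 3, so a
// request for 5 blocks searches free_list[3] and upwards, where every
// group is guaranteed to be big enough.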
lnat n_alloc_blocks;   // currently allocated blocks
lnat hw_alloc_blocks;  // high-water allocated blocks
/* -----------------------------------------------------------------------------
   Initialisation
   -------------------------------------------------------------------------- */
void initBlockAllocator(void)
{
    nat i;
    for (i=0; i < MAX_FREE_LIST; i++) {
        free_list[i] = NULL;
    }
    free_mblock_list = NULL;
    n_alloc_blocks = 0;
    hw_alloc_blocks = 0;
}
/* -----------------------------------------------------------------------------
   Allocation
   -------------------------------------------------------------------------- */
STATIC_INLINE void
initGroup(bdescr *head)
{
  bdescr *bd;
  nat i, n;

  n = head->blocks;
  head->free   = head->start;
  head->link   = NULL;
  for (i=1, bd = head+1; i < n; i++, bd++) {
      bd->free   = 0;    // zero marks a non-group-head (see invariants)
      bd->blocks = 0;
      bd->link   = head; // non-heads point back at the head
  }
}
// There are quicker non-loopy ways to do log_2, but we expect n to be
// usually small, and MAX_FREE_LIST is also small, so the loop version
// might well be the best choice here.
STATIC_INLINE nat
log_2_ceil(nat n)
{
    nat i, x;
    x = 1;
    for (i=0; i < MAX_FREE_LIST; i++) {
        if (x >= n) return i;
        x = x << 1;
    }
    return MAX_FREE_LIST;
}

STATIC_INLINE nat
log_2(nat n)
{
    nat i, x;
    x = n;
    for (i=0; i < MAX_FREE_LIST; i++) {
        x = x >> 1;
        if (x == 0) return i;
    }
    return MAX_FREE_LIST;
}
STATIC_INLINE void
free_list_insert (bdescr *bd)
{
    nat ln;

    ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
    ln = log_2(bd->blocks);

    dbl_link_onto(bd, &free_list[ln]);
}
STATIC_INLINE bdescr *
tail_of (bdescr *bd)
{
    return bd + bd->blocks - 1;
}
// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// the non-head blocks.
STATIC_INLINE void
setup_tail (bdescr *bd)
{
    bdescr *tail;
    tail = tail_of(bd);
    if (tail != bd) {
        tail->blocks = 0;
        tail->free   = 0;
        tail->link   = bd;
    }
}
// Take a free block group bd, and split off a group of size n from
// it.  Adjust the free list as necessary, and return the new group.
static bdescr *
split_free_block (bdescr *bd, nat n, nat ln)
{
    bdescr *fg; // free group

    ASSERT(bd->blocks > n);
    dbl_link_remove(bd, &free_list[ln]);
    fg = bd + bd->blocks - n; // take n blocks off the end
    fg->blocks = n;
    bd->blocks -= n;
    setup_tail(bd);
    ln = log_2(bd->blocks);
    dbl_link_onto(bd, &free_list[ln]);
    return fg;
}
static bdescr *
alloc_mega_group (nat mblocks)
{
    bdescr *best, *bd, *prev;
    nat n;

    n = MBLOCK_GROUP_BLOCKS(mblocks);

    best = NULL;
    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        if (bd->blocks == n)          // exact match: unlink and return it
        {
            if (prev) {
                prev->link = bd->link;
            } else {
                free_mblock_list = bd->link;
            }
            return bd;
        }
        else if (bd->blocks > n)      // remember the smallest group that fits
        {
            if (!best || bd->blocks < best->blocks)
            {
                best = bd;
            }
        }
    }

    if (best)
    {
        // we take our chunk off the end here.
        nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
        bd = FIRST_BDESCR(MBLOCK_ROUND_DOWN(best) +
                          (best_mblocks-mblocks)*MBLOCK_SIZE);

        best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
        initMBlock(MBLOCK_ROUND_DOWN(bd));
    }
    else
    {
        void *mblock = getMBlocks(mblocks);
        initMBlock(mblock); // only need to init the 1st one
        bd = FIRST_BDESCR(mblock);
    }
    bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    return bd;
}
bdescr *
allocGroup (nat n)
{
    bdescr *bd, *rem;
    nat ln;

    // Todo: not true in multithreaded GC, where we use allocBlock_sync().
    // ASSERT_SM_LOCK();

    if (n == 0) barf("allocGroup: requested zero blocks");

    n_alloc_blocks += n;
    if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;

    if (n >= BLOCKS_PER_MBLOCK)
    {
        bd = alloc_mega_group(BLOCKS_TO_MBLOCKS(n));
        // only the bdescrs of the first MB are required to be initialised
        initGroup(bd);
        IF_DEBUG(sanity, checkFreeListSanity());
        return bd;
    }

    ln = log_2_ceil(n);

    while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
        ln++;
    }

    if (ln == MAX_FREE_LIST) {
#if 0
        if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
            debugBelch("Fragmentation, wanted %d blocks:", n);
            RtsFlags.DebugFlags.block_alloc = 1;
            checkFreeListSanity();
        }
#endif
        bd = alloc_mega_group(1);
        bd->blocks = n;
        initGroup(bd);                   // we know the group will fit
        rem = bd + n;
        rem->blocks = BLOCKS_PER_MBLOCK-n;
        initGroup(rem);                  // init the slop
        n_alloc_blocks += rem->blocks;
        freeGroup(rem);                  // add the slop on to the free list
        IF_DEBUG(sanity, checkFreeListSanity());
        ASSERT(bd->blocks == n);
        return bd;
    }

    bd = free_list[ln];

    if (bd->blocks == n)                 // exactly the right size!
    {
        dbl_link_remove(bd, &free_list[ln]);
    }
    else if (bd->blocks > n)             // block too big...
    {
        bd = split_free_block(bd, n, ln);
    }
    else
    {
        barf("allocGroup: free list corrupted");
    }
    initGroup(bd);                       // initialise it
    IF_DEBUG(sanity, checkFreeListSanity());
    ASSERT(bd->blocks == n);
    return bd;
}
bdescr *
allocGroup_lock(nat n)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroup(n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlock(void)
{
    return allocGroup(1);
}

bdescr *
allocBlock_lock(void)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlock();
    RELEASE_SM_LOCK;
    return bd;
}
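
/* Typical usage (illustrative, not from this file): code running
 * outside the GC takes the storage-manager lock via the _lock
 * variants:
 *
 *     bdescr *bd = allocGroup_lock(4);   // 4 contiguous blocks
 *     ... use bd->start ...
 *     freeGroup_lock(bd);
 */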
/* -----------------------------------------------------------------------------
   De-Allocation
   -------------------------------------------------------------------------- */
STATIC_INLINE bdescr *
coalesce_mblocks (bdescr *p)
{
    bdescr *q;

    q = p->link;
    if (q != NULL &&
        MBLOCK_ROUND_DOWN(q) ==
        MBLOCK_ROUND_DOWN(p) + BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
        // can coalesce: q is the mgroup immediately after p in memory
        p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                        BLOCKS_TO_MBLOCKS(q->blocks));
        p->link = q->link;
        return p;
    }
    return q;
}
static void
free_mega_group (bdescr *mg)
{
    bdescr *bd, *prev;

    // Find the right place in the free list.  free_mblock_list is
    // sorted by *address*, not by size as the free_list is.
    prev = NULL;
    bd = free_mblock_list;
    while (bd && bd->start < mg->start) {
        prev = bd;
        bd = bd->link;
    }

    // coalesce backwards
    if (prev)
    {
        mg->link = prev->link;
        prev->link = mg;
        mg = coalesce_mblocks(prev);
    }
    else
    {
        mg->link = free_mblock_list;
        free_mblock_list = mg;
    }
    // coalesce forwards
    coalesce_mblocks(mg);

    IF_DEBUG(sanity, checkFreeListSanity());
}
void
freeGroup(bdescr *p)
{
  nat ln;

  // Todo: not true in multithreaded GC
  // ASSERT_SM_LOCK();

  ASSERT(p->free != (P_)-1);

  n_alloc_blocks -= p->blocks;

  p->free = (void *)-1;  /* indicates that this block is free */

  /* fill the block group with garbage if sanity checking is on */
  IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));

  if (p->blocks == 0) barf("freeGroup: block size is zero");

  if (p->blocks >= BLOCKS_PER_MBLOCK)
  {
      // If this is an mgroup, make sure it has the right number of blocks
      ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks)));
      free_mega_group(p);
      return;
  }

  // coalesce forwards
  {
      bdescr *next;
      next = p + p->blocks;
      if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
      {
          p->blocks += next->blocks;
          ln = log_2(next->blocks);
          dbl_link_remove(next, &free_list[ln]);
          if (p->blocks == BLOCKS_PER_MBLOCK)
          {
              free_mega_group(p);
              return;
          }
          setup_tail(p);
      }
  }

  // coalesce backwards
  if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
  {
      bdescr *prev;
      prev = p - 1;
      if (prev->blocks == 0) prev = prev->link; // find the head

      if (prev->free == (P_)-1)
      {
          ln = log_2(prev->blocks);
          dbl_link_remove(prev, &free_list[ln]);
          prev->blocks += p->blocks;
          if (prev->blocks >= BLOCKS_PER_MBLOCK)
          {
              free_mega_group(prev);
              return;
          }
          p = prev;
      }
  }

  setup_tail(p);
  free_list_insert(p);

  IF_DEBUG(sanity, checkFreeListSanity());
}
void
freeGroup_lock(bdescr *p)
{
    ACQUIRE_SM_LOCK;
    freeGroup(p);
    RELEASE_SM_LOCK;
}
void
freeChain(bdescr *bd)
{
    bdescr *next_bd;
    while (bd != NULL) {
        next_bd = bd->link;   // read link before freeGroup() clobbers it
        freeGroup(bd);
        bd = next_bd;
    }
}
void
freeChain_lock(bdescr *bd)
{
    ACQUIRE_SM_LOCK;
    freeChain(bd);
    RELEASE_SM_LOCK;
}
bdescr *
splitBlockGroup (bdescr *bd, nat blocks)
{
    bdescr *new_bd;

    if (bd->blocks <= blocks) {
        barf("splitLargeBlock: too small");
    }

    if (bd->blocks > BLOCKS_PER_MBLOCK) {
        nat mblocks;
        void *new_mblock;
        if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
            barf("splitLargeBlock: not a multiple of a megablock");
        }
        mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
        new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + mblocks * MBLOCK_SIZE_W);
        initMBlock(new_mblock);
        new_bd = FIRST_BDESCR(new_mblock);
        new_bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    }
    else
    {
        // NB. we're not updating all the bdescrs in the split groups to
        // point to the new heads, so this can only be used for large
        // objects which do not start in the non-head block.
        new_bd = bd + blocks;
        new_bd->blocks = bd->blocks - blocks;
    }
    bd->blocks = blocks;

    return new_bd;
}
static void
initMBlock(void *mblock)
{
  bdescr *bd;
  void *block;

  /* the first few Bdescr's in an mblock are unused, so we don't want to
   * put them all on the free list.
   */
  block = FIRST_BLOCK(mblock);
  bd    = FIRST_BDESCR(mblock);

  /* Initialise the start field of each block descriptor
   */
  for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
      bd->start = block;
  }
}
/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */
static void
check_tail (bdescr *bd)
{
    bdescr *tail = tail_of(bd);

    if (tail != bd)
    {
        ASSERT(tail->blocks == 0);
        ASSERT(tail->free == 0);
        ASSERT(tail->link == bd);
    }
}
void
checkFreeListSanity(void)
{
    bdescr *bd, *prev;
    nat ln, min;

    min = 1;
    for (ln = 0; ln < MAX_FREE_LIST; ln++) {
        IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));

        prev = NULL;
        for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
        {
            IF_DEBUG(block_alloc,
                     debugBelch("group at %p, length %ld blocks\n",
                                bd->start, (long)bd->blocks));
            ASSERT(bd->free == (P_)-1);
            ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
            ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
            ASSERT(bd->link != bd); // catch easy loops

            check_tail(bd);

            if (prev)
                ASSERT(bd->u.back == prev);
            else
                ASSERT(bd->u.back == NULL);

            {
                bdescr *next;
                next = bd + bd->blocks;
                if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                {
                    // the group after a free group must not itself be
                    // free, or the two would have been coalesced
                    ASSERT(next->free != (P_)-1);
                }
            }
        }
        min = min << 1;
    }

    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        IF_DEBUG(block_alloc,
                 debugBelch("mega group at %p, length %ld blocks\n",
                            bd->start, (long)bd->blocks));

        ASSERT(bd->link != bd); // catch easy loops

        if (bd->link != NULL)
        {
            // make sure the list is sorted
            ASSERT(bd->start < bd->link->start);
        }

        ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
        ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
               == bd->blocks);

        // make sure we're fully coalesced
        if (bd->link != NULL)
        {
            ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
                    MBLOCK_ROUND_DOWN(bd) +
                    BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
        }
    }
}
nat
countFreeList(void)
{
  bdescr *bd;
  lnat total_blocks = 0;
  nat ln;

  for (ln=0; ln < MAX_FREE_LIST; ln++) {
      for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
          total_blocks += bd->blocks;
      }
  }
  for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
      total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
      // The caller of this function, memInventory(), expects to match
      // the total number of blocks in the system against mblocks *
      // BLOCKS_PER_MBLOCK, so we must subtract the space for the
      // block descriptors from *every* mblock.
  }
  return total_blocks;
}
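
/* For example (illustrative): a free mgroup of 2 mblocks has
 * bd->blocks == MBLOCK_GROUP_BLOCKS(2)
 *            == BLOCKS_PER_MBLOCK + MBLOCK_SIZE/BLOCK_SIZE,
 * but the loop above counts it as 2 * BLOCKS_PER_MBLOCK, as if the
 * second mblock also lost space to block descriptors; this matches
 * memInventory()'s mblocks * BLOCKS_PER_MBLOCK accounting.
 */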