/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * The block allocator and free list manager.
 *
 * This is the architecture independent part of the block allocator.
 * It requires only the following support from the operating system:
 *
 *    void *getMBlocks(nat n);
 *
 *      returns the address of an n*MBLOCK_SIZE region of memory, aligned on
 *      an MBLOCK_SIZE boundary.  There are no other restrictions on the
 *      addresses of memory returned by getMBlocks().
 *
 * ---------------------------------------------------------------------------*/
#include "PosixSource.h"
#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "BlockAlloc.h"
#include "MBlock.h"
#include "Storage.h"

#include <string.h>
static void initMBlock(void *mblock);

// The free lists are kept in size-class buckets (see the implementation
// notes below), smallest size class first.
// In THREADED_RTS mode, the free lists are protected by sm_mutex.
/* -----------------------------------------------------------------------------

  Implementation notes
  ~~~~~~~~~~~~~~~~~~~~

  Terminology:
    - bdescr = block descriptor
    - bgroup = block group (1 or more adjacent blocks)
    - mblock = mega block
    - mgroup = mega group (1 or more adjacent mblocks)

  Invariants on block descriptors
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  bd->start always points to the start of the block.

  bd->free is either:
     - zero for a non-group-head; bd->link points to the head
     - (-1) for the head of a free block group
     - or it points within the block

  bd->blocks is either:
     - zero for a non-group-head; bd->link points to the head
     - the number of blocks in this group otherwise

  bd->link either points to a block descriptor or is NULL.
  The following fields are not used by the allocator:
    bd->flags, bd->gen_no, bd->step
  Exceptions: we don't maintain invariants for all the blocks within a
  group on the free list, because it is expensive to modify every
  bdescr in a group when coalescing.  Just the head and last bdescrs
  will be correct for a group on the free list.
  Free lists
  ~~~~~~~~~~

  Preliminaries:
    - most allocations are for small blocks
    - sometimes the OS gives us new memory backwards in the address
      space, sometimes forwards, so we should not be biased towards
      any particular layout in the address space
    - We want to avoid fragmentation
    - We want allocation and freeing to be O(1) or close.
  Coalescing trick: when a bgroup is freed (freeGroup()), we can check
  whether it can be coalesced with other free bgroups by checking the
  bdescrs for the blocks on either side of it.  This means that we can
  coalesce in O(1) time.  Every free bgroup must have its head and tail
  bdescrs initialised, the rest don't matter.
  We keep the free list in buckets, using a heap-sort strategy.
  Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
  blocks in each bucket is doubly-linked, so that if a block is
  coalesced we can easily remove it from its current free list.

  To allocate a new block of size S, grab a block from bucket
  log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
  least as big as S, and split it if necessary.  If there are no
  blocks in that bucket, look at bigger buckets until a block is
  found.  Allocation is therefore O(logN) time.  (A sketch of this
  lookup appears just after this comment.)
  To free a block:
    - coalesce it with neighbours.
    - remove coalesced neighbour(s) from free list(s)
    - add the new (coalesced) block to the front of the appropriate
      bucket, given by log2(S) where S is the size of the block.
  We cannot play this coalescing trick with mblocks, because there is
  no requirement that the bdescrs in the second and subsequent mblock
  of an mgroup are initialised (the mgroup might be filled with a
  large array, overwriting the bdescrs for example).

  So there is a separate free list for megablocks, sorted in *address*
  order, so that we can coalesce.  Allocation in this list is best-fit
  by traversing the whole list: we don't expect this list to be long,
  and allocation/freeing of large blocks is rare; avoiding
  fragmentation is more important than performance here.

  freeGroup() might end up moving a block from free_list to
  free_mblock_list, if after coalescing we end up with a full mblock.

  checkFreeListSanity() checks all the invariants on the free lists.

  --------------------------------------------------------------------------- */
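/* A minimal sketch (illustration only, not part of the allocator) of the
 * bucket search described above.  free_list[], MAX_FREE_LIST, log_2_ceil(),
 * dbl_link_remove() and split_free_block() are the real definitions below;
 * the function itself is hypothetical, and allocGroup() is the
 * authoritative version.
 */
#if 0
static bdescr *
example_alloc_small (nat n)   // assumes n < BLOCKS_PER_MBLOCK
{
    bdescr *bd;
    nat ln = log_2_ceil(n);   // first bucket whose blocks are all >= n

    while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
        ln++;                 // bucket empty: try the next size class up
    }
    if (ln == MAX_FREE_LIST) {
        return NULL;          // caller would allocate a fresh megablock
    }
    bd = free_list[ln];
    if (bd->blocks == n) {    // exact fit: unlink and return
        dbl_link_remove(bd, &free_list[ln]);
        return bd;
    }
    return split_free_block(bd, n, ln);  // too big: split off n blocks
}
#endif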
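/* A sketch of the O(1) neighbour test behind the coalescing trick: the
 * group after p has its head bdescr at p + p->blocks, and free group
 * heads are marked by free == (P_)-1.  Hypothetical helper, for
 * illustration only; freeGroup() below contains the real version.
 */
#if 0
static rtsBool
example_can_coalesce_forwards (bdescr *p)
{
    bdescr *next = p + p->blocks;  // bdescr of the following group
    return next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p))
        && next->free == (P_)-1;   // (-1) marks a free group head
}
#endif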
#define MAX_FREE_LIST 9

static bdescr *free_list[MAX_FREE_LIST];
static bdescr *free_mblock_list;

// free_list[i] contains blocks that are at least size 2^i, and at
// most size 2^(i+1) - 1.
//
// To find the free list in which to place a block, use log_2(size).
// To find a free block of the right size, use log_2_ceil(size).
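//
// For example, a free group of 24 blocks lives in free_list[4]
// (log_2(24) = 4, since 16 <= 24 <= 31), but a request for 24 blocks
// starts searching at free_list[5] (log_2_ceil(24) = 5), where every
// group has at least 32 blocks and is therefore guaranteed to fit it.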
lnat n_alloc_blocks;   // currently allocated blocks
lnat hw_alloc_blocks;  // high-water allocated blocks
/* -----------------------------------------------------------------------------
   Initialisation
   -------------------------------------------------------------------------- */

void initBlockAllocator(void)
{
    nat i;
    for (i=0; i < MAX_FREE_LIST; i++) {
        free_list[i] = NULL;
    }
    free_mblock_list = NULL;
    n_alloc_blocks = 0;
    hw_alloc_blocks = 0;
}
/* -----------------------------------------------------------------------------
   Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE void
initGroup(nat n, bdescr *head)
{
    bdescr *bd;
    nat i;

    head->free = head->start;
    // initialise the non-head blocks: per the invariants above, free
    // and blocks are zero, and link points back to the head
    for (i=1, bd = head+1; i < n; i++, bd++) {
        bd->free   = 0;
        bd->blocks = 0;
        bd->link   = head;
    }
}
// There are quicker non-loopy ways to do log_2, but we expect n to be
// usually small, and MAX_FREE_LIST is also small, so the loop version
// might well be the best choice here.

STATIC_INLINE nat
log_2_ceil(nat n)
{
    nat i, x;
    x = 1;
    for (i=0; i < MAX_FREE_LIST; i++) {
        if (x >= n) return i;
        x = x << 1;
    }
    return MAX_FREE_LIST;
}

STATIC_INLINE nat
log_2(nat n)
{
    nat i, x;
    x = n;
    for (i=0; i < MAX_FREE_LIST; i++) {
        x = x >> 1;
        if (x == 0) return i;
    }
    return MAX_FREE_LIST;
}
STATIC_INLINE void
free_list_insert (bdescr *bd)
{
    nat ln;

    ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
    ln = log_2(bd->blocks);

    dbl_link_onto(bd, &free_list[ln]);
}
STATIC_INLINE bdescr *
tail_of (bdescr *bd)
{
    return bd + bd->blocks - 1;
}
// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// the sanity checker.
STATIC_INLINE void
setup_tail (bdescr *bd)
{
    bdescr *tail;
    tail = tail_of(bd);
    if (tail != bd) {
        tail->blocks = 0;
        tail->free = 0;
        tail->link = bd;
    }
}
// Take a free block group bd, and split off a group of size n from
// it.  Adjust the free list as necessary, and return the new group.
STATIC_INLINE bdescr *
split_free_block (bdescr *bd, nat n, nat ln)
{
    bdescr *fg; // free group

    ASSERT(bd->blocks > n);
    dbl_link_remove(bd, &free_list[ln]);
    fg = bd + bd->blocks - n; // take n blocks off the end
    fg->blocks = n;
    bd->blocks -= n;
    setup_tail(bd);
    ln = log_2(bd->blocks);
    dbl_link_onto(bd, &free_list[ln]);
    return fg;
}
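// Design note: we split from the *end* of the group so that the
// remaining free blocks keep their existing head bdescr; only the
// remainder's block count, tail and free-list bucket need updating.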
static bdescr *
alloc_mega_group (nat mblocks)
{
    bdescr *best, *bd, *prev;
    nat n;

    n = MBLOCK_GROUP_BLOCKS(mblocks);
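    // MBLOCK_GROUP_BLOCKS(n) counts the usable blocks in an n-mblock
    // group: BLOCKS_PER_MBLOCK for the first mblock, plus a full
    // MBLOCK_SIZE/BLOCK_SIZE for each subsequent one, whose descriptor
    // area is given over to data (which is why the bdescrs there may
    // be overwritten, as noted above).  For example, with 1MB mblocks,
    // 4KB blocks and 64-byte descriptors (the usual 64-bit geometry,
    // where BLOCKS_PER_MBLOCK == 252), MBLOCK_GROUP_BLOCKS(2) is
    // 252 + 256 == 508.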
    best = NULL;
    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        if (bd->blocks == n)            // exactly the right size!
        {
            // remove it from the free list and return it
            if (prev) {
                prev->link = bd->link;
            } else {
                free_mblock_list = bd->link;
            }
            return bd;
        }
        else if (bd->blocks > n)        // block too big...
        {
            if (!best || bd->blocks < best->blocks)
            {
                best = bd;              // remember the smallest that fits
            }
        }
    }

    if (best)
    {
        // we take our chunk off the end here.
        nat best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
        bd = FIRST_BDESCR(MBLOCK_ROUND_DOWN(best) +
                          (best_mblocks-mblocks)*MBLOCK_SIZE);

        best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
        initMBlock(MBLOCK_ROUND_DOWN(bd));
    }
    else
    {
        void *mblock = getMBlocks(mblocks);
        initMBlock(mblock);             // only need to init the 1st one
        bd = FIRST_BDESCR(mblock);
    }
    bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    return bd;
}
bdescr *
allocGroup (nat n)
{
    bdescr *bd, *rem;
    nat ln;

    // Todo: not true in multithreaded GC, where we use allocBlock_sync().
    // ASSERT_SM_LOCK();

    if (n == 0) barf("allocGroup: requested zero blocks");

    n_alloc_blocks += n;
    if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks;

    if (n >= BLOCKS_PER_MBLOCK)
    {
        bd = alloc_mega_group(BLOCKS_TO_MBLOCKS(n));
        // only the bdescrs of the first MB are required to be initialised
        initGroup(BLOCKS_PER_MBLOCK, bd);
        IF_DEBUG(sanity, checkFreeListSanity());
        return bd;
    }

    ln = log_2_ceil(n);

    // NB. bounds check before indexing, otherwise we read off the end
    // of free_list[] when every bucket from ln upwards is empty
    while (ln < MAX_FREE_LIST && free_list[ln] == NULL) {
        ln++;
    }

    if (ln == MAX_FREE_LIST) {
        // debugging: complain if we are wasting more than a megabyte
        // of free space in fragments
        if ((mblocks_allocated * MBLOCK_SIZE_W - n_alloc_blocks * BLOCK_SIZE_W) > (1024*1024)/sizeof(W_)) {
            debugBelch("Fragmentation, wanted %d blocks:", n);
            RtsFlags.DebugFlags.block_alloc = 1;
            checkFreeListSanity();
        }

        bd = alloc_mega_group(1);
        bd->blocks = n;
        initGroup(n,bd);                     // we know the group will fit
        rem = bd + n;
        rem->blocks = BLOCKS_PER_MBLOCK-n;
        initGroup(BLOCKS_PER_MBLOCK-n, rem); // init the slop
        n_alloc_blocks += rem->blocks;
        freeGroup(rem);                      // add the slop on to the free list
        IF_DEBUG(sanity, checkFreeListSanity());
        return bd;
    }
    bd = free_list[ln];

    if (bd->blocks == n)                // exactly the right size!
    {
        dbl_link_remove(bd, &free_list[ln]);
    }
    else if (bd->blocks > n)            // block too big...
    {
        bd = split_free_block(bd, n, ln);
    }
    else
    {
        barf("allocGroup: free list corrupted");
    }
    initGroup(n, bd);                   // initialise it
    IF_DEBUG(sanity, checkFreeListSanity());
    ASSERT(bd->blocks == n);
    return bd;
}
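/* A usage sketch (illustration only, not part of the allocator): the
 * _lock variants below wrap the unlocked entry points for callers that
 * do not already hold the storage-manager lock.
 */
#if 0
static void example_usage (void)
{
    bdescr *bd = allocGroup_lock(4);   // 4 contiguous blocks
    // ... use bd->start .. bd->start + 4*BLOCK_SIZE_W as heap space ...
    freeGroup_lock(bd);                // coalesces with free neighbours
}
#endif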
bdescr *
allocGroup_lock(nat n)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroup(n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlock(void)
{
    return allocGroup(1);
}

bdescr *
allocBlock_lock(void)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlock();
    RELEASE_SM_LOCK;
    return bd;
}
/* -----------------------------------------------------------------------------
   De-Allocation
   -------------------------------------------------------------------------- */
STATIC_INLINE bdescr *
coalesce_mblocks (bdescr *p)
{
    bdescr *q;

    q = p->link;
    if (q != NULL &&
        MBLOCK_ROUND_DOWN(q) ==
        MBLOCK_ROUND_DOWN(p) + BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
        // p and q are contiguous in the address space: merge them
        p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                        BLOCKS_TO_MBLOCKS(q->blocks));
        p->link = q->link;
        return p;
    }
    return q;
}
static void
free_mega_group (bdescr *mg)
{
    bdescr *bd, *prev;

    // Find the right place in the free list.  free_mblock_list is
    // sorted by *address*, not by size as the free_list is.
    prev = NULL;
    bd = free_mblock_list;
    while (bd && bd->start < mg->start) {
        prev = bd;
        bd = bd->link;
    }

    // coalesce backwards
    if (prev)
    {
        mg->link = prev->link;
        prev->link = mg;
        mg = coalesce_mblocks(prev);
    }
    else
    {
        mg->link = free_mblock_list;
        free_mblock_list = mg;
    }
    // coalesce forwards
    coalesce_mblocks(mg);

    IF_DEBUG(sanity, checkFreeListSanity());
}
void
freeGroup(bdescr *p)
{
    nat ln;

    // Todo: not true in multithreaded GC
    // ASSERT_SM_LOCK();

    ASSERT(p->free != (P_)-1);

    n_alloc_blocks -= p->blocks;

    p->free = (void *)-1;  /* indicates that this block is free */

    /* fill the block group with garbage if sanity checking is on */
    IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));

    if (p->blocks == 0) barf("freeGroup: block size is zero");

    if (p->blocks >= BLOCKS_PER_MBLOCK)
    {
        // If this is an mgroup, make sure it has the right number of blocks
        ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks)));
        free_mega_group(p);
        return;
    }

    // coalesce forwards
    {
        bdescr *next;
        next = p + p->blocks;
        if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
        {
            p->blocks += next->blocks;
            ln = log_2(next->blocks);
            dbl_link_remove(next, &free_list[ln]);
            if (p->blocks == BLOCKS_PER_MBLOCK)
            {
                free_mega_group(p);
                return;
            }
            setup_tail(p);
        }
    }

    // coalesce backwards
    if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
    {
        bdescr *prev;
        prev = p - 1;
        if (prev->blocks == 0) prev = prev->link; // find the head

        if (prev->free == (P_)-1)
        {
            ln = log_2(prev->blocks);
            dbl_link_remove(prev, &free_list[ln]);
            prev->blocks += p->blocks;
            if (prev->blocks >= BLOCKS_PER_MBLOCK)
            {
                free_mega_group(prev);
                IF_DEBUG(sanity, checkFreeListSanity());
                return;
            }
            p = prev;
        }
    }

    setup_tail(p);
    free_list_insert(p);

    IF_DEBUG(sanity, checkFreeListSanity());
}
void
freeGroup_lock(bdescr *p)
{
    ACQUIRE_SM_LOCK;
    freeGroup(p);
    RELEASE_SM_LOCK;
}

void
freeChain(bdescr *bd)
{
    bdescr *next;
    while (bd != NULL) {
        next = bd->link;
        freeGroup(bd);
        bd = next;
    }
}

void
freeChain_lock(bdescr *bd)
{
    ACQUIRE_SM_LOCK;
    freeChain(bd);
    RELEASE_SM_LOCK;
}
bdescr *
splitBlockGroup (bdescr *bd, nat blocks)
{
    bdescr *new_bd;

    if (bd->blocks <= blocks) {
        barf("splitLargeBlock: too small");
    }

    if (bd->blocks > BLOCKS_PER_MBLOCK) {
        nat mblocks;
        void *new_mblock;
        if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) {
            barf("splitLargeBlock: not a multiple of a megablock");
        }
        mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE);
        new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + mblocks * MBLOCK_SIZE_W);
        initMBlock(new_mblock);
        new_bd = FIRST_BDESCR(new_mblock);
        // the new group gets the mblocks left over after the first
        // 'mblocks' megablocks are kept by bd
        new_bd->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks) - mblocks);
    }
    else
    {
        // NB. we're not updating all the bdescrs in the split groups to
        // point to the new heads, so this can only be used for large
        // objects which do not start in the non-head block.
        new_bd = bd + blocks;
        new_bd->blocks = bd->blocks - blocks;
    }
    bd->blocks = blocks;

    return new_bd;
}
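// Example (illustrative): given a 10-block (sub-megablock) group bd,
// splitBlockGroup(bd, 3) shrinks bd to 3 blocks and returns a new
// 7-block group headed at bd + 3.  Note the caveat above: bdescrs
// inside the tail group still point at the old head.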
static void
initMBlock(void *mblock)
{
    bdescr *bd;
    void *block;

    /* the first few Bdescr's in a block are unused, so we don't want to
     * put them all on the free list.
     */
    block = FIRST_BLOCK(mblock);
    bd    = FIRST_BDESCR(mblock);

    /* Initialise the start field of each block descriptor
     */
    for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
        bd->start = block;
    }
}
/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */
static void
check_tail (bdescr *bd)
{
    bdescr *tail = tail_of(bd);

    if (tail != bd)
    {
        ASSERT(tail->blocks == 0);
        ASSERT(tail->free == 0);
        ASSERT(tail->link == bd);
    }
}
void
checkFreeListSanity(void)
{
    bdescr *bd, *prev;
    nat ln, min;

    min = 1;
    for (ln = 0; ln < MAX_FREE_LIST; ln++) {
        IF_DEBUG(block_alloc, debugBelch("free block list [%d]:\n", ln));

        prev = NULL;
        for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link)
        {
            IF_DEBUG(block_alloc,
                     debugBelch("group at %p, length %ld blocks\n",
                                bd->start, (long)bd->blocks));
            ASSERT(bd->free == (P_)-1);
            ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
            ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
            ASSERT(bd->link != bd); // catch easy loops

            check_tail(bd);

            if (prev)
                ASSERT(bd->u.back == prev);
            else
                ASSERT(bd->u.back == NULL);

            // there shouldn't be a free group immediately after this
            // one: we would have coalesced with it
            {
                bdescr *next;
                next = bd + bd->blocks;
                if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                {
                    ASSERT(next->free != (P_)-1);
                }
            }
        }
        min = min << 1;
    }
    prev = NULL;
    for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link)
    {
        IF_DEBUG(block_alloc,
                 debugBelch("mega group at %p, length %ld blocks\n",
                            bd->start, (long)bd->blocks));

        ASSERT(bd->link != bd); // catch easy loops

        if (bd->link != NULL)
        {
            // make sure the list is sorted
            ASSERT(bd->start < bd->link->start);
        }

        ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
        ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
               == bd->blocks);

        // make sure we're fully coalesced
        if (bd->link != NULL)
        {
            ASSERT (MBLOCK_ROUND_DOWN(bd->link) !=
                    MBLOCK_ROUND_DOWN(bd) +
                    BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
        }
    }
}
nat /* BLOCKS */
countFreeList(void)
{
    bdescr *bd;
    lnat total_blocks = 0;
    nat ln;

    for (ln=0; ln < MAX_FREE_LIST; ln++) {
        for (bd = free_list[ln]; bd != NULL; bd = bd->link) {
            total_blocks += bd->blocks;
        }
    }
    for (bd = free_mblock_list; bd != NULL; bd = bd->link) {
        total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
        // The caller of this function, memInventory(), expects to match
        // the total number of blocks in the system against mblocks *
        // BLOCKS_PER_MBLOCK, so we must subtract the space for the
        // block descriptors from *every* mblock.
    }
    return total_blocks;
}
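// Worked example of the subtraction above, assuming the usual 64-bit
// geometry (BLOCKS_PER_MBLOCK == 252, MBLOCK_SIZE/BLOCK_SIZE == 256):
// a 3-mblock free group has bd->blocks == MBLOCK_GROUP_BLOCKS(3)
// == 252 + 2*256 == 764, but memInventory() counts it as 3 * 252
// == 756 blocks; BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(764) gives
// exactly that.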