1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team 1998-1999
5 * MegaBlock Allocator Interface. This file contains all the dirty
6 * architecture-dependent hackery required to get a chunk of aligned
7 * memory from the operating system.
9 * ---------------------------------------------------------------------------*/
11 /* This is non-posix compliant. */
12 /* #include "PosixSource.h" */
18 #include "BlockAlloc.h"
30 #ifdef HAVE_SYS_TYPES_H
31 #include <sys/types.h>
33 #ifndef mingw32_HOST_OS
34 # ifdef HAVE_SYS_MMAN_H
35 # include <sys/mman.h>
45 #include <mach/vm_map.h>
50 lnat mblocks_allocated = 0;
52 #if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)
53 static caddr_t next_request = 0;
59 #if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)
60 next_request = (caddr_t)RtsFlags.GcFlags.heapBase;
64 /* -----------------------------------------------------------------------------
65 The MBlock Map: provides our implementation of HEAP_ALLOCED()
66 -------------------------------------------------------------------------- */
68 #if SIZEOF_VOID_P == 4
69 StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
70 #elif SIZEOF_VOID_P == 8
71 static MBlockMap dummy_mblock_map;
72 MBlockMap *mblock_cache = &dummy_mblock_map;
73 int mblock_map_count = 0;
74 MBlockMap **mblock_maps = NULL;
77 findMBlockMap(void *p)
80 StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
81 for( i = 0; i < mblock_map_count; i++ )
83 if(mblock_maps[i]->addrHigh32 == hi)
85 return mblock_maps[i];
92 slowIsHeapAlloced(void *p)
94 MBlockMap *map = findMBlockMap(p);
98 return map->mblocks[MBLOCK_MAP_ENTRY(p)];
106 markHeapAlloced(void *p)
108 #if SIZEOF_VOID_P == 4
109 mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
110 #elif SIZEOF_VOID_P == 8
111 MBlockMap *map = findMBlockMap(p);
115 mblock_maps = realloc(mblock_maps,
116 sizeof(MBlockMap*) * mblock_map_count);
117 map = mblock_maps[mblock_map_count-1] = calloc(1,sizeof(MBlockMap));
118 map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
120 map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
125 /* -----------------------------------------------------------------------------
126 Allocate new mblock(s)
127 -------------------------------------------------------------------------- */
132 return getMBlocks(1);
135 /* -----------------------------------------------------------------------------
138 On Unix-like systems, we use mmap() to allocate our memory. We
139 want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
140 boundary. The mmap() interface doesn't give us this level of
141 control, so we have to use some heuristics.
143 In the general case, if we want a block of n megablocks, then we
144 allocate n+1 and trim off the slop from either side (using
145 munmap()) to get an aligned chunk of size n. However, the next
146 time we'll try to allocate directly after the previously allocated
147 chunk, on the grounds that this is aligned and likely to be free.
148 If it turns out that we were wrong, we have to munmap() and try
149 again using the general method.
151 Note on posix_memalign(): this interface is available on recent
152 systems and appears to provide exactly what we want. However, it
153 turns out not to be as good as our mmap() implementation, because
154 it wastes extra space (using double the address space, in a test on
155 x86_64/Linux). The problem seems to be that posix_memalign()
156 returns memory that can be free()'d, so the library must store
157 extra information along with the allocated block, thus messing up
158 the alignment. Hence, we don't use posix_memalign() for now.
160 -------------------------------------------------------------------------- */
162 #if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)
164 // A wrapper around mmap(), to abstract away from OS differences in
165 // the mmap() interface.
168 my_mmap (void *addr, lnat size)
172 #if defined(solaris2_HOST_OS) || defined(irix_HOST_OS)
174 int fd = open("/dev/zero",O_RDONLY);
175 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
179 ret = mmap(addr, size, PROT_READ | PROT_WRITE,
180 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
182 // Without MAP_FIXED, Apple's mmap ignores addr.
183 // With MAP_FIXED, it overwrites already mapped regions, whic
184 // mmap(0, ... MAP_FIXED ...) is worst of all: It unmaps the program text
185 // and replaces it with zeroes, causing instant death.
186 // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
187 // Let's just use the underlying Mach Microkernel calls directly,
188 // they're much nicer.
192 if(addr) // try to allocate at adress
193 err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
194 if(!addr || err) // try to allocate anywhere
195 err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);
198 // don't know what the error codes mean exactly, assume it's
199 // not our problem though.
200 errorBelch("memory allocation failed (requested %lu bytes)", size);
201 stg_exit(EXIT_FAILURE);
203 vm_protect(mach_task_self(),ret,size,FALSE,VM_PROT_READ|VM_PROT_WRITE);
206 ret = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
207 MAP_ANON | MAP_PRIVATE, -1, 0);
210 if (ret == (void *)-1) {
211 if (errno == ENOMEM ||
212 (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
213 // If we request more than 3Gig, then we get EINVAL
214 // instead of ENOMEM (at least on Linux).
215 errorBelch("out of memory (requested %lu bytes)", size);
216 stg_exit(EXIT_FAILURE);
218 barf("getMBlock: mmap: %s", strerror(errno));
225 // Implements the general case: allocate a chunk of memory of 'size'
229 gen_map_mblocks (lnat size)
234 // Try to map a larger block, and take the aligned portion from
235 // it (unmap the rest).
237 ret = my_mmap(0, size);
239 // unmap the slop bits around the chunk we allocated
240 slop = (W_)ret & MBLOCK_MASK;
242 if (munmap(ret, MBLOCK_SIZE - slop) == -1) {
243 barf("gen_map_mblocks: munmap failed");
245 if (slop > 0 && munmap(ret+size-slop, slop) == -1) {
246 barf("gen_map_mblocks: munmap failed");
249 // ToDo: if we happened to get an aligned block, then don't
250 // unmap the excess, just use it. For this to work, you
251 // need to keep in mind the following:
252 // * Calling my_mmap() with an 'addr' arg pointing to
253 // already my_mmap()ed space is OK and won't fail.
254 // * If my_mmap() can't satisfy the request at the
255 // given 'next_request' address in getMBlocks(), that
256 // you unmap the extra mblock mmap()ed here (or simply
257 // satisfy yourself that the slop introduced isn't worth
261 // next time, try after the block we just got.
262 ret += MBLOCK_SIZE - slop;
267 // The external interface: allocate 'n' mblocks, and return the
274 lnat size = MBLOCK_SIZE * n;
277 if (next_request == 0) {
278 // use gen_map_mblocks the first time.
279 ret = gen_map_mblocks(size);
281 ret = my_mmap(next_request, size);
283 if (((W_)ret & MBLOCK_MASK) != 0) {
285 #if 0 // defined(DEBUG)
286 errorBelch("warning: getMBlock: misaligned block %p returned when allocating %d megablock(s) at %p", ret, n, next_request);
289 // unmap this block...
290 if (munmap(ret, size) == -1) {
291 barf("getMBlock: munmap failed");
293 // and do it the hard way
294 ret = gen_map_mblocks(size);
298 // Next time, we'll try to allocate right after the block we just got.
299 // ToDo: check that we haven't already grabbed the memory at next_request
300 next_request = ret + size;
302 debugTrace(DEBUG_gc, "allocated %d megablock(s) at %p",n,ret);
305 for (i = 0; i < n; i++) {
306 markHeapAlloced( ret + i * MBLOCK_SIZE );
309 mblocks_allocated += n;
317 /* XXX Do something here */
320 #else /* defined(mingw32_HOST_OS) || defined(cygwin32_HOST_OS) */
322 /* alloc_rec keeps the info we need to have matching VirtualAlloc and
325 typedef struct alloc_rec_ {
326 char* base; /* non-aligned base address, directly from VirtualAlloc */
327 int size; /* Size in bytes */
328 struct alloc_rec_* next;
331 typedef struct block_rec_ {
332 char* base; /* base address, non-MBLOCK-aligned */
333 int size; /* size in bytes */
334 struct block_rec_* next;
337 static alloc_rec* allocs = 0;
338 static block_rec* free_blocks = 0;
344 rec = (alloc_rec*)stgMallocBytes(sizeof(alloc_rec),"getMBlocks: allocNew");
345 rec->size = (n+1)*MBLOCK_SIZE;
347 VirtualAlloc(NULL, rec->size, MEM_RESERVE, PAGE_READWRITE);
351 if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY) {
353 errorBelch("out of memory");
356 "getMBlocks: VirtualAlloc MEM_RESERVE %d blocks failed", n);
360 temp.base=0; temp.size=0; temp.next=allocs;
364 for(; it->next!=0 && it->next->base<rec->base; it=it->next) ;
369 debugTrace(DEBUG_gc, "allocated %d megablock(s) at 0x%x",n,(nat)rec->base);
376 insertFree(char* alloc_base, int alloc_size) {
381 temp.base=0; temp.size=0; temp.next=free_blocks;
384 for( ; it!=0 && it->base<alloc_base; prev=it, it=it->next) {}
386 if(it!=0 && alloc_base+alloc_size == it->base) {
387 if(prev->base + prev->size == alloc_base) { /* Merge it, alloc, prev */
388 prev->size += alloc_size + it->size;
389 prev->next = it->next;
391 } else { /* Merge it, alloc */
392 it->base = alloc_base;
393 it->size += alloc_size;
395 } else if(prev->base + prev->size == alloc_base) { /* Merge alloc, prev */
396 prev->size += alloc_size;
397 } else { /* Merge none */
399 rec = (block_rec*)stgMallocBytes(sizeof(block_rec),"getMBlocks: insertFree");
400 rec->base=alloc_base;
401 rec->size=alloc_size;
405 free_blocks=temp.next;
410 findFreeBlocks(nat n) {
418 required_size = n*MBLOCK_SIZE;
419 temp.next=free_blocks; temp.base=0; temp.size=0;
421 /* TODO: Don't just take first block, find smallest sufficient block */
422 for( ; it!=0 && it->size<required_size; prev=it, it=it->next ) {}
424 if( (((unsigned long)it->base) & MBLOCK_MASK) == 0) { /* MBlock aligned */
425 ret = (void*)it->base;
426 if(it->size==required_size) {
430 it->base += required_size;
431 it->size -=required_size;
437 need_base = (char*)(((unsigned long)it->base) & ((unsigned long)~MBLOCK_MASK)) + MBLOCK_SIZE;
438 next = (block_rec*)stgMallocBytes(
440 , "getMBlocks: findFreeBlocks: splitting");
441 new_size = need_base - it->base;
442 next->base = need_base +required_size;
443 next->size = it->size - (new_size+required_size);
445 next->next = it->next;
447 ret=(void*)need_base;
450 free_blocks=temp.next;
454 /* VirtualAlloc MEM_COMMIT can't cross boundaries of VirtualAlloc MEM_RESERVE,
455 so we might need to do many VirtualAlloc MEM_COMMITs. We simply walk the
456 (ordered) allocated blocks. */
458 commitBlocks(char* base, int size) {
461 for( ; it!=0 && (it->base+it->size)<=base; it=it->next ) {}
462 for( ; it!=0 && size>0; it=it->next ) {
465 size_delta = it->size - (base-it->base);
466 if(size_delta>size) size_delta=size;
467 temp = VirtualAlloc(base, size_delta, MEM_COMMIT, PAGE_READWRITE);
469 sysErrorBelch("getMBlocks: VirtualAlloc MEM_COMMIT failed");
470 stg_exit(EXIT_FAILURE);
480 ret = findFreeBlocks(n);
484 /* We already belch in allocNew if it fails */
486 stg_exit(EXIT_FAILURE);
488 insertFree(alloc->base, alloc->size);
489 ret = findFreeBlocks(n);
494 /* (In)sanity tests */
495 if (((W_)ret & MBLOCK_MASK) != 0) {
496 barf("getMBlocks: misaligned block returned");
499 commitBlocks(ret, MBLOCK_SIZE*n);
501 /* Global bookkeeping */
502 mblocks_allocated += n;
504 for(i=0; i<(int)n; ++i) {
505 markHeapAlloced( ret + i * MBLOCK_SIZE );
532 if(!VirtualFree((void*)it->base, 0, MEM_RELEASE)) {
533 sysErrorBelch("freeAllMBlocks: VirtualFree MEM_RELEASE failed");
534 stg_exit(EXIT_FAILURE);