ghc/rts/MBlock.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-1999
   4  *
   5  * MegaBlock Allocator Interface.  This file contains all the dirty
   6  * architecture-dependent hackery required to get a chunk of aligned
   7  * memory from the operating system.
   8  *
   9  * ---------------------------------------------------------------------------*/
  10
  11 /* This is non-posix compliant. */
  12 /* #include "PosixSource.h" */
  13
  14 #include "Rts.h"
  15 #include "RtsUtils.h"
  16 #include "RtsFlags.h"
  17 #include "MBlock.h"
  18 #include "BlockAlloc.h"
  19
  20 #ifdef HAVE_STDLIB_H
  21 #include <stdlib.h>
  22 #endif
  23 #ifdef HAVE_STRING_H
  24 #include <string.h>
  25 #endif
  26 #ifdef HAVE_UNISTD_H
  27 #include <unistd.h>
  28 #endif
  29 #ifdef HAVE_SYS_TYPES_H
  30 #include <sys/types.h>
  31 #endif
  32 #ifndef mingw32_TARGET_OS
  33 # ifdef HAVE_SYS_MMAN_H
  34 # include <sys/mman.h>
  35 # endif
  36 #endif
  37 #ifdef HAVE_FCNTL_H
  38 #include <fcntl.h>
  39 #endif
  40 #if HAVE_WINDOWS_H
  41 #include <windows.h>
  42 #endif
  43 #if darwin_TARGET_OS
  44 #include <mach/vm_map.h>
  45 #endif
  46
  47 #include <errno.h>
  48
  49 lnat mblocks_allocated = 0;
  50
  51 /* -----------------------------------------------------------------------------
  52    The MBlock Map: provides our implementation of HEAP_ALLOCED()
  53    -------------------------------------------------------------------------- */
  54
  55 #if SIZEOF_VOID_P == 4
  56 StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
  57 #elif SIZEOF_VOID_P == 8
  58 static MBlockMap dummy_mblock_map;
  59 MBlockMap *mblock_cache = &dummy_mblock_map;
  60 int mblock_map_count = 0;
  61 MBlockMap **mblock_maps = NULL;
  62
  63 static MBlockMap *
  64 findMBlockMap(void *p)
  65 {
  66     int i;
  67     StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
  68     for( i = 0; i < mblock_map_count; i++ )
  69     {
  70         if(mblock_maps[i]->addrHigh32 == hi)
  71         {
  72             return mblock_maps[i];
  73         }
  74     }
  75     return NULL;
  76 }
  77
  78 StgBool
  79 slowIsHeapAlloced(void *p)
  80 {
  81     MBlockMap *map = findMBlockMap(p);
  82     if(map)
  83     {
  84         mblock_cache = map;
  85         return map->mblocks[MBLOCK_MAP_ENTRY(p)];
  86     }
  87     else
  88         return 0;
  89 }
  90 #endif
  91
  92 static void
  93 markHeapAlloced(void *p)
  94 {
  95 #if SIZEOF_VOID_P == 4
  96     mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
  97 #elif SIZEOF_VOID_P == 8
  98     MBlockMap *map = findMBlockMap(p);
  99     if(map == NULL)
 100     {
 101         mblock_map_count++;
 102         mblock_maps = realloc(mblock_maps,
 103                               sizeof(MBlockMap*) * mblock_map_count);
 104         map = mblock_maps[mblock_map_count-1] = calloc(1,sizeof(MBlockMap));
 105         map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
 106     }
 107     map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
 108     mblock_cache = map;
 109 #endif
 110 }
 111
 112 /* -----------------------------------------------------------------------------
 113    Allocate new mblock(s)
 114    -------------------------------------------------------------------------- */
 115
 116 void *
 117 getMBlock(void)
 118 {
 119   return getMBlocks(1);
 120 }
 121
 122 /* -----------------------------------------------------------------------------
 123    The mmap() method
 124
 125    On Unix-like systems, we use mmap() to allocate our memory.  We
 126    want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
 127    boundary.  The mmap() interface doesn't give us this level of
 128    control, so we have to use some heuristics.
 129
 130    In the general case, if we want a block of n megablocks, then we
 131    allocate n+1 and trim off the slop from either side (using
 132    munmap()) to get an aligned chunk of size n.  However, the next
 133    time we'll try to allocate directly after the previously allocated
 134    chunk, on the grounds that this is aligned and likely to be free.
 135    If it turns out that we were wrong, we have to munmap() and try
 136    again using the general method.
 137    -------------------------------------------------------------------------- */
 138
 139 #if !defined(mingw32_TARGET_OS) && !defined(cygwin32_TARGET_OS)
 140
 141 // A wrapper around mmap(), to abstract away from OS differences in
 142 // the mmap() interface.
 143
 144 static void *
 145 my_mmap (void *addr, lnat size)
 146 {
 147     void *ret;
 148
 149 #if defined(solaris2_TARGET_OS) || defined(irix_TARGET_OS)
 150     {
 151         int fd = open("/dev/zero",O_RDONLY);
 152         ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 153         close(fd);
 154     }
 155 #elif hpux_TARGET_OS
 156     ret = mmap(addr, size, PROT_READ | PROT_WRITE,
 157                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 158 #elif darwin_TARGET_OS
 159     // Without MAP_FIXED, Apple's mmap ignores addr.
 160     // With MAP_FIXED, it overwrites already mapped regions, whic
 161     // mmap(0, ... MAP_FIXED ...) is worst of all: It unmaps the program text
 162     // and replaces it with zeroes, causing instant death.
 163     // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
 164     // Let's just use the underlying Mach Microkernel calls directly,
 165     // they're much nicer.
 166
 167     kern_return_t err;
 168     ret = addr;
 169     if(addr)    // try to allocate at adress
 170         err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
 171     if(!addr || err)    // try to allocate anywhere
 172         err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);
 173
 174     if(err) // don't know what the error codes mean exactly
 175         barf("memory allocation failed (requested %lu bytes)", size);
 176     else
 177         vm_protect(mach_task_self(),ret,size,FALSE,VM_PROT_READ|VM_PROT_WRITE);
 178 #else
 179     ret = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
 180                MAP_ANON | MAP_PRIVATE, -1, 0);
 181 #endif
 182
 183     if (ret == (void *)-1) {
 184         if (errno == ENOMEM ||
 185             (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
 186             // If we request more than 3Gig, then we get EINVAL
 187             // instead of ENOMEM (at least on Linux).
 188             errorBelch("out of memory (requested %lu bytes)", size);
 189             stg_exit(EXIT_FAILURE);
 190         } else {
 191             barf("getMBlock: mmap: %s", strerror(errno));
 192         }
 193     }
 194
 195     return ret;
 196 }
 197
 198 // Implements the general case: allocate a chunk of memory of 'size'
 199 // mblocks.
 200
 201 static void *
 202 gen_map_mblocks (lnat size)
 203 {
 204     int slop;
 205     void *ret;
 206
 207     // Try to map a larger block, and take the aligned portion from
 208     // it (unmap the rest).
 209     size += MBLOCK_SIZE;
 210     ret = my_mmap(0, size);
 211
 212     // unmap the slop bits around the chunk we allocated
 213     slop = (W_)ret & MBLOCK_MASK;
 214
 215     if (munmap(ret, MBLOCK_SIZE - slop) == -1) {
 216       barf("gen_map_mblocks: munmap failed");
 217     }
 218     if (slop > 0 && munmap(ret+size-slop, slop) == -1) {
 219       barf("gen_map_mblocks: munmap failed");
 220     }
 221
 222     // ToDo: if we happened to get an aligned block, then don't
 223     // unmap the excess, just use it. For this to work, you
 224     // need to keep in mind the following:
 225     //     * Calling my_mmap() with an 'addr' arg pointing to
 226     //       already my_mmap()ed space is OK and won't fail.
 227     //     * If my_mmap() can't satisfy the request at the
 228     //       given 'next_request' address in getMBlocks(), that
 229     //       you unmap the extra mblock mmap()ed here (or simply
 230     //       satisfy yourself that the slop introduced isn't worth
 231     //       salvaging.)
 232     //
 233
 234     // next time, try after the block we just got.
 235     ret += MBLOCK_SIZE - slop;
 236     return ret;
 237 }
 238
 239
 240 // The external interface: allocate 'n' mblocks, and return the
 241 // address.
 242
 243 void *
 244 getMBlocks(nat n)
 245 {
 246   static caddr_t next_request = (caddr_t)HEAP_BASE;
 247   caddr_t ret;
 248   lnat size = MBLOCK_SIZE * n;
 249   nat i;
 250
 251   if (next_request == 0) {
 252       // use gen_map_mblocks the first time.
 253       ret = gen_map_mblocks(size);
 254   } else {
 255       ret = my_mmap(next_request, size);
 256
 257       if (((W_)ret & MBLOCK_MASK) != 0) {
 258           // misaligned block!
 259 #if 0 // defined(DEBUG)
 260           errorBelch("warning: getMBlock: misaligned block %p returned when allocating %d megablock(s) at %p", ret, n, next_request);
 261 #endif
 262
 263           // unmap this block...
 264           if (munmap(ret, size) == -1) {
 265               barf("getMBlock: munmap failed");
 266           }
 267           // and do it the hard way
 268           ret = gen_map_mblocks(size);
 269       }
 270   }
 271
 272   // Next time, we'll try to allocate right after the block we just got.
 273   // ToDo: check that we haven't already grabbed the memory at next_request
 274   next_request = ret + size;
 275
 276   IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at %p\n",n,ret));
 277
 278   // fill in the table
 279   for (i = 0; i < n; i++) {
 280       markHeapAlloced( ret + i * MBLOCK_SIZE );
 281   }
 282
 283   mblocks_allocated += n;
 284
 285   return ret;
 286 }
 287
 288 #else /* defined(mingw32_TARGET_OS) || defined(cygwin32_TARGET_OS) */
 289
 290 /*
 291  On Win32 platforms we make use of the two-phased virtual memory API
 292  to allocate mega blocks. We proceed as follows:
 293
 294  Reserve a large chunk of VM (256M at the time, or what the user asked
 295  for via the -M option), but don't supply a base address that's aligned on
 296  a MB boundary. Instead we round up to the nearest mblock from the chunk of
 297  VM we're handed back from the OS (at the moment we just leave the 'slop' at
 298  the beginning of the reserved chunk unused - ToDo: reuse it .)
 299
 300  Reserving memory doesn't allocate physical storage (not even in the
 301  page file), this is done later on by committing pages (or mega-blocks in
 302  our case).
 303 */
 304
 305 char* base_non_committed = (char*)0;
 306 char* end_non_committed = (char*)0;
 307
 308 /* Default is to reserve 256M of VM to minimise the slop cost. */
 309 #define SIZE_RESERVED_POOL  ( 256 * 1024 * 1024 )
 310
 311 /* Number of bytes reserved */
 312 static unsigned long size_reserved_pool = SIZE_RESERVED_POOL;
 313
 314 void *
 315 getMBlocks(nat n)
 316 {
 317   static char* base_mblocks       = (char*)0;
 318   static char* next_request       = (char*)0;
 319   void* ret                       = (void*)0;
 320   nat i;
 321
 322   lnat size = MBLOCK_SIZE * n;
 323
 324   if ( (base_non_committed == 0) || (next_request + size > end_non_committed) ) {
 325     if (base_non_committed) {
 326         /* Tacky, but if no user-provided -M option is in effect,
 327          * set it to the default (==256M) in time for the heap overflow PSA.
 328          */
 329         if (RtsFlags.GcFlags.maxHeapSize == 0) {
 330             RtsFlags.GcFlags.maxHeapSize = size_reserved_pool / BLOCK_SIZE;
 331         }
 332         heapOverflow();
 333     }
 334     if (RtsFlags.GcFlags.maxHeapSize != 0) {
 335       size_reserved_pool = BLOCK_SIZE * RtsFlags.GcFlags.maxHeapSize;
 336       if (size_reserved_pool < MBLOCK_SIZE) {
 337         size_reserved_pool = 2*MBLOCK_SIZE;
 338       }
 339     }
 340     base_non_committed = VirtualAlloc ( NULL
 341                                       , size_reserved_pool
 342                                       , MEM_RESERVE
 343                                       , PAGE_READWRITE
 344                                       );
 345     if ( base_non_committed == 0 ) {
 346          errorBelch("getMBlocks: VirtualAlloc failed with: %ld\n", GetLastError());
 347          ret=(void*)-1;
 348     } else {
 349       end_non_committed = (char*)base_non_committed + (unsigned long)size_reserved_pool;
 350       /* The returned pointer is not aligned on a mega-block boundary. Make it. */
 351       base_mblocks = (char*)((unsigned long)base_non_committed & (unsigned long)~MBLOCK_MASK) + MBLOCK_SIZE;
 352 #      if 0
 353        debugBelch("getMBlocks: Dropping %d bytes off of 256M chunk\n",
 354                   (unsigned)base_mblocks - (unsigned)base_non_committed);
 355 #      endif
 356
 357        if ( ((char*)base_mblocks + size) > end_non_committed ) {
 358           debugBelch("getMBlocks: oops, committed too small a region to start with.");
 359           ret=(void*)-1;
 360        } else {
 361           next_request = base_mblocks;
 362        }
 363     }
 364   }
 365   /* Commit the mega block(s) to phys mem */
 366   if ( ret != (void*)-1 ) {
 367      ret = VirtualAlloc(next_request, size, MEM_COMMIT, PAGE_READWRITE);
 368      if (ret == NULL) {
 369         debugBelch("getMBlocks: VirtualAlloc failed with: %ld\n", GetLastError());
 370         ret=(void*)-1;
 371      }
 372   }
 373
 374   if (((W_)ret & MBLOCK_MASK) != 0) {
 375     barf("getMBlocks: misaligned block returned");
 376   }
 377
 378   if (ret == (void*)-1) {
 379      barf("getMBlocks: unknown memory allocation failure on Win32.");
 380   }
 381
 382   IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at 0x%x\n",n,(nat)ret));
 383   next_request = (char*)next_request + size;
 384
 385   mblocks_allocated += n;
 386
 387   // fill in the table
 388   for (i = 0; i < n; i++) {
 389       markHeapAlloced( ret + i * MBLOCK_SIZE );
 390   }
 391
 392   return ret;
 393 }
 394
 395 /* Hand back the physical memory that is allocated to a mega-block.
 396    ToDo: chain the released mega block onto some list so that
 397          getMBlocks() can get at it.
 398
 399    Currently unused.
 400 */
 401 #if 0
 402 void
 403 freeMBlock(void* p, nat n)
 404 {
 405   BOOL rc;
 406
 407   rc = VirtualFree(p, n * MBLOCK_SIZE , MEM_DECOMMIT );
 408
 409   if (rc == FALSE) {
 410 #    ifdef DEBUG
 411      debugBelch("freeMBlocks: VirtualFree failed with: %d\n", GetLastError());
 412 #    endif
 413   }
 414
 415 }
 416 #endif
 417
 418 #endif