The MBlock Map: provides our implementation of HEAP_ALLOCED()
-------------------------------------------------------------------------- */
-#ifdef MBLOCK_MAP_SIZE
+#if SIZEOF_VOID_P == 4
StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
+#elif SIZEOF_VOID_P == 8
+static MBlockMap dummy_mblock_map; // zero-initialised placeholder, so mblock_cache is never NULL
+MBlockMap *mblock_cache = &dummy_mblock_map; // map tried first by HEAP_ALLOCED(); replaced by slowIsHeapAlloced() and markHeapAlloced()
+int mblock_map_count = 0; // number of valid entries in mblock_maps
+MBlockMap **mblock_maps = NULL; // table of maps, one per distinct upper-32-bit address prefix; grown by markHeapAlloced()
+
+/* Look up the block map whose addrHigh32 matches the upper 32 bits of p.
+ *
+ * Scans mblock_maps[0 .. mblock_map_count-1] in order and returns the first
+ * match, or NULL when no map has been registered for that address prefix.
+ */
+static MBlockMap *
+findMBlockMap(void *p)
+{
+    const StgWord32 wanted = (StgWord32) (((StgWord)p) >> 32);
+    MBlockMap *found = NULL;
+    int idx = 0;
+
+    while (found == NULL && idx < mblock_map_count) {
+        MBlockMap *candidate = mblock_maps[idx];
+        if (candidate->addrHigh32 == wanted) {
+            found = candidate;
+        }
+        idx++;
+    }
+    return found;
+}
+
+/* Out-of-line path of HEAP_ALLOCED(), used when p is not covered by the
+ * currently cached map.
+ *
+ * If a map exists for p's upper 32 address bits it becomes the new
+ * mblock_cache and the entry for p is returned; otherwise the cache is
+ * left alone and 0 is returned.
+ */
+StgBool
+slowIsHeapAlloced(void *p)
+{
+    MBlockMap *hit = findMBlockMap(p);
+
+    if (hit == NULL) {
+        return 0;
+    }
+
+    mblock_cache = hit;
+    return hit->mblocks[MBLOCK_MAP_ENTRY(p)];
+}
#endif
+/* Record that the megablock containing p belongs to the heap, so that
+ * HEAP_ALLOCED(p) subsequently yields a non-zero value for it.
+ *
+ * 32-bit: sets the entry for p in the global mblock_map.
+ * 64-bit: finds, or creates and registers, the MBlockMap for p's upper
+ * 32 address bits, sets the entry for p in it, and makes that map the
+ * cached one.
+ *
+ * Aborts the process if a new map cannot be allocated; carrying on would
+ * either dereference NULL or leave heap memory unrecorded.
+ * NOTE(review): if the runtime has its own checked allocator or fatal-error
+ * routine, prefer that over a bare abort().
+ */
+static void
+markHeapAlloced(void *p)
+{
+#if SIZEOF_VOID_P == 4
+    mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
+#elif SIZEOF_VOID_P == 8
+    MBlockMap *map = findMBlockMap(p);
+    if(map == NULL)
+    {
+        MBlockMap **grown;
+
+        /* calloc so that every entry of the new map starts out as 0 */
+        map = calloc(1, sizeof(MBlockMap));
+        if(map == NULL)
+        {
+            abort();
+        }
+        map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
+
+        /* Grow through a temporary: assigning realloc's result straight
+         * to mblock_maps would lose the existing table on failure. */
+        grown = realloc(mblock_maps,
+                        sizeof(MBlockMap*) * (mblock_map_count + 1));
+        if(grown == NULL)
+        {
+            abort();
+        }
+        mblock_maps = grown;
+
+        /* Bump the count only after the slot holds an initialised map,
+         * so findMBlockMap never walks over an unset entry. */
+        mblock_maps[mblock_map_count] = map;
+        mblock_map_count++;
+    }
+    map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
+    mblock_cache = map;
+#endif
+}
+
+
/* -----------------------------------------------------------------------------
Allocate new mblock(s)
-------------------------------------------------------------------------- */
// fill in the table
for (i = 0; i < n; i++) {
- MARK_HEAP_ALLOCED( ret + i * MBLOCK_SIZE );
+ markHeapAlloced( ret + i * MBLOCK_SIZE );
}
mblocks_allocated += n;
// fill in the table
for (i = 0; i < n; i++) {
- MARK_HEAP_ALLOCED ( ret + i * MBLOCK_SIZE );
+ markHeapAlloced( ret + i * MBLOCK_SIZE );
}
return ret;
will be quickly cached (indeed, performance measurements showed no
measurable difference between doing the table lookup and using a
constant comparison).
+
+ On 64-bit machines, we cache a single block map with 4096 (2^12)
+ entries, which describes 4096 megablocks, i.e. the 4GB of memory
+ that share the same upper 32 address bits. If HEAP_ALLOCED is
+ called for an address that is not covered by the cached map, it
+ calls slowIsHeapAlloced (see MBlock.c), which finds the block map
+ for the 4GB region in question (if one exists) and makes it the
+ new cached map.
-------------------------------------------------------------------------- */
+#if SIZEOF_VOID_P == 4
extern StgWord8 mblock_map[];
-#if SIZEOF_VOID_P == 4
/* On a 32-bit machine a 4KB table is always sufficient */
# define MBLOCK_MAP_SIZE 4096 // number of StgWord8 entries in mblock_map
# define MBLOCK_MAP_ENTRY(p) ((StgWord)(p) >> MBLOCK_SHIFT) // index of p's entry in mblock_map
# define HEAP_ALLOCED(p) mblock_map[MBLOCK_MAP_ENTRY(p)] // non-zero once p's entry has been set in mblock_map
-# define MARK_HEAP_ALLOCED(p) (mblock_map[MBLOCK_MAP_ENTRY(p)] = 1)
-
-#elif defined(ia64_TARGET_ARCH)
-/* Instead of trying to cover the whole 64-bit address space (which would
- * require a better data structure), we assume that mmap allocates mappings
- * from the bottom of region 1, and track some portion of address space from
- * there upwards (currently 4GB). */
-# define MBLOCK_MAP_SIZE 4096
-# define MBLOCK_MAP_ENTRY(p) (((StgWord)(p) - (1UL << 61)) >> MBLOCK_SHIFT)
-# define HEAP_ALLOCED(p) ((MBLOCK_MAP_ENTRY(p) < MBLOCK_MAP_SIZE) \
- && mblock_map[MBLOCK_MAP_ENTRY(p)])
-# define MARK_HEAP_ALLOCED(p) ((MBLOCK_MAP_ENTRY(p) < MBLOCK_MAP_SIZE) \
- && (mblock_map[MBLOCK_MAP_ENTRY(p)] = 1))
#elif SIZEOF_VOID_P == 8
-/* XXX: This is a HACK, and will not work in general! We just use the
- * lower 32 bits of the address, and do the same as for the 32-bit
- * version. As long as the OS gives us memory in a roughly linear
- * fashion, it won't go wrong until we've allocated 4G. */
+
# define MBLOCK_MAP_SIZE 4096 // number of entries in each MBlockMap
# define MBLOCK_MAP_ENTRY(p) (((StgWord)(p) & 0xffffffff) >> MBLOCK_SHIFT) // entry index from the low 32 bits of p only; the high 32 bits select the map
-# define HEAP_ALLOCED(p) (mblock_map[MBLOCK_MAP_ENTRY(p)])
-# define MARK_HEAP_ALLOCED(p) (mblock_map[MBLOCK_MAP_ENTRY(p)] = 1)
+typedef struct { // allocation map for all addresses that share one upper-32-bit prefix
+ StgWord32 addrHigh32; // upper 32 bits of every address this map covers
+ StgWord8 mblocks[MBLOCK_MAP_SIZE]; // indexed by MBLOCK_MAP_ENTRY(p); non-zero means heap-allocated
+} MBlockMap;
+
+extern MBlockMap *mblock_cache; // map that HEAP_ALLOCED() checks before falling back to slowIsHeapAlloced()
+
+StgBool slowIsHeapAlloced(void *p); // HEAP_ALLOCED() fallback for a p whose upper 32 bits differ from the cached map's
+
+/* HEAP_ALLOCED(p): non-zero if p's megablock has been recorded as heap.
+ * When the upper 32 bits of p match the cached map, the answer is read
+ * straight from that map; otherwise slowIsHeapAlloced() is called.
+ * Note that p is evaluated more than once, so it must be free of side
+ * effects. */
+# define HEAP_ALLOCED(p) \
+ ( (mblock_cache->addrHigh32 != (((StgWord)(p)) >> 32)) \
+ ? slowIsHeapAlloced(p) \
+ : mblock_cache->mblocks[MBLOCK_MAP_ENTRY(p)] )
#else
# error HEAP_ALLOCED not defined