+// Count of mblock-cache misses (presumably "mpc" = mblock cache) --
+// bumped on the slow path; used for stats.  TODO confirm against the
+// defining module.
+extern lnat mpc_misses;
+
+// Slow path for HEAP_ALLOCED/HEAP_ALLOCED_GC: called when the cache
+// entry for 'mblock' doesn't match.  Defined out of line (see the
+// comments in the fast paths below for why).
+StgBool HEAP_ALLOCED_miss(StgWord mblock, void *p);
+
+// HEAP_ALLOCED(p): is 'p' inside memory we allocated from the OS?
+// Fast path: a direct-mapped cache (mblock_cache) indexed by the low
+// bits of p's megablock number.  Each cache line stores
+// (mblock << 1) | result, so XORing with (mblock << 1) yields the
+// cached result directly on a hit (1 = allocated, 0 = not allocated)
+// and some other value on a miss.
+INLINE_HEADER
+StgBool HEAP_ALLOCED(void *p)
+{
+ StgWord mblock;
+ nat entry_no;
+ MbcCacheLine entry, value;
+
+ // Megablock number of p, and its direct-mapped cache slot.
+ mblock = (StgWord)p >> MBLOCK_SHIFT;
+ entry_no = mblock & (MBC_ENTRIES-1);
+ entry = mblock_cache[entry_no];
+ // entry == (mblock << 1) | b  =>  value == b; anything else is a miss.
+ value = entry ^ (mblock << 1);
+ // this formulation coaxes gcc into prioritising the value==1
+ // case, which we expect to be the most common.
+ // __builtin_expect() didn't have any useful effect (gcc-4.3.0).
+ if (value == 1) {
+ return 1;
+ } else if (value == 0) {
+ return 0;
+ } else {
+ // putting the rest out of line turned out to be a slight
+ // performance improvement:
+ return HEAP_ALLOCED_miss(mblock,p);
+ }
+}
+
+// In the parallel GC, the cache itself is safe to *read*, and can be
+// updated atomically, but we need to place a lock around operations
+// that touch the MBlock map.
+//
+// HEAP_ALLOCED_GC(p): identical fast path to HEAP_ALLOCED, but the
+// slow path takes gc_alloc_block_sync before calling
+// HEAP_ALLOCED_miss, since another GC thread may be touching the
+// MBlock map concurrently.
+INLINE_HEADER
+StgBool HEAP_ALLOCED_GC(void *p)
+{
+ StgWord mblock;
+ nat entry_no;
+ MbcCacheLine entry, value;
+ StgBool b;
+
+ // Megablock number of p, and its direct-mapped cache slot.
+ mblock = (StgWord)p >> MBLOCK_SHIFT;
+ entry_no = mblock & (MBC_ENTRIES-1);
+ entry = mblock_cache[entry_no];
+ // Cache line encodes (mblock << 1) | result, so the XOR yields the
+ // cached result on a hit (1 = allocated, 0 = not); any other value
+ // means the slot holds a different mblock -- a miss.
+ value = entry ^ (mblock << 1);
+ if (value == 1) {
+ return 1;
+ } else if (value == 0) {
+ return 0;
+ } else {
+ // putting the rest out of line turned out to be a slight
+ // performance improvement:
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ b = HEAP_ALLOCED_miss(mblock,p);
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ return b;
+ }
+}