Remove the unused remains of __decodeFloat
[ghc-hetmet.git] / rts / PrimOps.cmm
index 40948a3..521a55e 100644 (file)
@@ -83,32 +83,65 @@ newByteArrayzh_fast
     RET_P(p);
 }
 
+#define BA_ALIGN 16
+#define BA_MASK  (BA_ALIGN-1)
+
 newPinnedByteArrayzh_fast
 {
-    W_ words, payload_words, n, p;
+    W_ words, bytes, payload_words, p;
 
     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
-    n = R1;
-    payload_words = ROUNDUP_BYTES_TO_WDS(n);
+    bytes = R1;
+    /* payload_words is what we will tell the profiler we had to allocate */
+    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
+    /* When we actually allocate memory, we need to allow space for the
+       header: */
+    bytes = bytes + SIZEOF_StgArrWords;
+    /* And we want to align to BA_ALIGN bytes, so we need to allow space
+       to shift up to BA_ALIGN - 1 bytes: */
+    bytes = bytes + BA_ALIGN - 1;
+    /* Now we convert to a number of words: */
+    words = ROUNDUP_BYTES_TO_WDS(bytes);
 
-    // We want an 8-byte aligned array.  allocatePinned() gives us
-    // 8-byte aligned memory by default, but we want to align the
-    // *goods* inside the ArrWords object, so we have to check the
-    // size of the ArrWords header and adjust our size accordingly.
-    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
-    if ((SIZEOF_StgArrWords & 7) != 0) {
-       words = words + 1;
-    }
+    ("ptr" p) = foreign "C" allocatePinned(words) [];
+    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
+
+    /* Now we need to move p forward so that the payload is aligned
+       to BA_ALIGN bytes: */
+    p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
+
+    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+    StgArrWords_words(p) = payload_words;
+    RET_P(p);
+}
+
+newAlignedPinnedByteArrayzh_fast
+{
+    W_ words, bytes, payload_words, p, alignment;
+
+    MAYBE_GC(NO_PTRS,newAlignedPinnedByteArrayzh_fast);
+    bytes = R1;
+    alignment = R2;
+
+    /* payload_words is what we will tell the profiler we had to allocate */
+    payload_words = ROUNDUP_BYTES_TO_WDS(bytes);
+
+    /* When we actually allocate memory, we need to allow space for the
+       header: */
+    bytes = bytes + SIZEOF_StgArrWords;
+    /* And we want to align to <alignment> bytes, so we need to allow space
+       to shift up to <alignment - 1> bytes: */
+    bytes = bytes + alignment - 1;
+    /* Now we convert to a number of words: */
+    words = ROUNDUP_BYTES_TO_WDS(bytes);
 
     ("ptr" p) = foreign "C" allocatePinned(words) [];
     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
 
-    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
-    // have to push the object forward one word so that the goods
-    // fall on an 8-byte boundary.
-    if ((SIZEOF_StgArrWords & 7) != 0) {
-       p = p + WDS(1);
-    }
+    /* Now we need to move p forward so that the payload is aligned
+       to <alignment> bytes. Note that we are assuming that
+       <alignment> is a power of 2, which is technically not guaranteed */
+    p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
 
     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
     StgArrWords_words(p) = payload_words;
@@ -334,7 +367,7 @@ mkWeakForeignEnvzh_fast
   flag = R5;
   eptr = R6;
 
-  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakForeignEnvzh_fast );
+  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, mkWeakForeignEnvzh_fast );
 
   w = Hp - SIZEOF_StgWeak + WDS(1);
   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
@@ -921,32 +954,6 @@ integer2Wordzh_fast
   jump %ENTRY_CODE(Sp(0));
 }
 
-decodeFloatzh_fast
-{ 
-    W_ p;
-    F_ arg;
-    FETCH_MP_TEMP(mp_tmp1);
-    FETCH_MP_TEMP(mp_tmp_w);
-    
-    /* arguments: F1 = Float# */
-    arg = F1;
-    
-    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
-    
-    /* Be prepared to tell Lennart-coded __decodeFloat
-       where mantissa._mp_d can be put (it does not care about the rest) */
-    p = Hp - SIZEOF_StgArrWords;
-    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
-    StgArrWords_words(p) = 1;
-    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
-    
-    /* Perform the operation */
-    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
-    
-    /* returns: (Int# (expn), Int#, ByteArray#) */
-    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
-}
-
 decodeFloatzuIntzh_fast
 { 
     W_ p;
@@ -1040,7 +1047,8 @@ forkzh_fast
 
   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
 
-  // switch at the earliest opportunity
+  // context switch soon, but not immediately: we don't want every
+  // forkIO to force a context-switch.
   Capability_context_switch(MyCapability()) = 1 :: CInt;
   
   RET_P(threadid);
@@ -1069,7 +1077,8 @@ forkOnzh_fast
 
   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
 
-  // switch at the earliest opportunity
+  // context switch soon, but not immediately: we don't want every
+  // forkIO to force a context-switch.
   Capability_context_switch(MyCapability()) = 1 :: CInt;
   
   RET_P(threadid);
@@ -1651,8 +1660,11 @@ takeMVarzh_fast
                                    CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
-       StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
+        // write barrier for throwTo(), which looks at block_info
+        // if why_blocked==BlockedOnMVar.
+        prim %write_barrier() [];
+       StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
         R1 = mvar;
@@ -1808,8 +1820,11 @@ putMVarzh_fast
                                    CurrentTSO) [];
        }
        StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
-       StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
+        // write barrier for throwTo(), which looks at block_info
+        // if why_blocked==BlockedOnMVar.
+        prim %write_barrier() [];
+       StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgMVar_tail(mvar) = CurrentTSO;
        
         R1 = mvar;
@@ -2356,6 +2371,26 @@ getApStackValzh_fast
    RET_NP(ok,val);
 }
 
+/* -----------------------------------------------------------------------------
+   Misc. primitives
+   -------------------------------------------------------------------------- */
+
+// Write the cost center stack of the first argument on stderr; return
+// the second.  Possibly only makes sense for already evaluated
+// things?
+traceCcszh_fast
+{
+    W_ ccs;
+
+#ifdef PROFILING
+    ccs = StgHeader_ccs(UNTAG(R1));
+    foreign "C" fprintCCS_stderr(ccs "ptr") [R2];
+#endif
+
+    R1 = R2;
+    ENTER();
+}
+
 getSparkzh_fast
 {
    W_ spark;