Add a proper write barrier for MVars
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 import __gmpz_init;
32 import __gmpz_add;
33 import __gmpz_sub;
34 import __gmpz_mul;
35 import __gmpz_gcd;
36 import __gmpn_gcd_1;
37 import __gmpn_cmp;
38 import __gmpz_tdiv_q;
39 import __gmpz_tdiv_r;
40 import __gmpz_tdiv_qr;
41 import __gmpz_fdiv_qr;
42 import __gmpz_divexact;
43 import __gmpz_and;
44 import __gmpz_xor;
45 import __gmpz_ior;
46 import __gmpz_com;
47 import base_GHCziIOBase_NestedAtomically_closure;
48 import pthread_mutex_lock;
49 import pthread_mutex_unlock;
50 #endif
51 import EnterCriticalSection;
52 import LeaveCriticalSection;
53
54 /*-----------------------------------------------------------------------------
55   Array Primitives
56
57   Basically just new*Array - the others are all inline macros.
58
59   The size arg is always passed in R1, and the result returned in R1.
60
61   The slow entry point is for returning from a heap check, the saved
62   size argument must be re-loaded from the stack.
63   -------------------------------------------------------------------------- */
64
65 /* for objects that are *less* than the size of a word, make sure we
66  * round up to the nearest word for the size of the array.
67  */
68
/* newByteArray#: allocate a byte array of R1 bytes, rounded up to a
 * whole number of words, and return a pointer to the new StgArrWords
 * object in R1.  May trigger GC first (MAYBE_GC) since allocateLocal
 * allocates outside the nursery check. */
69 newByteArrayzh_fast
70 {
71     W_ words, payload_words, n, p;
72     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
73     n = R1;
74     payload_words = ROUNDUP_BYTES_TO_WDS(n);
75     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
76     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
79     StgArrWords_words(p) = payload_words;
80     RET_P(p);
81 }
82
/* newPinnedByteArray#: like newByteArray# but allocated with
 * allocatePinned, and with the *payload* (not just the object)
 * aligned to an 8-byte boundary. */
83 newPinnedByteArrayzh_fast
84 {
85     W_ words, payload_words, n, p;
86
87     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
88     n = R1;
89     payload_words = ROUNDUP_BYTES_TO_WDS(n);
90
91     // We want an 8-byte aligned array.  allocatePinned() gives us
92     // 8-byte aligned memory by default, but we want to align the
93     // *goods* inside the ArrWords object, so we have to check the
94     // size of the ArrWords header and adjust our size accordingly.
95     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
96     if ((SIZEOF_StgArrWords & 7) != 0) {
97         words = words + 1;
98     }
99
100     ("ptr" p) = foreign "C" allocatePinned(words) [];
101     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
102
103     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
104     // have to push the object forward one word so that the goods
105     // fall on an 8-byte boundary.
106     if ((SIZEOF_StgArrWords & 7) != 0) {
107         p = p + WDS(1);
108     }
109
110     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
111     StgArrWords_words(p) = payload_words;
112     RET_P(p);
113 }
114
/* newArray#: allocate a mutable array of R1 pointers, every element
 * initialised to the closure in R2.  The header is set to
 * MUT_ARR_PTRS_DIRTY since a freshly-allocated array is in the nursery
 * and need not be on a mutable list yet. */
115 newArrayzh_fast
116 {
117     W_ words, n, init, arr, p;
118     /* Args: R1 = words, R2 = initialisation value */
119
120     n = R1;
121     MAYBE_GC(R2_PTR,newArrayzh_fast);
122
123     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
124     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
125     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
126
127     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
128     StgMutArrPtrs_ptrs(arr) = n;
129
130     // Initialise all elements of the the array with the value in R2
131     init = R2;
132     p = arr + SIZEOF_StgMutArrPtrs;
133   for:
134     if (p < arr + WDS(words)) {
135         W_[p] = init;
136         p = p + WDS(1);
137         goto for;
138     }
139
140     RET_P(arr);
141 }
142
/* unsafeThawArray#: turn a frozen array back into a mutable one,
 * putting it on the mutable list only if it is not already there
 * (distinguished by the FROZEN0 info pointer — see comment below). */
143 unsafeThawArrayzh_fast
144 {
145   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
146   //
147   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
148   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
149   // it on the mutable list for the GC to remove (removing something from
150   // the mutable list is not easy, because the mut_list is only singly-linked).
151   // 
152   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
153   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
154   // to indicate that it is still on the mutable list.
155   //
156   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
157   // either it is on a mut_list, or it isn't.  We adopt the convention that
158   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
159   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
160   // we put it on the mutable list more than once, but it would get scavenged
161   // multiple times during GC, which would be unnecessarily slow.
162   //
163   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
164         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
165         recordMutable(R1, R1);
166         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
167         RET_P(R1);
168   } else {
169         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
170         RET_P(R1);
171   }
172 }
173
174 /* -----------------------------------------------------------------------------
175    MutVar primitives
176    -------------------------------------------------------------------------- */
177
/* newMutVar#: allocate a fresh MutVar# holding the closure in R1.
 * Allocated from the nursery via ALLOC_PRIM, so the DIRTY header is
 * fine without a mutable-list entry. */
178 newMutVarzh_fast
179 {
180     W_ mv;
181     /* Args: R1 = initialisation value */
182
183     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
184
185     mv = Hp - SIZEOF_StgMutVar + WDS(1);
186     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
187     StgMutVar_var(mv) = R1;
188     
189     RET_P(mv);
190 }
191
/* atomicModifyMutVar#: apply R2 lazily to the contents of the MutVar#
 * in R1, storing thunk (sel_0 (f x)) back and returning thunk
 * (sel_1 (f x)).  Atomicity in the THREADED_RTS is provided by
 * atomic_modify_mutvar_mutex, taken around the read/modify/write. */
192 atomicModifyMutVarzh_fast
193 {
194     W_ mv, z, x, y, r;
195     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
196
197     /* If x is the current contents of the MutVar#, then 
198        We want to make the new contents point to
199
200          (sel_0 (f x))
201  
202        and the return value is
203          
204          (sel_1 (f x))
205
206         obviously we can share (f x).
207
208          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
209          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
210          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
211     */
212
213 #if MIN_UPD_SIZE > 1
214 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
215 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
216 #else
217 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
218 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
219 #endif
220
221 #if MIN_UPD_SIZE > 2
222 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
223 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
224 #else
225 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
226 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
227 #endif
228
229 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
230
231    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
232
233 #if defined(THREADED_RTS)
234     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
235 #endif
236
237    x = StgMutVar_var(R1);
238
239    TICK_ALLOC_THUNK_2();
240    CCCS_ALLOC(THUNK_2_SIZE);
241    z = Hp - THUNK_2_SIZE + WDS(1);
242    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
243    LDV_RECORD_CREATE(z);
244    StgThunk_payload(z,0) = R2;
245    StgThunk_payload(z,1) = x;
246
247    TICK_ALLOC_THUNK_1();
248    CCCS_ALLOC(THUNK_1_SIZE);
249    y = z - THUNK_1_SIZE;
250    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
251    LDV_RECORD_CREATE(y);
252    StgThunk_payload(y,0) = z;
253
   /* write barrier: tell the GC the MutVar now points at a young object */
254    StgMutVar_var(R1) = y;
255    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
256
257    TICK_ALLOC_THUNK_1();
258    CCCS_ALLOC(THUNK_1_SIZE);
259    r = y - THUNK_1_SIZE;
260    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
261    LDV_RECORD_CREATE(r);
262    StgThunk_payload(r,0) = z;
263
264 #if defined(THREADED_RTS)
265     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
266 #endif
267
268    RET_P(r);
269 }
270
271 /* -----------------------------------------------------------------------------
272    Weak Pointer Primitives
273    -------------------------------------------------------------------------- */
274
275 STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak#: allocate a Weak# with key R1, value R2 and finalizer R3
 * (NULL means no finalizer), and link it onto the global
 * weak_ptr_list for the GC to process. */
277 mkWeakzh_fast
278 {
279   /* R1 = key
280      R2 = value
281      R3 = finalizer (or NULL)
282   */
283   W_ w;
284
285   if (R3 == NULL) {
286     R3 = stg_NO_FINALIZER_closure;
287   }
288
289   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
290
291   w = Hp - SIZEOF_StgWeak + WDS(1);
292   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
293
294   StgWeak_key(w)       = R1;
295   StgWeak_value(w)     = R2;
296   StgWeak_finalizer(w) = R3;
297
298   StgWeak_link(w)       = W_[weak_ptr_list];
299   W_[weak_ptr_list]     = w;
300
301   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
302
303   RET_P(w);
304 }
305
306
/* finalizeWeak#: kill the weak pointer in R1 (info ptr becomes
 * DEAD_WEAK) and return (1, finalizer) if it had one, else
 * (0, NO_FINALIZER).  Idempotent: an already-dead weak returns
 * (0, NO_FINALIZER) immediately. */
307 finalizzeWeakzh_fast
308 {
309   /* R1 = weak ptr
310    */
311   W_ w, f;
312
313   w = R1;
314
315   // already dead?
316   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
317       RET_NP(0,stg_NO_FINALIZER_closure);
318   }
319
320   // kill it
321 #ifdef PROFILING
322   // @LDV profiling
323   // A weak pointer is inherently used, so we do not need to call
324   // LDV_recordDead_FILL_SLOP_DYNAMIC():
325   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
326   // or, LDV_recordDead():
327   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
328   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
329   // large as weak pointers, so there is no need to fill the slop, either.
330   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
331 #endif
332
333   //
334   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
335   //
336   SET_INFO(w,stg_DEAD_WEAK_info);
337   LDV_RECORD_CREATE(w);
338
339   f = StgWeak_finalizer(w);
340   StgDeadWeak_link(w) = StgWeak_link(w);
341
342   /* return the finalizer */
343   if (f == stg_NO_FINALIZER_closure) {
344       RET_NP(0,stg_NO_FINALIZER_closure);
345   } else {
346       RET_NP(1,f);
347   }
348 }
349
/* deRefWeak#: return (1, value) if the weak pointer in R1 is still
 * alive (info ptr is still WEAK), otherwise (0, w) where the pointer
 * component is just the weak object itself as a placeholder. */
350 deRefWeakzh_fast
351 {
352   /* R1 = weak ptr */
353   W_ w, code, val;
354
355   w = R1;
356   if (GET_INFO(w) == stg_WEAK_info) {
357     code = 1;
358     val = StgWeak_value(w);
359   } else {
360     code = 0;
361     val = w;
362   }
363   RET_NP(code,val);
364 }
365
366 /* -----------------------------------------------------------------------------
367    Arbitrary-precision Integer operations.
368
369    There are some assumptions in this code that mp_limb_t == W_.  This is
370    the case for all the platforms that GHC supports, currently.
371    -------------------------------------------------------------------------- */
372
/* int2Integer#: convert an Int# to the (size, ByteArray#) pair that
 * represents a GMP Integer.  size is the sign (-1/0/1 here, since a
 * single limb suffices) and the limb holds the magnitude. */
373 int2Integerzh_fast
374 {
375    /* arguments: R1 = Int# */
376
377    W_ val, s, p;        /* to avoid aliasing */
378
379    val = R1;
380    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
381
382    p = Hp - SIZEOF_StgArrWords;
383    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
384    StgArrWords_words(p) = 1;
385
386    /* mpz_set_si is inlined here, makes things simpler */
387    if (%lt(val,0)) { 
388         s  = -1;
389         Hp(0) = -val;
390    } else { 
391      if (%gt(val,0)) {
392         s = 1;
393         Hp(0) = val;
394      } else {
395         s = 0;
396      }
397   }
398
399    /* returns (# size  :: Int#, 
400                  data  :: ByteArray# 
401                #)
402    */
403    RET_NP(s,p);
404 }
405
/* word2Integer#: convert a Word# to a GMP Integer; always
 * non-negative, so size is 0 or 1. */
406 word2Integerzh_fast
407 {
408    /* arguments: R1 = Word# */
409
410    W_ val, s, p;        /* to avoid aliasing */
411
412    val = R1;
413
414    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
415
416    p = Hp - SIZEOF_StgArrWords;
417    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
418    StgArrWords_words(p) = 1;
419
420    if (val != 0) {
421         s = 1;
422         W_[Hp] = val;
423    } else {
424         s = 0;
425    }
426
427    /* returns (# size  :: Int#, 
428                  data  :: ByteArray# #)
429    */
430    RET_NP(s,p);
431 }
432
433
434 /*
435  * 'long long' primops for converting to/from Integers.
436  */
437
438 #ifdef SUPPORT_LONG_LONGS
439
/* int64ToInteger#: convert an Int64# to a GMP Integer on a platform
 * where W_ is 32 bits (this code splits the value into two 32-bit
 * limbs).  Negative values are stored as sign-magnitude: s is negated
 * and the limbs hold the two's-complement-negated magnitude. */
440 int64ToIntegerzh_fast
441 {
442    /* arguments: L1 = Int64# */
443
444    L_ val;
445    W_ hi, lo, s, neg, words_needed, p;
446
447    val = L1;
448    neg = 0;
449
450    hi = TO_W_(val >> 32);
451    lo = TO_W_(val);
452
453    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
454        words_needed = 2;
455    } else { 
456        // minimum is one word
457        words_needed = 1;
458    }
459
460    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
461                NO_PTRS, int64ToIntegerzh_fast );
462
463    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
464    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
465    StgArrWords_words(p) = words_needed;
466
467    if ( %lt(hi,0) ) {
468      neg = 1;
469      lo = -lo;
       /* borrow from the high word unless the low word was zero */
470      if(lo == 0) {
471        hi = -hi;
472      } else {
473        hi = -hi - 1;
474      }
475    }
476
477    if ( words_needed == 2 )  { 
478       s = 2;
479       Hp(-1) = lo;
480       Hp(0) = hi;
481    } else { 
482        if ( lo != 0 ) {
483            s = 1;
484            Hp(0) = lo;
485        } else /* val==0 */  {
486            s = 0;
487        }
488    }
489    if ( neg != 0 ) {
490         s = -s;
491    }
492
493    /* returns (# size  :: Int#, 
494                  data  :: ByteArray# #)
495    */
496    RET_NP(s,p);
497 }
/* word64ToInteger#: convert a Word64# to a GMP Integer on a 32-bit
 * platform; unsigned, so size is 0, 1 or 2 limbs. */
498 word64ToIntegerzh_fast
499 {
500    /* arguments: L1 = Word64# */
501
502    L_ val;
503    W_ hi, lo, s, words_needed, p;
504
505    val = L1;
506    hi = TO_W_(val >> 32);
507    lo = TO_W_(val);
508
509    if ( hi != 0 ) {
510       words_needed = 2;
511    } else {
512       words_needed = 1;
513    }
514
515    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
516                NO_PTRS, word64ToIntegerzh_fast );
517
518    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
519    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
520    StgArrWords_words(p) = words_needed;
521
522    if ( hi != 0 ) { 
523      s = 2;
524      Hp(-1) = lo;
525      Hp(0)  = hi;
526    } else {
527       if ( lo != 0 ) {
528         s = 1;
529         Hp(0) = lo;
530      } else /* val==0 */  {
531       s = 0;
532      }
533   }
534
535    /* returns (# size  :: Int#, 
536                  data  :: ByteArray# #)
537    */
538    RET_NP(s,p);
539 }
540
541
542
543 #endif /* SUPPORT_LONG_LONGS */
544
545 /* ToDo: this is shockingly inefficient */
546
/* Scratch mpz_t / mp_int_t temporaries for the GMP wrappers below.
 * Non-threaded RTS: static bss storage is fine (only one Haskell
 * thread runs at a time).  Threaded RTS: each capability keeps its
 * own temporaries in its StgRegTable, reached via FETCH_MP_TEMP. */
547 #ifndef THREADED_RTS
548 section "bss" {
549   mp_tmp1:
550     bits8 [SIZEOF_MP_INT];
551 }

553 section "bss" {
554   mp_tmp2:
555     bits8 [SIZEOF_MP_INT];
556 }

558 section "bss" {
559   mp_result1:
560     bits8 [SIZEOF_MP_INT];
561 }

563 section "bss" {
564   mp_result2:
565     bits8 [SIZEOF_MP_INT];
566 }
567 #endif

/* In the threaded RTS, declare X locally and point it at the
 * per-capability slot r<X> in the register table. */
569 #ifdef THREADED_RTS
570 #define FETCH_MP_TEMP(X) \
571 W_ X; \
572 X = BaseReg + (OFFSET_StgRegTable_r ## X);
573 #else
574 #define FETCH_MP_TEMP(X) /* Nothing */
575 #endif
576
/* GMP_TAKE2_RET1(name, mp_fun): define primop `name` taking two
 * Integers as (size, ByteArray) pairs in R1/R2 and R3/R4, calling the
 * two-argument GMP function mp_fun, and returning one Integer as a
 * (size, ByteArray) pair.  Note the returned data pointer is the GMP
 * limb buffer minus the ArrWords header — GMP has been configured to
 * allocate limb space from the GHC heap. */
577 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
578 name                                                                    \
579 {                                                                       \
580   CInt s1, s2;                                                          \
581   W_ d1, d2;                                                            \
582   FETCH_MP_TEMP(mp_tmp1);                                               \
583   FETCH_MP_TEMP(mp_tmp2);                                               \
584   FETCH_MP_TEMP(mp_result1)                                             \
585   FETCH_MP_TEMP(mp_result2);                                            \
586                                                                         \
587   /* call doYouWantToGC() */                                            \
588   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
589                                                                         \
590   s1 = W_TO_INT(R1);                                                    \
591   d1 = R2;                                                              \
592   s2 = W_TO_INT(R3);                                                    \
593   d2 = R4;                                                              \
594                                                                         \
595   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
596   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
597   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
598   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
599   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
600   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
601                                                                         \
602   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
603                                                                         \
604   /* Perform the operation */                                           \
605   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
606                                                                         \
607   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
608          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
609 }
610
/* GMP_TAKE1_RET1(name, mp_fun): one-Integer-in (R1/R2),
 * one-Integer-out variant of GMP_TAKE2_RET1 above. */
611 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
612 name                                                                    \
613 {                                                                       \
614   CInt s1;                                                              \
615   W_ d1;                                                                \
616   FETCH_MP_TEMP(mp_tmp1);                                               \
617   FETCH_MP_TEMP(mp_result1)                                             \
618                                                                         \
619   /* call doYouWantToGC() */                                            \
620   MAYBE_GC(R2_PTR, name);                                               \
621                                                                         \
622   d1 = R2;                                                              \
623   s1 = W_TO_INT(R1);                                                    \
624                                                                         \
625   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
626   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
627   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
628                                                                         \
629   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
630                                                                         \
631   /* Perform the operation */                                           \
632   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
633                                                                         \
634   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
635          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
636 }
637
/* GMP_TAKE2_RET2(name, mp_fun): two Integers in, two Integers out
 * (e.g. quotRem / divMod which produce quotient and remainder in one
 * GMP call). */
638 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
639 name                                                                                    \
640 {                                                                                       \
641   CInt s1, s2;                                                                          \
642   W_ d1, d2;                                                                            \
643   FETCH_MP_TEMP(mp_tmp1);                                                               \
644   FETCH_MP_TEMP(mp_tmp2);                                                               \
645   FETCH_MP_TEMP(mp_result1)                                                             \
646   FETCH_MP_TEMP(mp_result2)                                                             \
647                                                                                         \
648   /* call doYouWantToGC() */                                                            \
649   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
650                                                                                         \
651   s1 = W_TO_INT(R1);                                                                    \
652   d1 = R2;                                                                              \
653   s2 = W_TO_INT(R3);                                                                    \
654   d2 = R4;                                                                              \
655                                                                                         \
656   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
657   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
658   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
659   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
660   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
661   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
662                                                                                         \
663   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
664   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
665                                                                                         \
666   /* Perform the operation */                                                           \
667   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
668                                                                                         \
669   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
670            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
671            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
672            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
673 }
674
/* Instantiate the wrapper macros: one out-of-line primop per GMP
 * entry point imported at the top of this file. */
675 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
676 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
677 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
678 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
679 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
680 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
681 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
682 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
683 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
684 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
685 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

687 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
688 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
689
/* Single-limb scratch word for gcdIntzh_fast / decodeFloat /
 * decodeDouble (per-capability in the threaded RTS, as above). */
690 #ifndef THREADED_RTS
691 section "bss" {
692   mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
693 }
694 #endif
695
/* gcdInt#: gcd of two Int#s via GMP's mpn_gcd_1, which takes a limb
 * array (here: one limb, staged through mp_tmp_w) and a scalar. */
696 gcdIntzh_fast
697 {
698     /* R1 = the first Int#; R2 = the second Int# */
699     W_ r; 
700     FETCH_MP_TEMP(mp_tmp_w);
701
702     W_[mp_tmp_w] = R1;
703     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
704
705     R1 = r;
706     /* Result parked in R1, return via info-pointer at TOS */
707     jump %ENTRY_CODE(Sp(0));
708 }
709
710
/* gcdIntegerInt#: gcd of an Integer (limb count R1, data R2) and an
 * Int# (R3), again via mpn_gcd_1. */
711 gcdIntegerIntzh_fast
712 {
713     /* R1 = s1; R2 = d1; R3 = the int */
714     W_ s1;
715     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
716     R1 = s1;
717     
718     /* Result parked in R1, return via info-pointer at TOS */
719     jump %ENTRY_CODE(Sp(0));
720 }
721
722
/* cmpIntegerInt#: compare an Integer (R1/R2) against an Int# (R3)
 * without allocating.  Returns in R1: negative / zero / positive. */
723 cmpIntegerIntzh_fast
724 {
725     /* R1 = s1; R2 = d1; R3 = the int */
726     W_ usize, vsize, v_digit, u_digit;
727
728     usize = R1;
729     vsize = 0;
730     v_digit = R3;
731
732     // paraphrased from __gmpz_cmp_si() in the GMP sources
733     if (%gt(v_digit,0)) {
734         vsize = 1;
735     } else { 
736         if (%lt(v_digit,0)) {
737             vsize = -1;
738             v_digit = -v_digit;
739         }
740     }
741
    /* differing signs (or limb counts) decide immediately */
742     if (usize != vsize) {
743         R1 = usize - vsize; 
744         jump %ENTRY_CODE(Sp(0));
745     }
746
747     if (usize == 0) {
748         R1 = 0; 
749         jump %ENTRY_CODE(Sp(0));
750     }
751
752     u_digit = W_[BYTE_ARR_CTS(R2)];
753
754     if (u_digit == v_digit) {
755         R1 = 0; 
756         jump %ENTRY_CODE(Sp(0));
757     }
758
759     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
760         R1 = usize; 
761     } else {
762         R1 = -usize; 
763     }
764
765     jump %ENTRY_CODE(Sp(0));
766 }
767
/* cmpInteger#: compare two Integers (R1/R2 vs R3/R4) without
 * allocating.  Sign/size comparison first; equal sizes fall through
 * to a limb-by-limb mpn_cmp.  Returns in R1: negative / 0 / 1. */
768 cmpIntegerzh_fast
769 {
770     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
771     W_ usize, vsize, size, up, vp;
772     CInt cmp;
773
774     // paraphrased from __gmpz_cmp() in the GMP sources
775     usize = R1;
776     vsize = R3;
777
778     if (usize != vsize) {
779         R1 = usize - vsize; 
780         jump %ENTRY_CODE(Sp(0));
781     }
782
783     if (usize == 0) {
784         R1 = 0; 
785         jump %ENTRY_CODE(Sp(0));
786     }
787
788     if (%lt(usize,0)) { // NB. not <, which is unsigned
789         size = -usize;
790     } else {
791         size = usize;
792     }
793
794     up = BYTE_ARR_CTS(R2);
795     vp = BYTE_ARR_CTS(R4);
796
797     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
798
799     if (cmp == 0 :: CInt) {
800         R1 = 0; 
801         jump %ENTRY_CODE(Sp(0));
802     }
803
    /* magnitude comparison flips when both operands are negative */
804     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
805         R1 = 1;
806     } else {
807         R1 = (-1); 
808     }
809     /* Result parked in R1, return via info-pointer at TOS */
810     jump %ENTRY_CODE(Sp(0));
811 }
812
/* integer2Int#: truncate an Integer (R1 = size, R2 = data) to an
 * Int# — the low limb, negated if the size is negative. */
813 integer2Intzh_fast
814 {
815     /* R1 = s; R2 = d */
816     W_ r, s;
817
818     s = R1;
819     if (s == 0) {
820         r = 0;
821     } else {
822         r = W_[R2 + SIZEOF_StgArrWords];
823         if (%lt(s,0)) {
824             r = -r;
825         }
826     }
827     /* Result parked in R1, return via info-pointer at TOS */
828     R1 = r;
829     jump %ENTRY_CODE(Sp(0));
830 }
831
/* integer2Word#: same truncation as integer2Int# above; the bit
 * pattern is identical, only the Haskell-level result type differs. */
832 integer2Wordzh_fast
833 {
834   /* R1 = s; R2 = d */
835   W_ r, s;
836
837   s = R1;
838   if (s == 0) {
839     r = 0;
840   } else {
841     r = W_[R2 + SIZEOF_StgArrWords];
842     if (%lt(s,0)) {
843         r = -r;
844     }
845   }
846   /* Result parked in R1, return via info-pointer at TOS */
847   R1 = r;
848   jump %ENTRY_CODE(Sp(0));
849 }
850
/* decodeFloat#: split a Float# (F1) into exponent and mantissa via
 * the C helper __decodeFloat, which writes the mantissa limb into a
 * freshly-allocated one-word ByteArray# and the exponent into
 * mp_tmp_w.  Returns (# exponent, mantissa-size, mantissa-data #).
 * NOTE(review): the opening brace was lost in transit; restored. */
851 decodeFloatzh_fast
852 {
853     W_ p;
854     F_ arg;
855     FETCH_MP_TEMP(mp_tmp1);
856     FETCH_MP_TEMP(mp_tmp_w);
857     
858     /* arguments: F1 = Float# */
859     arg = F1;
860     
861     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
862     
863     /* Be prepared to tell Lennart-coded __decodeFloat
864        where mantissa._mp_d can be put (it does not care about the rest) */
865     p = Hp - SIZEOF_StgArrWords;
866     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
867     StgArrWords_words(p) = 1;
868     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
869     
870     /* Perform the operation */
871     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
872     
873     /* returns: (Int# (expn), Int#, ByteArray#) */
874     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
875 }
876
877 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
878 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble#: split a Double# (D1) into exponent and mantissa via
 * the C helper __decodeDouble; the mantissa bytes land in a
 * freshly-allocated ByteArray# sized by DOUBLE_MANTISSA_SIZE.
 * Returns (# exponent, mantissa-size, mantissa-data #).
 * NOTE(review): the opening brace was lost in transit; restored. */
880 decodeDoublezh_fast
881 {
882     D_ arg;
883     W_ p;
884     FETCH_MP_TEMP(mp_tmp1);
885     FETCH_MP_TEMP(mp_tmp_w);

887     /* arguments: D1 = Double# */
888     arg = D1;

890     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
891     
892     /* Be prepared to tell Lennart-coded __decodeDouble
893        where mantissa.d can be put (it does not care about the rest) */
894     p = Hp - ARR_SIZE + WDS(1);
895     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
896     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
897     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

899     /* Perform the operation */
900     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
901     
902     /* returns: (Int# (expn), Int#, ByteArray#) */
903     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
904 }
905
906 /* -----------------------------------------------------------------------------
907  * Concurrency primitives
908  * -------------------------------------------------------------------------- */
909
/* fork#: create a new IO thread running the closure in R1, schedule
 * it, and request a context switch so it starts promptly.  Returns
 * the new ThreadId#. */
910 forkzh_fast
911 {
912   /* args: R1 = closure to spark */
913
914   MAYBE_GC(R1_PTR, forkzh_fast);
915
916   W_ closure;
917   W_ threadid;
918   closure = R1;
919
920   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
921                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
922                                 closure "ptr") [];
923   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
924
925   // switch at the earliest opportunity
926   CInt[context_switch] = 1 :: CInt;
927   
928   RET_P(threadid);
929 }
930
/* forkOn#: like fork#, but pins the new thread to the capability
 * numbered R1 via scheduleThreadOn. */
931 forkOnzh_fast
932 {
933   /* args: R1 = cpu, R2 = closure to spark */
934
935   MAYBE_GC(R2_PTR, forkOnzh_fast);
936
937   W_ cpu;
938   W_ closure;
939   W_ threadid;
940   cpu = R1;
941   closure = R2;
942
943   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
944                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
945                                 closure "ptr") [];
946   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
947
948   // switch at the earliest opportunity
949   CInt[context_switch] = 1 :: CInt;
950   
951   RET_P(threadid);
952 }
953
/* yield#: give up the capability; all the work is in the generic
 * stg_yield_noregs entry point. */
954 yieldzh_fast
955 {
956   jump stg_yield_noregs;
957 }
958
myThreadIdzh_fast
{
  /* no args. */
  // A ThreadId# is represented by the TSO closure itself.
  RET_P(CurrentTSO);
}
964
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
  // Attaching a label is only compiled in for DEBUG builds; otherwise
  // this primop is a no-op that just returns to the continuation.
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
975
isCurrentThreadBoundzh_fast
{
  /* no args */
  // Ask the RTS whether the running TSO is bound to an OS thread.
  W_ bound;
  (bound) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(bound);
}
983
984
985 /* -----------------------------------------------------------------------------
986  * TVar primitives
987  * -------------------------------------------------------------------------- */
988
989 #ifdef REG_R1
990 #define SP_OFF 0
991 #define IF_NOT_REG_R1(x) 
992 #else
993 #define SP_OFF 1
994 #define IF_NOT_REG_R1(x) x
995 #endif
996
997 // Catch retry frame ------------------------------------------------------------
998
// Return address for the frame pushed by catchRetryzh_fast.  Control
// arrives here when one of the two branches completes without retrying;
// we then try to commit the nested transaction for that branch.
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     // Pop the frame and return to the enclosing transaction.
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     // Start a fresh nested transaction and re-run whichever branch
     // we were executing (recorded in running_alt_code).
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1031
1032
1033 // Atomically frame ------------------------------------------------------------
1034
// Return address for the frame pushed by atomicallyzh_fast.  Control
// arrives here when the transaction body finishes, and again after each
// invariant check; we run any pending invariants and then try to commit.
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    // Each invariant runs in its own nested transaction so its TVar
    // accesses can be aborted/merged separately.
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      // Restart from scratch with a fresh top-level transaction.
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1098
// Variant of the atomically frame used while the TSO is blocked in
// retry: on wake-up we decide whether the wait is still justified or
// whether the transaction should be re-run from the start.
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    // Re-block; the unregisterised path needs the return convention
    // fixed up on the stack first.
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    // Flip the frame back to the ordinary atomically frame and re-run
    // the transaction body in a fresh top-level transaction.
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1127
1128 // STM catch frame --------------------------------------------------------------
1129
1130 #ifdef REG_R1
1131 #define SP_OFF 0
1132 #else
1133 #define SP_OFF 1
1134 #endif
1135
1136 /* Catch frames are very similar to update frames, but when entering
1137  * one we just pop the frame off the stack and perform the correct
1138  * kind of return to the activation record underneath us on the stack.
1139  */
1140
// Return address for the frame pushed by catchSTMzh_fast.  The body ran
// in a nested transaction; on normal return we try to commit it into
// the enclosing transaction.
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        // Re-run the protected code in a fresh nested transaction.
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1168
1169
1170 // Primop definition ------------------------------------------------------------
1171
// atomically# : push an ATOMICALLY_FRAME, start a top-level STM
// transaction, and run the STM action in R1 under it.
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1207
1208
// catchSTM# : push a CATCH_STM_FRAME holding the handler, then run the
// protected STM action in a nested transaction.
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  // NOTE(review): this foreign call carries no live-register annotation
  // ("[]"), unlike every sibling stmStartTransaction call in this file --
  // confirm whether that is intentional.
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1235
1236
// catchRetry# : push a CATCH_RETRY_FRAME recording both alternatives,
// then run the first alternative in a nested transaction.  If it calls
// retry#, retryzh_fast switches to the second alternative.
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1267
1268
// retry# : unwind the stack to the nearest CATCH_RETRY_FRAME (switch to
// the other alternative, or propagate) or ATOMICALLY_FRAME (block until
// one of the TVars read by the transaction changes).
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1343
1344
checkzh_fast
{
  W_ cur_trec, invariant;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  // Queue the invariant on the current transaction; the atomically
  // frame runs pending invariants before it commits.
  cur_trec = StgTSO_trec(CurrentTSO);
  invariant = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     cur_trec "ptr",
                                     invariant "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1360
1361
newTVarzh_fast
{
  W_ init;
  W_ tvar;

  /* Args: R1 = initialisation value */

  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  init = R1;
  // Allocation and initialisation of the TVar are done by the STM layer.
  ("ptr" tvar) = foreign "C" stmNewTVar(MyCapability() "ptr", init "ptr") [];
  RET_P(tvar);
}
1374
1375
readTVarzh_fast
{
  W_ cur_trec;
  W_ tv;
  W_ val;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  // Record the read in the current transaction record and fetch the value.
  cur_trec = StgTSO_trec(CurrentTSO);
  tv = R1;
  ("ptr" val) = foreign "C" stmReadTVar(MyCapability() "ptr", cur_trec "ptr", tv "ptr") [];

  RET_P(val);
}
1391
1392
writeTVarzh_fast
{
  W_ cur_trec;
  W_ tv;
  W_ val;

  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  // The write is recorded in the transaction; it takes effect at commit.
  cur_trec = StgTSO_trec(CurrentTSO);
  tv = R1;
  val = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", cur_trec "ptr", tv "ptr", val "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1410
1411
1412 /* -----------------------------------------------------------------------------
1413  * MVar primitives
1414  *
1415  * take & putMVar work as follows.  Firstly, an important invariant:
1416  *
1417  *    If the MVar is full, then the blocking queue contains only
1418  *    threads blocked on putMVar, and if the MVar is empty then the
1419  *    blocking queue contains only threads blocked on takeMVar.
1420  *
1421  * takeMvar:
1422  *    MVar empty : then add ourselves to the blocking queue
1423  *    MVar full  : remove the value from the MVar, and
1424  *                 blocking queue empty     : return
1425  *                 blocking queue non-empty : perform the first blocked putMVar
1426  *                                            from the queue, and wake up the
1427  *                                            thread (MVar is now full again)
1428  *
1429  * putMVar is just the dual of the above algorithm.
1430  *
1431  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1432  * the stack of the thread waiting to do the putMVar.  See
1433  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1434  * the stack layout, and the PerformPut and PerformTake macros below.
1435  *
1436  * It is important that a blocked take or put is woken up with the
1437  * take/put already performed, because otherwise there would be a
1438  * small window of vulnerability where the thread could receive an
1439  * exception and never perform its take or put, and we'd end up with a
1440  * deadlock.
1441  *
1442  * -------------------------------------------------------------------------- */
1443
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    // The MVar is full iff its value slot holds something other than
    // the END_TSO_QUEUE sentinel.
    if (StgMVar_value(R1) != stg_END_TSO_QUEUE_closure) {
        RET_N(0);
    } else {
        RET_N(1);
    }
}
1454
// newMVar# : allocate an empty MVar.  Head/tail/value all start as the
// END_TSO_QUEUE sentinel, meaning no blocked threads and no value.
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
        // MVARs start dirty: generation 0 has no mutable list
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1470
1471
1472 /* If R1 isn't available, pass it on the stack */
1473 #ifdef REG_R1
1474 #define PerformTake(tso, value)                         \
1475     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1476     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1477 #else
1478 #define PerformTake(tso, value)                                 \
1479     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1480     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1481 #endif
1482
1483 #define PerformPut(tso,lval)                    \
1484     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1485     lval = W_[StgTSO_sp(tso) - WDS(1)];
1486
// takeMVar# : lock the MVar (THREADED_RTS), apply the write barrier if
// the MVar was clean, then either block (MVar empty) or take the value.
// If a putMVar is queued, perform that put on the blocked thread's
// stack and wake it, so the MVar becomes full again immediately.
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif
        
    // Write barrier: if the MVar was clean, tell the GC it is about to
    // be mutated before we change any of its fields.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
    }

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        // stg_block_takemvar releases the lock and suspends us.
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));
      // The woken TSO's stack was mutated, so it must be dirtied too.
      dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
      /* ToDo: check 2nd arg (mvar) is right */
      ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
      StgMVar_head(mvar) = tso;
#else
      ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
                                         StgMVar_head(mvar) "ptr") [];
      StgMVar_head(mvar) = tso;
#endif

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
      SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif

      RET_P(val);
  }
}
1571
1572
// tryTakeMVar# : non-blocking variant of takeMVar#.  Returns (0, dummy)
// without touching the MVar if it is empty, otherwise (1, value) with
// the same queued-put wake-up behaviour as takeMVar#.
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        // Empty: nothing is mutated, so restore the original info ptr.
        unlockClosure(mvar, info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    // Write barrier: dirty the MVar before the mutations below.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                           StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_NP(1, val);
}
1648
1649
// putMVar# : lock the MVar, apply the write barrier if it was clean,
// then either block (MVar full) or store the value.  If a takeMVar is
// queued, perform that take directly on the blocked thread's stack and
// wake it, leaving the MVar empty again.
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    // R2 is kept live across the call since it holds the value to put.
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    // Write barrier: dirty the MVar before the mutations below.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
    }

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
        // MVar full: join the blocking queue and suspend.
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        // The woken TSO's stack was mutated, so it must be dirtied too.
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1727
1728
// tryPutMVar# : non-blocking variant of putMVar#.  Returns 0 without
// touching the MVar if it is already full, otherwise stores the value
// (or hands it to a queued takeMVar) and returns 1.
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
#if defined(THREADED_RTS)
        // Full: nothing is mutated, so restore the original info ptr.
        unlockClosure(mvar, info);
#endif
        RET_N(0);
    }
  
    // Write barrier: dirty the MVar before the mutations below.
    if (info == stg_MVAR_CLEAN_info) {
        foreign "C" dirty_MVAR(BaseReg "ptr", mvar);
    }

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_MVAR_DIRTY_info);
#else
        SET_INFO(mvar,stg_MVAR_DIRTY_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1798
1799
1800 /* -----------------------------------------------------------------------------
1801    Stable pointer primitives
1802    -------------------------------------------------------------------------  */
1803
// makeStableName# : return the StableName# for the closure in R1,
// allocating a new StgStableName object only if the stable-pointer
// table does not already record one for this index.
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        // No: build one in the space reserved by ALLOC_PRIM and cache
        // it in the table entry so later calls return the same object.
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1826
1827
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ stable;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    // Register R1 in the stable-pointer table and return the StablePtr.
    ("ptr" stable) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(stable);
}
1836
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ closure, idx;
    idx = R1;
    // Index the stable-pointer table directly; each entry is an snEntry.
    closure = snEntry_addr(W_[stable_ptr_table] + idx*SIZEOF_snEntry);
    RET_P(closure);
}
1845
1846 /* -----------------------------------------------------------------------------
1847    Bytecode object primitives
1848    -------------------------------------------------------------------------  */
1849
// newBCO# : allocate a bytecode object and copy the arity/bitmap words
// from the supplied ByteArray# into its tail.
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    // The BCO's size includes the bitmap words copied in below.
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1888
1889
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    // Build the AP thunk: zero arguments, function field = the BCO.
    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1914
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
/* Returns (info ptr, frozen array of the closure's pointer fields,
   byte array of its non-pointer words) -- see RET_NPP at the end. */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        // Thunks, applications and BCOs: report no payload at all.
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        // Standard layout: pointer / non-pointer counts come straight
        // from the info table.
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    // May GC: R1 must be kept alive across the allocation.
    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    // Both result arrays are carved out of the single allocation above:
    // the pointer array first, then the non-pointer words array.
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    // Fill the (frozen) array with the closure's pointer fields.
    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    // Fill the byte array with the non-pointer words, which follow the
    // pointer fields in the closure's payload.
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
1978
1979 /* -----------------------------------------------------------------------------
1980    Thread I/O blocking primitives
1981    -------------------------------------------------------------------------- */
1982
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).  The thread must not already be on any queue:
 * its link field must be END_TSO_QUEUE, as the ASSERT checks.  If the
 * queue is empty, the thread becomes both head and tail; otherwise it
 * is linked after the current tail.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
1994
waitReadzh_fast
{
    /* args: R1 (stored in the TSO's block_info; presumably the file
       descriptor to wait on for reading -- confirm against callers) */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS") never returns;
#else

    // Block the current thread until R1 is readable: mark it
    // BlockedOnRead, park it on the blocked queue, and yield.
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2011
waitWritezh_fast
{
    /* args: R1 (stored in the TSO's block_info; presumably the file
       descriptor to wait on for writing -- confirm against callers) */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS") never returns;
#else

    // Block the current thread until R1 is writable: mark it
    // BlockedOnWrite, park it on the blocked queue, and yield.
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
2028
2029
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* Block the current thread for (at least) the given number of
 * microseconds.  Two implementations: on mingw32 the delay is handed
 * to the async request machinery; elsewhere the thread is inserted
 * into the time-ordered sleeping_queue. */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS") never returns;
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Compute the absolute wake-up time, in ticks. */
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    // divisor = microseconds per tick (tickInterval appears to be in
    // milliseconds, hence the *1000 -- confirm against RtsFlags).
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by increasing wake-up time (block_info). */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2097
2098
2099 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* Issue an asynchronous read request (mingw32 only) and block the
 * current thread until it completes. */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    // Hand the request to the async I/O machinery; 0 = read, not write.
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    // The result record is filled in by the I/O manager on completion
    // and looked up via block_info when the thread is woken.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2127
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
/* Issue an asynchronous write request (mingw32 only) and block the
 * current thread until it completes. */
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS") never returns;
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    // Hand the request to the async I/O machinery; 1 = write.
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    // The result record is filled in by the I/O manager on completion
    // and looked up via block_info when the thread is woken.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2155
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
/* Submit a procedure + parameter to the async request machinery
 * (mingw32 only) and block the current thread until it completes. */
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    // The result record is filled in on completion and looked up via
    // block_info when the thread is woken.
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2183 #endif
2184
// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // Save the thread state so threadPaused() sees a consistent stack.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    // threadPaused() may set what_next to ThreadKilled (e.g. if the
    // thread lost the race for the work) -- hence the check below.
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        // Thread survived: restore its state and resume at the return
        // address on top of the stack.
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2203
getApStackValzh_fast
{
   W_ ap_stack, offset, val, ok;

   /* args: R1 = AP_STACK, R2 = offset */
   /* Returns (ok, val): if R1 really is an AP_STACK closure, ok=1 and
      val is the payload word at the given offset; otherwise ok=0 and
      val is the closure itself. */
   ap_stack = R1;
   offset   = R2;

   // Only inspect the payload if the info pointer says this is an
   // AP_STACK; anything else gets handed back unchanged.
   if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
        ok = 1;
        val = StgAP_STACK_payload(ap_stack,offset); 
   } else {
        ok = 0;
        val = R1;
   }
   RET_NP(ok,val);
}