Add a write barrier to the TSO link field (#1589)
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_GHCziIOBase_NestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55
56 /*-----------------------------------------------------------------------------
57   Array Primitives
58
59   Basically just new*Array - the others are all inline macros.
60
61   The size arg is always passed in R1, and the result returned in R1.
62
63   The slow entry point is for returning from a heap check, the saved
64   size argument must be re-loaded from the stack.
65   -------------------------------------------------------------------------- */
66
67 /* for objects that are *less* than the size of a word, make sure we
68  * round up to the nearest word for the size of the array.
69  */
70
71 newByteArrayzh_fast
72 {
73     W_ words, payload_words, n, p;
74     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
75     n = R1;
76     payload_words = ROUNDUP_BYTES_TO_WDS(n);
77     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
78     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
79     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
80     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
81     StgArrWords_words(p) = payload_words;
82     RET_P(p);
83 }
84
85 newPinnedByteArrayzh_fast
86 {
87     W_ words, payload_words, n, p;
88
89     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
90     n = R1;
91     payload_words = ROUNDUP_BYTES_TO_WDS(n);
92
93     // We want an 8-byte aligned array.  allocatePinned() gives us
94     // 8-byte aligned memory by default, but we want to align the
95     // *goods* inside the ArrWords object, so we have to check the
96     // size of the ArrWords header and adjust our size accordingly.
97     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
98     if ((SIZEOF_StgArrWords & 7) != 0) {
99         words = words + 1;
100     }
101
102     ("ptr" p) = foreign "C" allocatePinned(words) [];
103     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
104
105     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
106     // have to push the object forward one word so that the goods
107     // fall on an 8-byte boundary.
108     if ((SIZEOF_StgArrWords & 7) != 0) {
109         p = p + WDS(1);
110     }
111
112     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
113     StgArrWords_words(p) = payload_words;
114     RET_P(p);
115 }
116
117 newArrayzh_fast
118 {
119     W_ words, n, init, arr, p;
120     /* Args: R1 = words, R2 = initialisation value */
121
122     n = R1;
123     MAYBE_GC(R2_PTR,newArrayzh_fast);
124
125     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
126     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
127     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
128
129     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
130     StgMutArrPtrs_ptrs(arr) = n;
131
132     // Initialise all elements of the the array with the value in R2
133     init = R2;
134     p = arr + SIZEOF_StgMutArrPtrs;
135   for:
136     if (p < arr + WDS(words)) {
137         W_[p] = init;
138         p = p + WDS(1);
139         goto for;
140     }
141
142     RET_P(arr);
143 }
144
145 unsafeThawArrayzh_fast
146 {
147   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
148   //
149   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
150   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
151   // it on the mutable list for the GC to remove (removing something from
152   // the mutable list is not easy, because the mut_list is only singly-linked).
153   // 
154   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
155   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
156   // to indicate that it is still on the mutable list.
157   //
158   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
159   // either it is on a mut_list, or it isn't.  We adopt the convention that
160   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
161   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
162   // we put it on the mutable list more than once, but it would get scavenged
163   // multiple times during GC, which would be unnecessarily slow.
164   //
165   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
166         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
167         recordMutable(R1, R1);
168         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
169         RET_P(R1);
170   } else {
171         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
172         RET_P(R1);
173   }
174 }
175
176 /* -----------------------------------------------------------------------------
177    MutVar primitives
178    -------------------------------------------------------------------------- */
179
180 newMutVarzh_fast
181 {
182     W_ mv;
183     /* Args: R1 = initialisation value */
184
185     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
186
187     mv = Hp - SIZEOF_StgMutVar + WDS(1);
188     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
189     StgMutVar_var(mv) = R1;
190     
191     RET_P(mv);
192 }
193
194 atomicModifyMutVarzh_fast
195 {
196     W_ mv, z, x, y, r;
197     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
198
199     /* If x is the current contents of the MutVar#, then 
200        We want to make the new contents point to
201
202          (sel_0 (f x))
203  
204        and the return value is
205          
206          (sel_1 (f x))
207
208         obviously we can share (f x).
209
210          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
211          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
212          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
213     */
214
215 #if MIN_UPD_SIZE > 1
216 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
217 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
218 #else
219 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
220 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
221 #endif
222
223 #if MIN_UPD_SIZE > 2
224 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
225 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
226 #else
227 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
228 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
229 #endif
230
231 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
232
233    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
234
235 #if defined(THREADED_RTS)
236     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
237 #endif
238
239    x = StgMutVar_var(R1);
240
241    TICK_ALLOC_THUNK_2();
242    CCCS_ALLOC(THUNK_2_SIZE);
243    z = Hp - THUNK_2_SIZE + WDS(1);
244    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
245    LDV_RECORD_CREATE(z);
246    StgThunk_payload(z,0) = R2;
247    StgThunk_payload(z,1) = x;
248
249    TICK_ALLOC_THUNK_1();
250    CCCS_ALLOC(THUNK_1_SIZE);
251    y = z - THUNK_1_SIZE;
252    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
253    LDV_RECORD_CREATE(y);
254    StgThunk_payload(y,0) = z;
255
256    StgMutVar_var(R1) = y;
257    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
258
259    TICK_ALLOC_THUNK_1();
260    CCCS_ALLOC(THUNK_1_SIZE);
261    r = y - THUNK_1_SIZE;
262    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
263    LDV_RECORD_CREATE(r);
264    StgThunk_payload(r,0) = z;
265
266 #if defined(THREADED_RTS)
267     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
268 #endif
269
270    RET_P(r);
271 }
272
273 /* -----------------------------------------------------------------------------
274    Weak Pointer Primitives
275    -------------------------------------------------------------------------- */
276
277 STRING(stg_weak_msg,"New weak pointer at %p\n")
278
279 mkWeakzh_fast
280 {
281   /* R1 = key
282      R2 = value
283      R3 = finalizer (or NULL)
284   */
285   W_ w;
286
287   if (R3 == NULL) {
288     R3 = stg_NO_FINALIZER_closure;
289   }
290
291   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
292
293   w = Hp - SIZEOF_StgWeak + WDS(1);
294   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
295
296   StgWeak_key(w)       = R1;
297   StgWeak_value(w)     = R2;
298   StgWeak_finalizer(w) = R3;
299
300   StgWeak_link(w)       = W_[weak_ptr_list];
301   W_[weak_ptr_list]     = w;
302
303   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
304
305   RET_P(w);
306 }
307
308
309 finalizzeWeakzh_fast
310 {
311   /* R1 = weak ptr
312    */
313   W_ w, f;
314
315   w = R1;
316
317   // already dead?
318   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
319       RET_NP(0,stg_NO_FINALIZER_closure);
320   }
321
322   // kill it
323 #ifdef PROFILING
324   // @LDV profiling
325   // A weak pointer is inherently used, so we do not need to call
326   // LDV_recordDead_FILL_SLOP_DYNAMIC():
327   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
328   // or, LDV_recordDead():
329   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
330   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
331   // large as weak pointers, so there is no need to fill the slop, either.
332   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
333 #endif
334
335   //
336   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
337   //
338   SET_INFO(w,stg_DEAD_WEAK_info);
339   LDV_RECORD_CREATE(w);
340
341   f = StgWeak_finalizer(w);
342   StgDeadWeak_link(w) = StgWeak_link(w);
343
344   /* return the finalizer */
345   if (f == stg_NO_FINALIZER_closure) {
346       RET_NP(0,stg_NO_FINALIZER_closure);
347   } else {
348       RET_NP(1,f);
349   }
350 }
351
352 deRefWeakzh_fast
353 {
354   /* R1 = weak ptr */
355   W_ w, code, val;
356
357   w = R1;
358   if (GET_INFO(w) == stg_WEAK_info) {
359     code = 1;
360     val = StgWeak_value(w);
361   } else {
362     code = 0;
363     val = w;
364   }
365   RET_NP(code,val);
366 }
367
368 /* -----------------------------------------------------------------------------
369    Arbitrary-precision Integer operations.
370
371    There are some assumptions in this code that mp_limb_t == W_.  This is
372    the case for all the platforms that GHC supports, currently.
373    -------------------------------------------------------------------------- */
374
375 int2Integerzh_fast
376 {
377    /* arguments: R1 = Int# */
378
379    W_ val, s, p;        /* to avoid aliasing */
380
381    val = R1;
382    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
383
384    p = Hp - SIZEOF_StgArrWords;
385    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
386    StgArrWords_words(p) = 1;
387
388    /* mpz_set_si is inlined here, makes things simpler */
389    if (%lt(val,0)) { 
390         s  = -1;
391         Hp(0) = -val;
392    } else { 
393      if (%gt(val,0)) {
394         s = 1;
395         Hp(0) = val;
396      } else {
397         s = 0;
398      }
399   }
400
401    /* returns (# size  :: Int#, 
402                  data  :: ByteArray# 
403                #)
404    */
405    RET_NP(s,p);
406 }
407
408 word2Integerzh_fast
409 {
410    /* arguments: R1 = Word# */
411
412    W_ val, s, p;        /* to avoid aliasing */
413
414    val = R1;
415
416    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
417
418    p = Hp - SIZEOF_StgArrWords;
419    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
420    StgArrWords_words(p) = 1;
421
422    if (val != 0) {
423         s = 1;
424         W_[Hp] = val;
425    } else {
426         s = 0;
427    }
428
429    /* returns (# size  :: Int#, 
430                  data  :: ByteArray# #)
431    */
432    RET_NP(s,p);
433 }
434
435
436 /*
437  * 'long long' primops for converting to/from Integers.
438  */
439
440 #ifdef SUPPORT_LONG_LONGS
441
442 int64ToIntegerzh_fast
443 {
444    /* arguments: L1 = Int64# */
445
446    L_ val;
447    W_ hi, lo, s, neg, words_needed, p;
448
449    val = L1;
450    neg = 0;
451
452    hi = TO_W_(val >> 32);
453    lo = TO_W_(val);
454
455    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
456        // minimum is one word
457        words_needed = 1;
458    } else { 
459        words_needed = 2;
460    }
461
462    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
463                NO_PTRS, int64ToIntegerzh_fast );
464
465    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
466    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
467    StgArrWords_words(p) = words_needed;
468
469    if ( %lt(hi,0) ) {
470      neg = 1;
471      lo = -lo;
472      if(lo == 0) {
473        hi = -hi;
474      } else {
475        hi = -hi - 1;
476      }
477    }
478
479    if ( words_needed == 2 )  { 
480       s = 2;
481       Hp(-1) = lo;
482       Hp(0) = hi;
483    } else { 
484        if ( lo != 0 ) {
485            s = 1;
486            Hp(0) = lo;
487        } else /* val==0 */  {
488            s = 0;
489        }
490    }
491    if ( neg != 0 ) {
492         s = -s;
493    }
494
495    /* returns (# size  :: Int#, 
496                  data  :: ByteArray# #)
497    */
498    RET_NP(s,p);
499 }
500 word64ToIntegerzh_fast
501 {
502    /* arguments: L1 = Word64# */
503
504    L_ val;
505    W_ hi, lo, s, words_needed, p;
506
507    val = L1;
508    hi = TO_W_(val >> 32);
509    lo = TO_W_(val);
510
511    if ( hi != 0 ) {
512       words_needed = 2;
513    } else {
514       words_needed = 1;
515    }
516
517    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
518                NO_PTRS, word64ToIntegerzh_fast );
519
520    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
521    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
522    StgArrWords_words(p) = words_needed;
523
524    if ( hi != 0 ) { 
525      s = 2;
526      Hp(-1) = lo;
527      Hp(0)  = hi;
528    } else {
529       if ( lo != 0 ) {
530         s = 1;
531         Hp(0) = lo;
532      } else /* val==0 */  {
533       s = 0;
534      }
535   }
536
537    /* returns (# size  :: Int#, 
538                  data  :: ByteArray# #)
539    */
540    RET_NP(s,p);
541 }
542
543
544
545 #endif /* SUPPORT_LONG_LONGS */
546
547 /* ToDo: this is shockingly inefficient */
548
549 #ifndef THREADED_RTS
550 section "bss" {
551   mp_tmp1:
552     bits8 [SIZEOF_MP_INT];
553 }
554
555 section "bss" {
556   mp_tmp2:
557     bits8 [SIZEOF_MP_INT];
558 }
559
560 section "bss" {
561   mp_result1:
562     bits8 [SIZEOF_MP_INT];
563 }
564
565 section "bss" {
566   mp_result2:
567     bits8 [SIZEOF_MP_INT];
568 }
569 #endif
570
571 #ifdef THREADED_RTS
572 #define FETCH_MP_TEMP(X) \
573 W_ X; \
574 X = BaseReg + (OFFSET_StgRegTable_r ## X);
575 #else
576 #define FETCH_MP_TEMP(X) /* Nothing */
577 #endif
578
579 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1, s2;                                                          \
583   W_ d1, d2;                                                            \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_tmp2);                                               \
586   FETCH_MP_TEMP(mp_result1)                                             \
587   FETCH_MP_TEMP(mp_result2);                                            \
588                                                                         \
589   /* call doYouWantToGC() */                                            \
590   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
591                                                                         \
592   s1 = W_TO_INT(R1);                                                    \
593   d1 = R2;                                                              \
594   s2 = W_TO_INT(R3);                                                    \
595   d2 = R4;                                                              \
596                                                                         \
597   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
598   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
599   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
600   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
601   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
602   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
603                                                                         \
604   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
605                                                                         \
606   /* Perform the operation */                                           \
607   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
608                                                                         \
609   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
610          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
611 }
612
613 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
614 name                                                                    \
615 {                                                                       \
616   CInt s1;                                                              \
617   W_ d1;                                                                \
618   FETCH_MP_TEMP(mp_tmp1);                                               \
619   FETCH_MP_TEMP(mp_result1)                                             \
620                                                                         \
621   /* call doYouWantToGC() */                                            \
622   MAYBE_GC(R2_PTR, name);                                               \
623                                                                         \
624   d1 = R2;                                                              \
625   s1 = W_TO_INT(R1);                                                    \
626                                                                         \
627   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
628   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
629   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
630                                                                         \
631   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
632                                                                         \
633   /* Perform the operation */                                           \
634   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
635                                                                         \
636   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
637          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
638 }
639
640 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
641 name                                                                                    \
642 {                                                                                       \
643   CInt s1, s2;                                                                          \
644   W_ d1, d2;                                                                            \
645   FETCH_MP_TEMP(mp_tmp1);                                                               \
646   FETCH_MP_TEMP(mp_tmp2);                                                               \
647   FETCH_MP_TEMP(mp_result1)                                                             \
648   FETCH_MP_TEMP(mp_result2)                                                             \
649                                                                                         \
650   /* call doYouWantToGC() */                                                            \
651   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
652                                                                                         \
653   s1 = W_TO_INT(R1);                                                                    \
654   d1 = R2;                                                                              \
655   s2 = W_TO_INT(R3);                                                                    \
656   d2 = R4;                                                                              \
657                                                                                         \
658   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
659   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
660   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
661   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
662   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
663   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
664                                                                                         \
665   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
666   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
667                                                                                         \
668   /* Perform the operation */                                                           \
669   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
670                                                                                         \
671   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
672            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
673            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
674            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
675 }
676
677 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
678 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
679 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
680 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
681 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
682 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
683 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
684 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
685 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
686 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
687 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
688
689 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
690 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
691
692 #ifndef THREADED_RTS
693 section "bss" {
694   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
695 }
696 #endif
697
698 gcdIntzh_fast
699 {
700     /* R1 = the first Int#; R2 = the second Int# */
701     W_ r; 
702     FETCH_MP_TEMP(mp_tmp_w);
703
704     W_[mp_tmp_w] = R1;
705     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
706
707     R1 = r;
708     /* Result parked in R1, return via info-pointer at TOS */
709     jump %ENTRY_CODE(Sp(0));
710 }
711
712
713 gcdIntegerIntzh_fast
714 {
715     /* R1 = s1; R2 = d1; R3 = the int */
716     W_ s1;
717     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
718     R1 = s1;
719     
720     /* Result parked in R1, return via info-pointer at TOS */
721     jump %ENTRY_CODE(Sp(0));
722 }
723
724
725 cmpIntegerIntzh_fast
726 {
727     /* R1 = s1; R2 = d1; R3 = the int */
728     W_ usize, vsize, v_digit, u_digit;
729
730     usize = R1;
731     vsize = 0;
732     v_digit = R3;
733
734     // paraphrased from __gmpz_cmp_si() in the GMP sources
735     if (%gt(v_digit,0)) {
736         vsize = 1;
737     } else { 
738         if (%lt(v_digit,0)) {
739             vsize = -1;
740             v_digit = -v_digit;
741         }
742     }
743
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
754     u_digit = W_[BYTE_ARR_CTS(R2)];
755
756     if (u_digit == v_digit) {
757         R1 = 0; 
758         jump %ENTRY_CODE(Sp(0));
759     }
760
761     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
762         R1 = usize; 
763     } else {
764         R1 = -usize; 
765     }
766
767     jump %ENTRY_CODE(Sp(0));
768 }
769
770 cmpIntegerzh_fast
771 {
772     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
773     W_ usize, vsize, size, up, vp;
774     CInt cmp;
775
776     // paraphrased from __gmpz_cmp() in the GMP sources
777     usize = R1;
778     vsize = R3;
779
780     if (usize != vsize) {
781         R1 = usize - vsize; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (usize == 0) {
786         R1 = 0; 
787         jump %ENTRY_CODE(Sp(0));
788     }
789
790     if (%lt(usize,0)) { // NB. not <, which is unsigned
791         size = -usize;
792     } else {
793         size = usize;
794     }
795
796     up = BYTE_ARR_CTS(R2);
797     vp = BYTE_ARR_CTS(R4);
798
799     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
800
801     if (cmp == 0 :: CInt) {
802         R1 = 0; 
803         jump %ENTRY_CODE(Sp(0));
804     }
805
806     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
807         R1 = 1;
808     } else {
809         R1 = (-1); 
810     }
811     /* Result parked in R1, return via info-pointer at TOS */
812     jump %ENTRY_CODE(Sp(0));
813 }
814
815 integer2Intzh_fast
816 {
817     /* R1 = s; R2 = d */
818     W_ r, s;
819
820     s = R1;
821     if (s == 0) {
822         r = 0;
823     } else {
824         r = W_[R2 + SIZEOF_StgArrWords];
825         if (%lt(s,0)) {
826             r = -r;
827         }
828     }
829     /* Result parked in R1, return via info-pointer at TOS */
830     R1 = r;
831     jump %ENTRY_CODE(Sp(0));
832 }
833
834 integer2Wordzh_fast
835 {
836   /* R1 = s; R2 = d */
837   W_ r, s;
838
839   s = R1;
840   if (s == 0) {
841     r = 0;
842   } else {
843     r = W_[R2 + SIZEOF_StgArrWords];
844     if (%lt(s,0)) {
845         r = -r;
846     }
847   }
848   /* Result parked in R1, return via info-pointer at TOS */
849   R1 = r;
850   jump %ENTRY_CODE(Sp(0));
851 }
852
853 decodeFloatzh_fast
854
855     W_ p;
856     F_ arg;
857     FETCH_MP_TEMP(mp_tmp1);
858     FETCH_MP_TEMP(mp_tmp_w);
859     
860     /* arguments: F1 = Float# */
861     arg = F1;
862     
863     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
864     
865     /* Be prepared to tell Lennart-coded __decodeFloat
866        where mantissa._mp_d can be put (it does not care about the rest) */
867     p = Hp - SIZEOF_StgArrWords;
868     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
869     StgArrWords_words(p) = 1;
870     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
871     
872     /* Perform the operation */
873     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
874     
875     /* returns: (Int# (expn), Int#, ByteArray#) */
876     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
877 }
878
879 decodeFloatzuIntzh_fast
880
881     W_ p;
882     F_ arg;
883     FETCH_MP_TEMP(mp_tmp1);
884     FETCH_MP_TEMP(mp_tmp_w);
885     
886     /* arguments: F1 = Float# */
887     arg = F1;
888     
889     /* Perform the operation */
890     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
891     
892     /* returns: (Int# (mantissa), Int# (exponent)) */
893     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
894 }
895
896 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
897 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
898
899 decodeDoublezh_fast
900
901     D_ arg;
902     W_ p;
903     FETCH_MP_TEMP(mp_tmp1);
904     FETCH_MP_TEMP(mp_tmp_w);
905
906     /* arguments: D1 = Double# */
907     arg = D1;
908
909     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
910     
911     /* Be prepared to tell Lennart-coded __decodeDouble
912        where mantissa.d can be put (it does not care about the rest) */
913     p = Hp - ARR_SIZE + WDS(1);
914     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
915     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
916     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
917
918     /* Perform the operation */
919     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
920     
921     /* returns: (Int# (expn), Int#, ByteArray#) */
922     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
923 }
924
925 decodeDoublezu2Intzh_fast
926
927     D_ arg;
928     W_ p;
929     FETCH_MP_TEMP(mp_tmp1);
930     FETCH_MP_TEMP(mp_tmp2);
931     FETCH_MP_TEMP(mp_result1);
932     FETCH_MP_TEMP(mp_result2);
933
934     /* arguments: D1 = Double# */
935     arg = D1;
936
937     /* Perform the operation */
938     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
939                                     mp_result1 "ptr", mp_result2 "ptr",
940                                     arg) [];
941
942     /* returns:
943        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
944     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
945 }
946
947 /* -----------------------------------------------------------------------------
948  * Concurrency primitives
949  * -------------------------------------------------------------------------- */
950
951 forkzh_fast
952 {
953   /* args: R1 = closure to spark */
954
955   MAYBE_GC(R1_PTR, forkzh_fast);
956
957   W_ closure;
958   W_ threadid;
959   closure = R1;
960
961   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
962                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
963                                 closure "ptr") [];
964
965   /* start blocked if the current thread is blocked */
966   StgTSO_flags(threadid) = 
967      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
968                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
969
970   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
971
972   // switch at the earliest opportunity
973   CInt[context_switch] = 1 :: CInt;
974   
975   RET_P(threadid);
976 }
977
978 forkOnzh_fast
979 {
980   /* args: R1 = cpu, R2 = closure to spark */
981
982   MAYBE_GC(R2_PTR, forkOnzh_fast);
983
984   W_ cpu;
985   W_ closure;
986   W_ threadid;
987   cpu = R1;
988   closure = R2;
989
990   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
991                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
992                                 closure "ptr") [];
993
994   /* start blocked if the current thread is blocked */
995   StgTSO_flags(threadid) = 
996      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
997                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
998
999   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
1000
1001   // switch at the earliest opportunity
1002   CInt[context_switch] = 1 :: CInt;
1003   
1004   RET_P(threadid);
1005 }
1006
1007 yieldzh_fast
1008 {
1009   jump stg_yield_noregs;
1010 }
1011
1012 myThreadIdzh_fast
1013 {
1014   /* no args. */
1015   RET_P(CurrentTSO);
1016 }
1017
1018 labelThreadzh_fast
1019 {
1020   /* args: 
1021         R1 = ThreadId#
1022         R2 = Addr# */
1023 #ifdef DEBUG
1024   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
1025 #endif
1026   jump %ENTRY_CODE(Sp(0));
1027 }
1028
1029 isCurrentThreadBoundzh_fast
1030 {
1031   /* no args */
1032   W_ r;
1033   (r) = foreign "C" isThreadBound(CurrentTSO) [];
1034   RET_N(r);
1035 }
1036
1037
1038 /* -----------------------------------------------------------------------------
1039  * TVar primitives
1040  * -------------------------------------------------------------------------- */
1041
1042 #ifdef REG_R1
1043 #define SP_OFF 0
1044 #define IF_NOT_REG_R1(x) 
1045 #else
1046 #define SP_OFF 1
1047 #define IF_NOT_REG_R1(x) x
1048 #endif
1049
1050 // Catch retry frame ------------------------------------------------------------
1051
1052 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
1053 #if defined(PROFILING)
1054   W_ unused1, W_ unused2,
1055 #endif
1056   W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
1057 {
1058    W_ r, frame, trec, outer;
1059    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1060
1061    frame = Sp;
1062    trec = StgTSO_trec(CurrentTSO);
1063    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1064    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1065    if (r != 0) {
1066      /* Succeeded (either first branch or second branch) */
1067      StgTSO_trec(CurrentTSO) = outer;
1068      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1069      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1070      jump %ENTRY_CODE(Sp(SP_OFF));
1071    } else {
1072      /* Did not commit: re-execute */
1073      W_ new_trec;
1074      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1075      StgTSO_trec(CurrentTSO) = new_trec;
1076      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1077        R1 = StgCatchRetryFrame_alt_code(frame);
1078      } else {
1079        R1 = StgCatchRetryFrame_first_code(frame);
1080      }
1081      jump stg_ap_v_fast;
1082    }
1083 }
1084
1085
1086 // Atomically frame ------------------------------------------------------------
1087
1088 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1089 #if defined(PROFILING)
1090   W_ unused1, W_ unused2,
1091 #endif
1092   "ptr" W_ unused3, "ptr" W_ unused4)
1093 {
1094   W_ frame, trec, valid, next_invariant, q, outer;
1095   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1096
1097   frame = Sp;
1098   trec = StgTSO_trec(CurrentTSO);
1099   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1100
1101   if (outer == NO_TREC) {
1102     /* First time back at the atomically frame -- pick up invariants */
1103     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1104     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1105
1106   } else {
1107     /* Second/subsequent time back at the atomically frame -- abort the
1108      * tx that's checking the invariant and move on to the next one */
1109     StgTSO_trec(CurrentTSO) = outer;
1110     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1111     StgInvariantCheckQueue_my_execution(q) = trec;
1112     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1113     /* Don't free trec -- it's linked from q and will be stashed in the
1114      * invariant if we eventually commit. */
1115     q = StgInvariantCheckQueue_next_queue_entry(q);
1116     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1117     trec = outer;
1118   }
1119
1120   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1121
1122   if (q != END_INVARIANT_CHECK_QUEUE) {
1123     /* We can't commit yet: another invariant to check */
1124     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1125     StgTSO_trec(CurrentTSO) = trec;
1126
1127     next_invariant = StgInvariantCheckQueue_invariant(q);
1128     R1 = StgAtomicInvariant_code(next_invariant);
1129     jump stg_ap_v_fast;
1130
1131   } else {
1132
1133     /* We've got no more invariants to check, try to commit */
1134     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1135     if (valid != 0) {
1136       /* Transaction was valid: commit succeeded */
1137       StgTSO_trec(CurrentTSO) = NO_TREC;
1138       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1139       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1140       jump %ENTRY_CODE(Sp(SP_OFF));
1141     } else {
1142       /* Transaction was not valid: try again */
1143       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1144       StgTSO_trec(CurrentTSO) = trec;
1145       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1146       R1 = StgAtomicallyFrame_code(frame);
1147       jump stg_ap_v_fast;
1148     }
1149   }
1150 }
1151
1152 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1153 #if defined(PROFILING)
1154   W_ unused1, W_ unused2,
1155 #endif
1156   "ptr" W_ unused3, "ptr" W_ unused4)
1157 {
1158   W_ frame, trec, valid;
1159   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1160
1161   frame = Sp;
1162
1163   /* The TSO is currently waiting: should we stop waiting? */
1164   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1165   if (valid != 0) {
1166     /* Previous attempt is still valid: no point trying again yet */
1167           IF_NOT_REG_R1(Sp_adj(-2);
1168                         Sp(1) = stg_NO_FINALIZER_closure;
1169                         Sp(0) = stg_ut_1_0_unreg_info;)
1170     jump stg_block_noregs;
1171   } else {
1172     /* Previous attempt is no longer valid: try again */
1173     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1174     StgTSO_trec(CurrentTSO) = trec;
1175     StgHeader_info(frame) = stg_atomically_frame_info;
1176     R1 = StgAtomicallyFrame_code(frame);
1177     jump stg_ap_v_fast;
1178   }
1179 }
1180
1181 // STM catch frame --------------------------------------------------------------
1182
1183 #ifdef REG_R1
1184 #define SP_OFF 0
1185 #else
1186 #define SP_OFF 1
1187 #endif
1188
1189 /* Catch frames are very similar to update frames, but when entering
1190  * one we just pop the frame off the stack and perform the correct
1191  * kind of return to the activation record underneath us on the stack.
1192  */
1193
1194 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1195 #if defined(PROFILING)
1196   W_ unused1, W_ unused2,
1197 #endif
1198   "ptr" W_ unused3, "ptr" W_ unused4)
1199    {
1200       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1201       W_ r, frame, trec, outer;
1202       frame = Sp;
1203       trec = StgTSO_trec(CurrentTSO);
1204       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1205       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1206       if (r != 0) {
1207         /* Commit succeeded */
1208         StgTSO_trec(CurrentTSO) = outer;
1209         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1210         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1211         jump Sp(SP_OFF);
1212       } else {
1213         /* Commit failed */
1214         W_ new_trec;
1215         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1216         StgTSO_trec(CurrentTSO) = new_trec;
1217         R1 = StgCatchSTMFrame_code(frame);
1218         jump stg_ap_v_fast;
1219       }
1220    }
1221
1222
1223 // Primop definition ------------------------------------------------------------
1224
1225 atomicallyzh_fast
1226 {
1227   W_ frame;
1228   W_ old_trec;
1229   W_ new_trec;
1230   
1231   // stmStartTransaction may allocate
1232   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1233
1234   /* Args: R1 = m :: STM a */
1235   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1236
1237   old_trec = StgTSO_trec(CurrentTSO);
1238
1239   /* Nested transactions are not allowed; raise an exception */
1240   if (old_trec != NO_TREC) {
1241      R1 = base_GHCziIOBase_NestedAtomically_closure;
1242      jump raisezh_fast;
1243   }
1244
1245   /* Set up the atomically frame */
1246   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1247   frame = Sp;
1248
1249   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1250   StgAtomicallyFrame_code(frame) = R1;
1251   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1252
1253   /* Start the memory transcation */
1254   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1255   StgTSO_trec(CurrentTSO) = new_trec;
1256
1257   /* Apply R1 to the realworld token */
1258   jump stg_ap_v_fast;
1259 }
1260
1261
1262 catchSTMzh_fast
1263 {
1264   W_ frame;
1265   
1266   /* Args: R1 :: STM a */
1267   /* Args: R2 :: Exception -> STM a */
1268   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1269
1270   /* Set up the catch frame */
1271   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1272   frame = Sp;
1273
1274   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1275   StgCatchSTMFrame_handler(frame) = R2;
1276   StgCatchSTMFrame_code(frame) = R1;
1277
1278   /* Start a nested transaction to run the body of the try block in */
1279   W_ cur_trec;  
1280   W_ new_trec;
1281   cur_trec = StgTSO_trec(CurrentTSO);
1282   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1283   StgTSO_trec(CurrentTSO) = new_trec;
1284
1285   /* Apply R1 to the realworld token */
1286   jump stg_ap_v_fast;
1287 }
1288
1289
1290 catchRetryzh_fast
1291 {
1292   W_ frame;
1293   W_ new_trec;
1294   W_ trec;
1295
1296   // stmStartTransaction may allocate
1297   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1298
1299   /* Args: R1 :: STM a */
1300   /* Args: R2 :: STM a */
1301   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1302
1303   /* Start a nested transaction within which to run the first code */
1304   trec = StgTSO_trec(CurrentTSO);
1305   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1306   StgTSO_trec(CurrentTSO) = new_trec;
1307
1308   /* Set up the catch-retry frame */
1309   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1310   frame = Sp;
1311   
1312   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1313   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1314   StgCatchRetryFrame_first_code(frame) = R1;
1315   StgCatchRetryFrame_alt_code(frame) = R2;
1316
1317   /* Apply R1 to the realworld token */
1318   jump stg_ap_v_fast;
1319 }
1320
1321
1322 retryzh_fast
1323 {
1324   W_ frame_type;
1325   W_ frame;
1326   W_ trec;
1327   W_ outer;
1328   W_ r;
1329
1330   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1331
1332   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1333 retry_pop_stack:
1334   StgTSO_sp(CurrentTSO) = Sp;
1335   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1336   Sp = StgTSO_sp(CurrentTSO);
1337   frame = Sp;
1338   trec = StgTSO_trec(CurrentTSO);
1339   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1340
1341   if (frame_type == CATCH_RETRY_FRAME) {
1342     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1343     ASSERT(outer != NO_TREC);
1344     // Abort the transaction attempting the current branch
1345     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1346     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1347     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1348       // Retry in the first branch: try the alternative
1349       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1350       StgTSO_trec(CurrentTSO) = trec;
1351       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1352       R1 = StgCatchRetryFrame_alt_code(frame);
1353       jump stg_ap_v_fast;
1354     } else {
1355       // Retry in the alternative code: propagate the retry
1356       StgTSO_trec(CurrentTSO) = outer;
1357       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1358       goto retry_pop_stack;
1359     }
1360   }
1361
1362   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1363   ASSERT(frame_type == ATOMICALLY_FRAME);
1364   if (outer != NO_TREC) {
1365     // We called retry while checking invariants, so abort the current
1366     // invariant check (merging its TVar accesses into the parents read
1367     // set so we'll wait on them)
1368     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1369     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1370     trec = outer;
1371     StgTSO_trec(CurrentTSO) = trec;
1372     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1373   }
1374   ASSERT(outer == NO_TREC);
1375
1376   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1377   if (r != 0) {
1378     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1379     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1380     Sp = frame;
1381     // Fix up the stack in the unregisterised case: the return convention is different.
1382     IF_NOT_REG_R1(Sp_adj(-2); 
1383                   Sp(1) = stg_NO_FINALIZER_closure;
1384                   Sp(0) = stg_ut_1_0_unreg_info;)
1385     R3 = trec; // passing to stmWaitUnblock()
1386     jump stg_block_stmwait;
1387   } else {
1388     // Transaction was not valid: retry immediately
1389     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1390     StgTSO_trec(CurrentTSO) = trec;
1391     R1 = StgAtomicallyFrame_code(frame);
1392     Sp = frame;
1393     jump stg_ap_v_fast;
1394   }
1395 }
1396
1397
1398 checkzh_fast
1399 {
1400   W_ trec, closure;
1401
1402   /* Args: R1 = invariant closure */
1403   MAYBE_GC (R1_PTR, checkzh_fast); 
1404
1405   trec = StgTSO_trec(CurrentTSO);
1406   closure = R1;
1407   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1408                                      trec "ptr",
1409                                      closure "ptr") [];
1410
1411   jump %ENTRY_CODE(Sp(0));
1412 }
1413
1414
1415 newTVarzh_fast
1416 {
1417   W_ tv;
1418   W_ new_value;
1419
1420   /* Args: R1 = initialisation value */
1421
1422   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1423   new_value = R1;
1424   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1425   RET_P(tv);
1426 }
1427
1428
1429 readTVarzh_fast
1430 {
1431   W_ trec;
1432   W_ tvar;
1433   W_ result;
1434
1435   /* Args: R1 = TVar closure */
1436
1437   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1438   trec = StgTSO_trec(CurrentTSO);
1439   tvar = R1;
1440   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1441
1442   RET_P(result);
1443 }
1444
1445
1446 writeTVarzh_fast
1447 {
1448   W_ trec;
1449   W_ tvar;
1450   W_ new_value;
1451   
1452   /* Args: R1 = TVar closure */
1453   /*       R2 = New value    */
1454
1455   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1456   trec = StgTSO_trec(CurrentTSO);
1457   tvar = R1;
1458   new_value = R2;
1459   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1460
1461   jump %ENTRY_CODE(Sp(0));
1462 }
1463
1464
1465 /* -----------------------------------------------------------------------------
1466  * MVar primitives
1467  *
1468  * take & putMVar work as follows.  Firstly, an important invariant:
1469  *
1470  *    If the MVar is full, then the blocking queue contains only
1471  *    threads blocked on putMVar, and if the MVar is empty then the
1472  *    blocking queue contains only threads blocked on takeMVar.
1473  *
1474  * takeMvar:
1475  *    MVar empty : then add ourselves to the blocking queue
1476  *    MVar full  : remove the value from the MVar, and
1477  *                 blocking queue empty     : return
1478  *                 blocking queue non-empty : perform the first blocked putMVar
1479  *                                            from the queue, and wake up the
1480  *                                            thread (MVar is now full again)
1481  *
1482  * putMVar is just the dual of the above algorithm.
1483  *
1484  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1485  * the stack of the thread waiting to do the putMVar.  See
1486  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1487  * the stack layout, and the PerformPut and PerformTake macros below.
1488  *
1489  * It is important that a blocked take or put is woken up with the
1490  * take/put already performed, because otherwise there would be a
1491  * small window of vulnerability where the thread could receive an
1492  * exception and never perform its take or put, and we'd end up with a
1493  * deadlock.
1494  *
1495  * -------------------------------------------------------------------------- */
1496
1497 isEmptyMVarzh_fast
1498 {
1499     /* args: R1 = MVar closure */
1500
1501     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1502         RET_N(1);
1503     } else {
1504         RET_N(0);
1505     }
1506 }
1507
1508 newMVarzh_fast
1509 {
1510     /* args: none */
1511     W_ mvar;
1512
1513     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1514   
1515     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1516     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1517         // MVARs start dirty: generation 0 has no mutable list
1518     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1519     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1520     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1521     RET_P(mvar);
1522 }
1523
1524
1525 /* If R1 isn't available, pass it on the stack */
1526 #ifdef REG_R1
1527 #define PerformTake(tso, value)                         \
1528     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1529     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1530 #else
1531 #define PerformTake(tso, value)                                 \
1532     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1533     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1534 #endif
1535
1536 #define PerformPut(tso,lval)                    \
1537     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1538     lval = W_[StgTSO_sp(tso) - WDS(1)];
1539
1540 takeMVarzh_fast
1541 {
1542     W_ mvar, val, info, tso;
1543
1544     /* args: R1 = MVar closure */
1545     mvar = R1;
1546
1547 #if defined(THREADED_RTS)
1548     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1549 #else
1550     info = GET_INFO(mvar);
1551 #endif
1552         
1553     if (info == stg_MVAR_CLEAN_info) {
1554         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1555     }
1556
1557     /* If the MVar is empty, put ourselves on its blocking queue,
1558      * and wait until we're woken up.
1559      */
1560     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1561         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1562             StgMVar_head(mvar) = CurrentTSO;
1563         } else {
1564             foreign "C" setTSOLink(MyCapability() "ptr", StgMVar_tail(mvar),
1565                                    CurrentTSO);
1566         }
1567         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1568         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1569         StgTSO_block_info(CurrentTSO)  = mvar;
1570         StgMVar_tail(mvar) = CurrentTSO;
1571         
1572         jump stg_block_takemvar;
1573   }
1574
1575   /* we got the value... */
1576   val = StgMVar_value(mvar);
1577
1578   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1579   {
1580       /* There are putMVar(s) waiting... 
1581        * wake up the first thread on the queue
1582        */
1583       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1584
1585       /* actually perform the putMVar for the thread that we just woke up */
1586       tso = StgMVar_head(mvar);
1587       PerformPut(tso,StgMVar_value(mvar));
1588
1589       if (StgTSO_flags(tso) & TSO_DIRTY == 0) {
1590           foreign "C" dirty_TSO(MyCapability(), tso);
1591       }
1592
1593 #if defined(GRAN) || defined(PAR)
1594       /* ToDo: check 2nd arg (mvar) is right */
1595       ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1596       StgMVar_head(mvar) = tso;
1597 #else
1598       ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1599                                             StgMVar_head(mvar) "ptr", 1) [];
1600       StgMVar_head(mvar) = tso;
1601 #endif
1602
1603       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1604           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1605       }
1606
1607 #if defined(THREADED_RTS)
1608       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1609 #else
1610       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1611 #endif
1612       RET_P(val);
1613   } 
1614   else
1615   {
1616       /* No further putMVars, MVar is now empty */
1617       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1618  
1619 #if defined(THREADED_RTS)
1620       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1621 #else
1622       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1623 #endif
1624
1625       RET_P(val);
1626   }
1627 }
1628
1629
1630 tryTakeMVarzh_fast
1631 {
1632     W_ mvar, val, info, tso;
1633
1634     /* args: R1 = MVar closure */
1635
1636     mvar = R1;
1637
1638 #if defined(THREADED_RTS)
1639     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1640 #else
1641     info = GET_INFO(mvar);
1642 #endif
1643
1644     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1645 #if defined(THREADED_RTS)
1646         unlockClosure(mvar, info);
1647 #endif
1648         /* HACK: we need a pointer to pass back, 
1649          * so we abuse NO_FINALIZER_closure
1650          */
1651         RET_NP(0, stg_NO_FINALIZER_closure);
1652     }
1653
1654     if (info == stg_MVAR_CLEAN_info) {
1655         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1656     }
1657
1658     /* we got the value... */
1659     val = StgMVar_value(mvar);
1660
1661     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1662
1663         /* There are putMVar(s) waiting... 
1664          * wake up the first thread on the queue
1665          */
1666         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1667
1668         /* actually perform the putMVar for the thread that we just woke up */
1669         tso = StgMVar_head(mvar);
1670         PerformPut(tso,StgMVar_value(mvar));
1671         if (StgTSO_flags(tso) & TSO_DIRTY == 0) {
1672             foreign "C" dirty_TSO(MyCapability(), tso);
1673         }
1674
1675 #if defined(GRAN) || defined(PAR)
1676         /* ToDo: check 2nd arg (mvar) is right */
1677         ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1678         StgMVar_head(mvar) = tso;
1679 #else
1680         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1681                                               StgMVar_head(mvar) "ptr", 1) [];
1682         StgMVar_head(mvar) = tso;
1683 #endif
1684
1685         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1686             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1687         }
1688 #if defined(THREADED_RTS)
1689         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1690 #else
1691         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1692 #endif
1693     }
1694     else 
1695     {
1696         /* No further putMVars, MVar is now empty */
1697         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1698 #if defined(THREADED_RTS)
1699         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1700 #else
1701         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1702 #endif
1703     }
1704     
1705     RET_NP(1, val);
1706 }
1707
1708
1709 putMVarzh_fast
1710 {
1711     W_ mvar, info, tso;
1712
1713     /* args: R1 = MVar, R2 = value */
1714     mvar = R1;
1715
1716 #if defined(THREADED_RTS)
1717     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1718 #else
1719     info = GET_INFO(mvar);
1720 #endif
1721
1722     if (info == stg_MVAR_CLEAN_info) {
1723         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1724     }
1725
1726     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1727         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1728             StgMVar_head(mvar) = CurrentTSO;
1729         } else {
1730             foreign "C" setTSOLink(MyCapability() "ptr", StgMVar_tail(mvar),
1731                                    CurrentTSO);
1732         }
1733         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1734         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1735         StgTSO_block_info(CurrentTSO)  = mvar;
1736         StgMVar_tail(mvar) = CurrentTSO;
1737         
1738         jump stg_block_putmvar;
1739     }
1740   
1741     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1742
1743         /* There are takeMVar(s) waiting: wake up the first one
1744          */
1745         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1746
1747         /* actually perform the takeMVar */
1748         tso = StgMVar_head(mvar);
1749         PerformTake(tso, R2);
1750         if (StgTSO_flags(tso) & TSO_DIRTY == 0) {
1751             foreign "C" dirty_TSO(MyCapability(), tso);
1752         }
1753       
1754 #if defined(GRAN) || defined(PAR)
1755         /* ToDo: check 2nd arg (mvar) is right */
1756         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1757         StgMVar_head(mvar) = tso;
1758 #else
1759         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1760                                               StgMVar_head(mvar) "ptr", 1) [];
1761         StgMVar_head(mvar) = tso;
1762 #endif
1763
1764         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1765             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1766         }
1767
1768 #if defined(THREADED_RTS)
1769         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1770 #else
1771         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1772 #endif
1773         jump %ENTRY_CODE(Sp(0));
1774     }
1775     else
1776     {
1777         /* No further takes, the MVar is now full. */
1778         StgMVar_value(mvar) = R2;
1779
1780 #if defined(THREADED_RTS)
1781         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1782 #else
1783         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1784 #endif
1785         jump %ENTRY_CODE(Sp(0));
1786     }
1787     
1788     /* ToDo: yield afterward for better communication performance? */
1789 }
1790
1791
1792 tryPutMVarzh_fast
1793 {
1794     W_ mvar, info, tso;
1795
1796     /* args: R1 = MVar, R2 = value */
1797     mvar = R1;
1798
1799 #if defined(THREADED_RTS)
1800     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1801 #else
1802     info = GET_INFO(mvar);
1803 #endif
1804
1805     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1806 #if defined(THREADED_RTS)
1807         unlockClosure(mvar, info);
1808 #endif
1809         RET_N(0);
1810     }
1811   
1812     if (info == stg_MVAR_CLEAN_info) {
1813         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1814     }
1815
1816     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1817
1818         /* There are takeMVar(s) waiting: wake up the first one
1819          */
1820         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1821         
1822         /* actually perform the takeMVar */
1823         tso = StgMVar_head(mvar);
1824         PerformTake(tso, R2);
1825         if (StgTSO_flags(tso) & TSO_DIRTY == 0) {
1826             foreign "C" dirty_TSO(MyCapability(), tso);
1827         }
1828       
1829 #if defined(GRAN) || defined(PAR)
1830         /* ToDo: check 2nd arg (mvar) is right */
1831         ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1832         StgMVar_head(mvar) = tso;
1833 #else
1834         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1835                                               StgMVar_head(mvar) "ptr", 1) [];
1836         StgMVar_head(mvar) = tso;
1837 #endif
1838
1839         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1840             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1841         }
1842
1843 #if defined(THREADED_RTS)
1844         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1845 #else
1846         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1847 #endif
1848     }
1849     else
1850     {
1851         /* No further takes, the MVar is now full. */
1852         StgMVar_value(mvar) = R2;
1853
1854 #if defined(THREADED_RTS)
1855         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1856 #else
1857         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1858 #endif
1859     }
1860     
1861     RET_N(1);
1862     /* ToDo: yield afterward for better communication performance? */
1863 }
1864
1865
1866 /* -----------------------------------------------------------------------------
1867    Stable pointer primitives
1868    -------------------------------------------------------------------------  */
1869
1870 makeStableNamezh_fast
1871 {
1872     W_ index, sn_obj;
1873
1874     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1875   
1876     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1877
1878     /* Is there already a StableName for this heap object?
1879      *  stable_ptr_table is a pointer to an array of snEntry structs.
1880      */
1881     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1882         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1883         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1884         StgStableName_sn(sn_obj) = index;
1885         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1886     } else {
1887         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1888     }
1889     
1890     RET_P(sn_obj);
1891 }
1892
1893
1894 makeStablePtrzh_fast
1895 {
1896     /* Args: R1 = a */
1897     W_ sp;
1898     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1899     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1900     RET_N(sp);
1901 }
1902
1903 deRefStablePtrzh_fast
1904 {
1905     /* Args: R1 = the stable ptr */
1906     W_ r, sp;
1907     sp = R1;
1908     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1909     RET_P(r);
1910 }
1911
1912 /* -----------------------------------------------------------------------------
1913    Bytecode object primitives
1914    -------------------------------------------------------------------------  */
1915
1916 newBCOzh_fast
1917 {
1918     /* R1 = instrs
1919        R2 = literals
1920        R3 = ptrs
1921        R4 = arity
1922        R5 = bitmap array
1923     */
1924     W_ bco, bitmap_arr, bytes, words;
1925     
1926     bitmap_arr = R5;
1927
1928     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1929     bytes = WDS(words);
1930
1931     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1932
1933     bco = Hp - bytes + WDS(1);
1934     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1935     
1936     StgBCO_instrs(bco)     = R1;
1937     StgBCO_literals(bco)   = R2;
1938     StgBCO_ptrs(bco)       = R3;
1939     StgBCO_arity(bco)      = HALF_W_(R4);
1940     StgBCO_size(bco)       = HALF_W_(words);
1941     
1942     // Copy the arity/bitmap info into the BCO
1943     W_ i;
1944     i = 0;
1945 for:
1946     if (i < StgArrWords_words(bitmap_arr)) {
1947         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1948         i = i + 1;
1949         goto for;
1950     }
1951     
1952     RET_P(bco);
1953 }
1954
1955
1956 mkApUpd0zh_fast
1957 {
1958     // R1 = the BCO# for the AP
1959     //  
1960     W_ ap;
1961
1962     // This function is *only* used to wrap zero-arity BCOs in an
1963     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1964     // saturated and always points directly to a FUN or BCO.
1965     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1966            StgBCO_arity(R1) == HALF_W_(0));
1967
1968     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1969     TICK_ALLOC_UP_THK(0, 0);
1970     CCCS_ALLOC(SIZEOF_StgAP);
1971
1972     ap = Hp - SIZEOF_StgAP + WDS(1);
1973     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1974     
1975     StgAP_n_args(ap) = HALF_W_(0);
1976     StgAP_fun(ap) = R1;
1977     
1978     RET_P(ap);
1979 }
1980
1981 unpackClosurezh_fast
1982 {
1983 /* args: R1 = closure to analyze */
1984 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1985
1986     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1987     info  = %GET_STD_INFO(UNTAG(R1));
1988
1989     // Some closures have non-standard layout, so we omit those here.
1990     W_ type;
1991     type = TO_W_(%INFO_TYPE(info));
1992     switch [0 .. N_CLOSURE_TYPES] type {
1993     case THUNK_SELECTOR : {
1994         ptrs = 1;
1995         nptrs = 0;
1996         goto out;
1997     }
1998     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1999          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
2000         ptrs = 0;
2001         nptrs = 0;
2002         goto out;
2003     }
2004     default: {
2005         ptrs  = TO_W_(%INFO_PTRS(info)); 
2006         nptrs = TO_W_(%INFO_NPTRS(info));
2007         goto out;
2008     }}
2009 out:
2010
2011     W_ ptrs_arr_sz, nptrs_arr_sz;
2012     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
2013     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
2014
2015     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
2016
2017     W_ clos;
2018     clos = UNTAG(R1);
2019
2020     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
2021     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
2022
2023     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
2024     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
2025     p = 0;
2026 for:
2027     if(p < ptrs) {
2028          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
2029          p = p + 1;
2030          goto for;
2031     }
2032     
2033     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
2034     StgArrWords_words(nptrs_arr) = nptrs;
2035     p = 0;
2036 for2:
2037     if(p < nptrs) {
2038          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
2039          p = p + 1;
2040          goto for2;
2041     }
2042     RET_NPP(info, ptrs_arr, nptrs_arr);
2043 }
2044
2045 /* -----------------------------------------------------------------------------
2046    Thread I/O blocking primitives
2047    -------------------------------------------------------------------------- */
2048
2049 /* Add a thread to the end of the blocked queue. (C-- version of the C
2050  * macro in Schedule.h).
2051  */
2052 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
2053     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
2054     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
2055       W_[blocked_queue_hd] = tso;                       \
2056     } else {                                            \
2057       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl], tso); \
2058     }                                                   \
2059     W_[blocked_queue_tl] = tso;
2060
2061 waitReadzh_fast
2062 {
2063     /* args: R1 */
2064 #ifdef THREADED_RTS
2065     foreign "C" barf("waitRead# on threaded RTS") never returns;
2066 #else
2067
2068     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2069     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2070     StgTSO_block_info(CurrentTSO) = R1;
2071     // No locking - we're not going to use this interface in the
2072     // threaded RTS anyway.
2073     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2074     jump stg_block_noregs;
2075 #endif
2076 }
2077
2078 waitWritezh_fast
2079 {
2080     /* args: R1 */
2081 #ifdef THREADED_RTS
2082     foreign "C" barf("waitWrite# on threaded RTS") never returns;
2083 #else
2084
2085     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2086     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2087     StgTSO_block_info(CurrentTSO) = R1;
2088     // No locking - we're not going to use this interface in the
2089     // threaded RTS anyway.
2090     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2091     jump stg_block_noregs;
2092 #endif
2093 }
2094
2095
2096 STRING(stg_delayzh_malloc_str, "delayzh_fast")
2097 delayzh_fast
2098 {
2099 #ifdef mingw32_HOST_OS
2100     W_ ares;
2101     CInt reqID;
2102 #else
2103     W_ t, prev, target;
2104 #endif
2105
2106 #ifdef THREADED_RTS
2107     foreign "C" barf("delay# on threaded RTS") never returns;
2108 #else
2109
2110     /* args: R1 (microsecond delay amount) */
2111     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2112     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2113
2114 #ifdef mingw32_HOST_OS
2115
2116     /* could probably allocate this on the heap instead */
2117     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2118                                             stg_delayzh_malloc_str);
2119     (reqID) = foreign "C" addDelayRequest(R1);
2120     StgAsyncIOResult_reqID(ares)   = reqID;
2121     StgAsyncIOResult_len(ares)     = 0;
2122     StgAsyncIOResult_errCode(ares) = 0;
2123     StgTSO_block_info(CurrentTSO)  = ares;
2124
2125     /* Having all async-blocked threads reside on the blocked_queue
2126      * simplifies matters, so change the status to OnDoProc put the
2127      * delayed thread on the blocked_queue.
2128      */
2129     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2130     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2131     jump stg_block_async_void;
2132
2133 #else
2134
2135     W_ time;
2136     W_ divisor;
2137     (time) = foreign "C" getourtimeofday() [R1];
2138     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
2139     if (divisor == 0) {
2140         divisor = 50;
2141     }
2142     divisor = divisor * 1000;
2143     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2144            + time + 1; /* Add 1 as getourtimeofday rounds down */
2145     StgTSO_block_info(CurrentTSO) = target;
2146
2147     /* Insert the new thread in the sleeping queue. */
2148     prev = NULL;
2149     t = W_[sleeping_queue];
2150 while:
2151     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2152         prev = t;
2153         t = StgTSO__link(t);
2154         goto while;
2155     }
2156
2157     StgTSO__link(CurrentTSO) = t;
2158     if (prev == NULL) {
2159         W_[sleeping_queue] = CurrentTSO;
2160     } else {
2161         foreign "C" setTSOLink(MyCapability() "ptr", prev, CurrentTSO) [];
2162     }
2163     jump stg_block_noregs;
2164 #endif
2165 #endif /* !THREADED_RTS */
2166 }
2167
2168
2169 #ifdef mingw32_HOST_OS
2170 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2171 asyncReadzh_fast
2172 {
2173     W_ ares;
2174     CInt reqID;
2175
2176 #ifdef THREADED_RTS
2177     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2178 #else
2179
2180     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2181     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2182     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2183
2184     /* could probably allocate this on the heap instead */
2185     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2186                                             stg_asyncReadzh_malloc_str)
2187                         [R1,R2,R3,R4];
2188     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2189     StgAsyncIOResult_reqID(ares)   = reqID;
2190     StgAsyncIOResult_len(ares)     = 0;
2191     StgAsyncIOResult_errCode(ares) = 0;
2192     StgTSO_block_info(CurrentTSO)  = ares;
2193     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2194     jump stg_block_async;
2195 #endif
2196 }
2197
2198 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2199 asyncWritezh_fast
2200 {
2201     W_ ares;
2202     CInt reqID;
2203
2204 #ifdef THREADED_RTS
2205     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2206 #else
2207
2208     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2209     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2210     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2211
2212     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2213                                             stg_asyncWritezh_malloc_str)
2214                         [R1,R2,R3,R4];
2215     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2216
2217     StgAsyncIOResult_reqID(ares)   = reqID;
2218     StgAsyncIOResult_len(ares)     = 0;
2219     StgAsyncIOResult_errCode(ares) = 0;
2220     StgTSO_block_info(CurrentTSO)  = ares;
2221     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2222     jump stg_block_async;
2223 #endif
2224 }
2225
2226 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2227 asyncDoProczh_fast
2228 {
2229     W_ ares;
2230     CInt reqID;
2231
2232 #ifdef THREADED_RTS
2233     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2234 #else
2235
2236     /* args: R1 = proc, R2 = param */
2237     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2238     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2239
2240     /* could probably allocate this on the heap instead */
2241     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2242                                             stg_asyncDoProczh_malloc_str) 
2243                                 [R1,R2];
2244     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2245     StgAsyncIOResult_reqID(ares)   = reqID;
2246     StgAsyncIOResult_len(ares)     = 0;
2247     StgAsyncIOResult_errCode(ares) = 0;
2248     StgTSO_block_info(CurrentTSO) = ares;
2249     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2250     jump stg_block_async;
2251 #endif
2252 }
2253 #endif
2254
2255 // noDuplicate# tries to ensure that none of the thunks under
2256 // evaluation by the current thread are also under evaluation by
2257 // another thread.  It relies on *both* threads doing noDuplicate#;
2258 // the second one will get blocked if they are duplicating some work.
2259 noDuplicatezh_fast
2260 {
2261     SAVE_THREAD_STATE();
2262     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2263     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2264     
2265     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2266         jump stg_threadFinished;
2267     } else {
2268         LOAD_THREAD_STATE();
2269         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2270         jump %ENTRY_CODE(Sp(0));
2271     }
2272 }
2273
2274 getApStackValzh_fast
2275 {
2276    W_ ap_stack, offset, val, ok;
2277
2278    /* args: R1 = AP_STACK, R2 = offset */
2279    ap_stack = R1;
2280    offset   = R2;
2281
2282    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2283         ok = 1;
2284         val = StgAP_STACK_payload(ap_stack,offset); 
2285    } else {
2286         ok = 0;
2287         val = R1;
2288    }
2289    RET_NP(ok,val);
2290 }