add threadStatus# primop, for querying the status of a ThreadId#
[ghc-hetmet.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 #ifdef __PIC__
31 #ifndef mingw32_HOST_OS
32 import __gmpz_init;
33 import __gmpz_add;
34 import __gmpz_sub;
35 import __gmpz_mul;
36 import __gmpz_gcd;
37 import __gmpn_gcd_1;
38 import __gmpn_cmp;
39 import __gmpz_tdiv_q;
40 import __gmpz_tdiv_r;
41 import __gmpz_tdiv_qr;
42 import __gmpz_fdiv_qr;
43 import __gmpz_divexact;
44 import __gmpz_and;
45 import __gmpz_xor;
46 import __gmpz_ior;
47 import __gmpz_com;
48 #endif
49 import pthread_mutex_lock;
50 import pthread_mutex_unlock;
51 #endif
52 import base_GHCziIOBase_NestedAtomically_closure;
53 import EnterCriticalSection;
54 import LeaveCriticalSection;
55
56 /*-----------------------------------------------------------------------------
57   Array Primitives
58
59   Basically just new*Array - the others are all inline macros.
60
61   The size arg is always passed in R1, and the result returned in R1.
62
63   The slow entry point is for returning from a heap check, the saved
64   size argument must be re-loaded from the stack.
65   -------------------------------------------------------------------------- */
66
67 /* for objects that are *less* than the size of a word, make sure we
68  * round up to the nearest word for the size of the array.
69  */
70
71 newByteArrayzh_fast
72 {
73     W_ words, payload_words, n, p;
74     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
75     n = R1;
76     payload_words = ROUNDUP_BYTES_TO_WDS(n);
77     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
78     ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
79     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
80     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
81     StgArrWords_words(p) = payload_words;
82     RET_P(p);
83 }
84
85 newPinnedByteArrayzh_fast
86 {
87     W_ words, payload_words, n, p;
88
89     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
90     n = R1;
91     payload_words = ROUNDUP_BYTES_TO_WDS(n);
92
93     // We want an 8-byte aligned array.  allocatePinned() gives us
94     // 8-byte aligned memory by default, but we want to align the
95     // *goods* inside the ArrWords object, so we have to check the
96     // size of the ArrWords header and adjust our size accordingly.
97     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
98     if ((SIZEOF_StgArrWords & 7) != 0) {
99         words = words + 1;
100     }
101
102     ("ptr" p) = foreign "C" allocatePinned(words) [];
103     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
104
105     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
106     // have to push the object forward one word so that the goods
107     // fall on an 8-byte boundary.
108     if ((SIZEOF_StgArrWords & 7) != 0) {
109         p = p + WDS(1);
110     }
111
112     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
113     StgArrWords_words(p) = payload_words;
114     RET_P(p);
115 }
116
117 newArrayzh_fast
118 {
119     W_ words, n, init, arr, p;
120     /* Args: R1 = words, R2 = initialisation value */
121
122     n = R1;
123     MAYBE_GC(R2_PTR,newArrayzh_fast);
124
125     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
126     ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
127     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
128
129     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
130     StgMutArrPtrs_ptrs(arr) = n;
131
132     // Initialise all elements of the the array with the value in R2
133     init = R2;
134     p = arr + SIZEOF_StgMutArrPtrs;
135   for:
136     if (p < arr + WDS(words)) {
137         W_[p] = init;
138         p = p + WDS(1);
139         goto for;
140     }
141
142     RET_P(arr);
143 }
144
145 unsafeThawArrayzh_fast
146 {
147   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
148   //
149   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
150   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
151   // it on the mutable list for the GC to remove (removing something from
152   // the mutable list is not easy, because the mut_list is only singly-linked).
153   // 
154   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
155   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
156   // to indicate that it is still on the mutable list.
157   //
158   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
159   // either it is on a mut_list, or it isn't.  We adopt the convention that
160   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
161   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
162   // we put it on the mutable list more than once, but it would get scavenged
163   // multiple times during GC, which would be unnecessarily slow.
164   //
165   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
166         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
167         recordMutable(R1, R1);
168         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
169         RET_P(R1);
170   } else {
171         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
172         RET_P(R1);
173   }
174 }
175
176 /* -----------------------------------------------------------------------------
177    MutVar primitives
178    -------------------------------------------------------------------------- */
179
180 newMutVarzh_fast
181 {
182     W_ mv;
183     /* Args: R1 = initialisation value */
184
185     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
186
187     mv = Hp - SIZEOF_StgMutVar + WDS(1);
188     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
189     StgMutVar_var(mv) = R1;
190     
191     RET_P(mv);
192 }
193
194 atomicModifyMutVarzh_fast
195 {
196     W_ mv, z, x, y, r;
197     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
198
199     /* If x is the current contents of the MutVar#, then 
200        We want to make the new contents point to
201
202          (sel_0 (f x))
203  
204        and the return value is
205          
206          (sel_1 (f x))
207
208         obviously we can share (f x).
209
210          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
211          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
212          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
213     */
214
215 #if MIN_UPD_SIZE > 1
216 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
217 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
218 #else
219 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
220 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
221 #endif
222
223 #if MIN_UPD_SIZE > 2
224 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
225 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
226 #else
227 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
228 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
229 #endif
230
231 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
232
233    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
234
235 #if defined(THREADED_RTS)
236     ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
237 #endif
238
239    x = StgMutVar_var(R1);
240
241    TICK_ALLOC_THUNK_2();
242    CCCS_ALLOC(THUNK_2_SIZE);
243    z = Hp - THUNK_2_SIZE + WDS(1);
244    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
245    LDV_RECORD_CREATE(z);
246    StgThunk_payload(z,0) = R2;
247    StgThunk_payload(z,1) = x;
248
249    TICK_ALLOC_THUNK_1();
250    CCCS_ALLOC(THUNK_1_SIZE);
251    y = z - THUNK_1_SIZE;
252    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
253    LDV_RECORD_CREATE(y);
254    StgThunk_payload(y,0) = z;
255
256    StgMutVar_var(R1) = y;
257    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
258
259    TICK_ALLOC_THUNK_1();
260    CCCS_ALLOC(THUNK_1_SIZE);
261    r = y - THUNK_1_SIZE;
262    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
263    LDV_RECORD_CREATE(r);
264    StgThunk_payload(r,0) = z;
265
266 #if defined(THREADED_RTS)
267     RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
268 #endif
269
270    RET_P(r);
271 }
272
273 /* -----------------------------------------------------------------------------
274    Weak Pointer Primitives
275    -------------------------------------------------------------------------- */
276
277 STRING(stg_weak_msg,"New weak pointer at %p\n")
278
279 mkWeakzh_fast
280 {
281   /* R1 = key
282      R2 = value
283      R3 = finalizer (or NULL)
284   */
285   W_ w;
286
287   if (R3 == NULL) {
288     R3 = stg_NO_FINALIZER_closure;
289   }
290
291   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
292
293   w = Hp - SIZEOF_StgWeak + WDS(1);
294   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
295
296   StgWeak_key(w)       = R1;
297   StgWeak_value(w)     = R2;
298   StgWeak_finalizer(w) = R3;
299
300   StgWeak_link(w)       = W_[weak_ptr_list];
301   W_[weak_ptr_list]     = w;
302
303   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
304
305   RET_P(w);
306 }
307
308
309 finalizzeWeakzh_fast
310 {
311   /* R1 = weak ptr
312    */
313   W_ w, f;
314
315   w = R1;
316
317   // already dead?
318   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
319       RET_NP(0,stg_NO_FINALIZER_closure);
320   }
321
322   // kill it
323 #ifdef PROFILING
324   // @LDV profiling
325   // A weak pointer is inherently used, so we do not need to call
326   // LDV_recordDead_FILL_SLOP_DYNAMIC():
327   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
328   // or, LDV_recordDead():
329   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
330   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
331   // large as weak pointers, so there is no need to fill the slop, either.
332   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
333 #endif
334
335   //
336   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
337   //
338   SET_INFO(w,stg_DEAD_WEAK_info);
339   LDV_RECORD_CREATE(w);
340
341   f = StgWeak_finalizer(w);
342   StgDeadWeak_link(w) = StgWeak_link(w);
343
344   /* return the finalizer */
345   if (f == stg_NO_FINALIZER_closure) {
346       RET_NP(0,stg_NO_FINALIZER_closure);
347   } else {
348       RET_NP(1,f);
349   }
350 }
351
352 deRefWeakzh_fast
353 {
354   /* R1 = weak ptr */
355   W_ w, code, val;
356
357   w = R1;
358   if (GET_INFO(w) == stg_WEAK_info) {
359     code = 1;
360     val = StgWeak_value(w);
361   } else {
362     code = 0;
363     val = w;
364   }
365   RET_NP(code,val);
366 }
367
368 /* -----------------------------------------------------------------------------
369    Arbitrary-precision Integer operations.
370
371    There are some assumptions in this code that mp_limb_t == W_.  This is
372    the case for all the platforms that GHC supports, currently.
373    -------------------------------------------------------------------------- */
374
375 int2Integerzh_fast
376 {
377    /* arguments: R1 = Int# */
378
379    W_ val, s, p;        /* to avoid aliasing */
380
381    val = R1;
382    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
383
384    p = Hp - SIZEOF_StgArrWords;
385    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
386    StgArrWords_words(p) = 1;
387
388    /* mpz_set_si is inlined here, makes things simpler */
389    if (%lt(val,0)) { 
390         s  = -1;
391         Hp(0) = -val;
392    } else { 
393      if (%gt(val,0)) {
394         s = 1;
395         Hp(0) = val;
396      } else {
397         s = 0;
398      }
399   }
400
401    /* returns (# size  :: Int#, 
402                  data  :: ByteArray# 
403                #)
404    */
405    RET_NP(s,p);
406 }
407
408 word2Integerzh_fast
409 {
410    /* arguments: R1 = Word# */
411
412    W_ val, s, p;        /* to avoid aliasing */
413
414    val = R1;
415
416    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
417
418    p = Hp - SIZEOF_StgArrWords;
419    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
420    StgArrWords_words(p) = 1;
421
422    if (val != 0) {
423         s = 1;
424         W_[Hp] = val;
425    } else {
426         s = 0;
427    }
428
429    /* returns (# size  :: Int#, 
430                  data  :: ByteArray# #)
431    */
432    RET_NP(s,p);
433 }
434
435
436 /*
437  * 'long long' primops for converting to/from Integers.
438  */
439
440 #ifdef SUPPORT_LONG_LONGS
441
442 int64ToIntegerzh_fast
443 {
444    /* arguments: L1 = Int64# */
445
446    L_ val;
447    W_ hi, lo, s, neg, words_needed, p;
448
449    val = L1;
450    neg = 0;
451
452    hi = TO_W_(val >> 32);
453    lo = TO_W_(val);
454
455    if ( hi == 0 || (hi == 0xFFFFFFFF && lo != 0) )  {
456        // minimum is one word
457        words_needed = 1;
458    } else { 
459        words_needed = 2;
460    }
461
462    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
463                NO_PTRS, int64ToIntegerzh_fast );
464
465    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
466    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
467    StgArrWords_words(p) = words_needed;
468
469    if ( %lt(hi,0) ) {
470      neg = 1;
471      lo = -lo;
472      if(lo == 0) {
473        hi = -hi;
474      } else {
475        hi = -hi - 1;
476      }
477    }
478
479    if ( words_needed == 2 )  { 
480       s = 2;
481       Hp(-1) = lo;
482       Hp(0) = hi;
483    } else { 
484        if ( lo != 0 ) {
485            s = 1;
486            Hp(0) = lo;
487        } else /* val==0 */  {
488            s = 0;
489        }
490    }
491    if ( neg != 0 ) {
492         s = -s;
493    }
494
495    /* returns (# size  :: Int#, 
496                  data  :: ByteArray# #)
497    */
498    RET_NP(s,p);
499 }
500 word64ToIntegerzh_fast
501 {
502    /* arguments: L1 = Word64# */
503
504    L_ val;
505    W_ hi, lo, s, words_needed, p;
506
507    val = L1;
508    hi = TO_W_(val >> 32);
509    lo = TO_W_(val);
510
511    if ( hi != 0 ) {
512       words_needed = 2;
513    } else {
514       words_needed = 1;
515    }
516
517    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
518                NO_PTRS, word64ToIntegerzh_fast );
519
520    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
521    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
522    StgArrWords_words(p) = words_needed;
523
524    if ( hi != 0 ) { 
525      s = 2;
526      Hp(-1) = lo;
527      Hp(0)  = hi;
528    } else {
529       if ( lo != 0 ) {
530         s = 1;
531         Hp(0) = lo;
532      } else /* val==0 */  {
533       s = 0;
534      }
535   }
536
537    /* returns (# size  :: Int#, 
538                  data  :: ByteArray# #)
539    */
540    RET_NP(s,p);
541 }
542
543
544
545 #endif /* SUPPORT_LONG_LONGS */
546
547 /* ToDo: this is shockingly inefficient */
548
549 #ifndef THREADED_RTS
550 section "bss" {
551   mp_tmp1:
552     bits8 [SIZEOF_MP_INT];
553 }
554
555 section "bss" {
556   mp_tmp2:
557     bits8 [SIZEOF_MP_INT];
558 }
559
560 section "bss" {
561   mp_result1:
562     bits8 [SIZEOF_MP_INT];
563 }
564
565 section "bss" {
566   mp_result2:
567     bits8 [SIZEOF_MP_INT];
568 }
569 #endif
570
571 #ifdef THREADED_RTS
572 #define FETCH_MP_TEMP(X) \
573 W_ X; \
574 X = BaseReg + (OFFSET_StgRegTable_r ## X);
575 #else
576 #define FETCH_MP_TEMP(X) /* Nothing */
577 #endif
578
579 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
580 name                                                                    \
581 {                                                                       \
582   CInt s1, s2;                                                          \
583   W_ d1, d2;                                                            \
584   FETCH_MP_TEMP(mp_tmp1);                                               \
585   FETCH_MP_TEMP(mp_tmp2);                                               \
586   FETCH_MP_TEMP(mp_result1)                                             \
587   FETCH_MP_TEMP(mp_result2);                                            \
588                                                                         \
589   /* call doYouWantToGC() */                                            \
590   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
591                                                                         \
592   s1 = W_TO_INT(R1);                                                    \
593   d1 = R2;                                                              \
594   s2 = W_TO_INT(R3);                                                    \
595   d2 = R4;                                                              \
596                                                                         \
597   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
598   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
599   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
600   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
601   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
602   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
603                                                                         \
604   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
605                                                                         \
606   /* Perform the operation */                                           \
607   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
608                                                                         \
609   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
610          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
611 }
612
613 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
614 name                                                                    \
615 {                                                                       \
616   CInt s1;                                                              \
617   W_ d1;                                                                \
618   FETCH_MP_TEMP(mp_tmp1);                                               \
619   FETCH_MP_TEMP(mp_result1)                                             \
620                                                                         \
621   /* call doYouWantToGC() */                                            \
622   MAYBE_GC(R2_PTR, name);                                               \
623                                                                         \
624   d1 = R2;                                                              \
625   s1 = W_TO_INT(R1);                                                    \
626                                                                         \
627   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
628   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
629   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
630                                                                         \
631   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
632                                                                         \
633   /* Perform the operation */                                           \
634   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
635                                                                         \
636   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
637          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
638 }
639
640 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
641 name                                                                                    \
642 {                                                                                       \
643   CInt s1, s2;                                                                          \
644   W_ d1, d2;                                                                            \
645   FETCH_MP_TEMP(mp_tmp1);                                                               \
646   FETCH_MP_TEMP(mp_tmp2);                                                               \
647   FETCH_MP_TEMP(mp_result1)                                                             \
648   FETCH_MP_TEMP(mp_result2)                                                             \
649                                                                                         \
650   /* call doYouWantToGC() */                                                            \
651   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
652                                                                                         \
653   s1 = W_TO_INT(R1);                                                                    \
654   d1 = R2;                                                                              \
655   s2 = W_TO_INT(R3);                                                                    \
656   d2 = R4;                                                                              \
657                                                                                         \
658   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
659   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
660   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
661   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
662   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
663   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
664                                                                                         \
665   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
666   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
667                                                                                         \
668   /* Perform the operation */                                                           \
669   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
670                                                                                         \
671   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
672            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
673            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
674            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
675 }
676
677 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
678 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
679 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
680 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
681 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
682 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
683 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
684 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
685 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
686 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
687 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
688
689 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
690 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
691
692 #ifndef THREADED_RTS
693 section "bss" {
694   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
695 }
696 #endif
697
698 gcdIntzh_fast
699 {
700     /* R1 = the first Int#; R2 = the second Int# */
701     W_ r; 
702     FETCH_MP_TEMP(mp_tmp_w);
703
704     W_[mp_tmp_w] = R1;
705     (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
706
707     R1 = r;
708     /* Result parked in R1, return via info-pointer at TOS */
709     jump %ENTRY_CODE(Sp(0));
710 }
711
712
713 gcdIntegerIntzh_fast
714 {
715     /* R1 = s1; R2 = d1; R3 = the int */
716     W_ s1;
717     (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
718     R1 = s1;
719     
720     /* Result parked in R1, return via info-pointer at TOS */
721     jump %ENTRY_CODE(Sp(0));
722 }
723
724
725 cmpIntegerIntzh_fast
726 {
727     /* R1 = s1; R2 = d1; R3 = the int */
728     W_ usize, vsize, v_digit, u_digit;
729
730     usize = R1;
731     vsize = 0;
732     v_digit = R3;
733
734     // paraphrased from __gmpz_cmp_si() in the GMP sources
735     if (%gt(v_digit,0)) {
736         vsize = 1;
737     } else { 
738         if (%lt(v_digit,0)) {
739             vsize = -1;
740             v_digit = -v_digit;
741         }
742     }
743
744     if (usize != vsize) {
745         R1 = usize - vsize; 
746         jump %ENTRY_CODE(Sp(0));
747     }
748
749     if (usize == 0) {
750         R1 = 0; 
751         jump %ENTRY_CODE(Sp(0));
752     }
753
754     u_digit = W_[BYTE_ARR_CTS(R2)];
755
756     if (u_digit == v_digit) {
757         R1 = 0; 
758         jump %ENTRY_CODE(Sp(0));
759     }
760
761     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
762         R1 = usize; 
763     } else {
764         R1 = -usize; 
765     }
766
767     jump %ENTRY_CODE(Sp(0));
768 }
769
770 cmpIntegerzh_fast
771 {
772     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
773     W_ usize, vsize, size, up, vp;
774     CInt cmp;
775
776     // paraphrased from __gmpz_cmp() in the GMP sources
777     usize = R1;
778     vsize = R3;
779
780     if (usize != vsize) {
781         R1 = usize - vsize; 
782         jump %ENTRY_CODE(Sp(0));
783     }
784
785     if (usize == 0) {
786         R1 = 0; 
787         jump %ENTRY_CODE(Sp(0));
788     }
789
790     if (%lt(usize,0)) { // NB. not <, which is unsigned
791         size = -usize;
792     } else {
793         size = usize;
794     }
795
796     up = BYTE_ARR_CTS(R2);
797     vp = BYTE_ARR_CTS(R4);
798
799     (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
800
801     if (cmp == 0 :: CInt) {
802         R1 = 0; 
803         jump %ENTRY_CODE(Sp(0));
804     }
805
806     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
807         R1 = 1;
808     } else {
809         R1 = (-1); 
810     }
811     /* Result parked in R1, return via info-pointer at TOS */
812     jump %ENTRY_CODE(Sp(0));
813 }
814
815 integer2Intzh_fast
816 {
817     /* R1 = s; R2 = d */
818     W_ r, s;
819
820     s = R1;
821     if (s == 0) {
822         r = 0;
823     } else {
824         r = W_[R2 + SIZEOF_StgArrWords];
825         if (%lt(s,0)) {
826             r = -r;
827         }
828     }
829     /* Result parked in R1, return via info-pointer at TOS */
830     R1 = r;
831     jump %ENTRY_CODE(Sp(0));
832 }
833
834 integer2Wordzh_fast
835 {
836   /* R1 = s; R2 = d */
837   W_ r, s;
838
839   s = R1;
840   if (s == 0) {
841     r = 0;
842   } else {
843     r = W_[R2 + SIZEOF_StgArrWords];
844     if (%lt(s,0)) {
845         r = -r;
846     }
847   }
848   /* Result parked in R1, return via info-pointer at TOS */
849   R1 = r;
850   jump %ENTRY_CODE(Sp(0));
851 }
852
853 decodeFloatzh_fast
854
855     W_ p;
856     F_ arg;
857     FETCH_MP_TEMP(mp_tmp1);
858     FETCH_MP_TEMP(mp_tmp_w);
859     
860     /* arguments: F1 = Float# */
861     arg = F1;
862     
863     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
864     
865     /* Be prepared to tell Lennart-coded __decodeFloat
866        where mantissa._mp_d can be put (it does not care about the rest) */
867     p = Hp - SIZEOF_StgArrWords;
868     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
869     StgArrWords_words(p) = 1;
870     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
871     
872     /* Perform the operation */
873     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
874     
875     /* returns: (Int# (expn), Int#, ByteArray#) */
876     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
877 }
878
879 decodeFloatzuIntzh_fast
880
881     W_ p;
882     F_ arg;
883     FETCH_MP_TEMP(mp_tmp1);
884     FETCH_MP_TEMP(mp_tmp_w);
885     
886     /* arguments: F1 = Float# */
887     arg = F1;
888     
889     /* Perform the operation */
890     foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) [];
891     
892     /* returns: (Int# (mantissa), Int# (exponent)) */
893     RET_NN(W_[mp_tmp1], W_[mp_tmp_w]);
894 }
895
896 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
897 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
898
899 decodeDoublezh_fast
900
901     D_ arg;
902     W_ p;
903     FETCH_MP_TEMP(mp_tmp1);
904     FETCH_MP_TEMP(mp_tmp_w);
905
906     /* arguments: D1 = Double# */
907     arg = D1;
908
909     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
910     
911     /* Be prepared to tell Lennart-coded __decodeDouble
912        where mantissa.d can be put (it does not care about the rest) */
913     p = Hp - ARR_SIZE + WDS(1);
914     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
915     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
916     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
917
918     /* Perform the operation */
919     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
920     
921     /* returns: (Int# (expn), Int#, ByteArray#) */
922     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
923 }
924
925 decodeDoublezu2Intzh_fast
926
927     D_ arg;
928     W_ p;
929     FETCH_MP_TEMP(mp_tmp1);
930     FETCH_MP_TEMP(mp_tmp2);
931     FETCH_MP_TEMP(mp_result1);
932     FETCH_MP_TEMP(mp_result2);
933
934     /* arguments: D1 = Double# */
935     arg = D1;
936
937     /* Perform the operation */
938     foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr",
939                                     mp_result1 "ptr", mp_result2 "ptr",
940                                     arg) [];
941
942     /* returns:
943        (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */
944     RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]);
945 }
946
947 /* -----------------------------------------------------------------------------
948  * Concurrency primitives
949  * -------------------------------------------------------------------------- */
950
951 forkzh_fast
952 {
953   /* args: R1 = closure to spark */
954
955   MAYBE_GC(R1_PTR, forkzh_fast);
956
957   W_ closure;
958   W_ threadid;
959   closure = R1;
960
961   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
962                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
963                                 closure "ptr") [];
964
965   /* start blocked if the current thread is blocked */
966   StgTSO_flags(threadid) = 
967      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
968                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
969
970   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
971
972   // switch at the earliest opportunity
973   CInt[context_switch] = 1 :: CInt;
974   
975   RET_P(threadid);
976 }
977
978 forkOnzh_fast
979 {
980   /* args: R1 = cpu, R2 = closure to spark */
981
982   MAYBE_GC(R2_PTR, forkOnzh_fast);
983
984   W_ cpu;
985   W_ closure;
986   W_ threadid;
987   cpu = R1;
988   closure = R2;
989
990   ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", 
991                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
992                                 closure "ptr") [];
993
994   /* start blocked if the current thread is blocked */
995   StgTSO_flags(threadid) = 
996      StgTSO_flags(threadid) |  (StgTSO_flags(CurrentTSO) & 
997                                 (TSO_BLOCKEX::I32 | TSO_INTERRUPTIBLE::I32));
998
999   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
1000
1001   // switch at the earliest opportunity
1002   CInt[context_switch] = 1 :: CInt;
1003   
1004   RET_P(threadid);
1005 }
1006
1007 yieldzh_fast
1008 {
1009   jump stg_yield_noregs;
1010 }
1011
1012 myThreadIdzh_fast
1013 {
1014   /* no args. */
1015   RET_P(CurrentTSO);
1016 }
1017
1018 labelThreadzh_fast
1019 {
1020   /* args: 
1021         R1 = ThreadId#
1022         R2 = Addr# */
1023 #ifdef DEBUG
1024   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
1025 #endif
1026   jump %ENTRY_CODE(Sp(0));
1027 }
1028
1029 isCurrentThreadBoundzh_fast
1030 {
1031   /* no args */
1032   W_ r;
1033   (r) = foreign "C" isThreadBound(CurrentTSO) [];
1034   RET_N(r);
1035 }
1036
1037 threadStatuszh_fast
1038 {
1039     /* args: R1 :: ThreadId# */
1040     W_ tso;
1041     W_ why_blocked;
1042     W_ what_next;
1043     W_ ret;
1044
1045     tso = R1;
1046     loop:
1047       if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) {
1048           tso = StgTSO__link(tso);
1049           goto loop;
1050       }
1051
1052     what_next   = TO_W_(StgTSO_what_next(tso));
1053     why_blocked = TO_W_(StgTSO_why_blocked(tso));
1054     // Note: these two reads are not atomic, so they might end up
1055     // being inconsistent.  It doesn't matter, since we
1056     // only return one or the other.  If we wanted to return the
1057     // contents of block_info too, then we'd have to do some synchronisation.
1058
1059     if (what_next == ThreadComplete) {
1060         ret = 16;  // NB. magic, matches up with GHC.Conc.threadStatus
1061     } else {
1062         if (what_next == ThreadKilled) {
1063             ret = 17;
1064         } else {
1065             ret = why_blocked;
1066         }
1067     }
1068     RET_N(ret);
1069 }
1070
1071 /* -----------------------------------------------------------------------------
1072  * TVar primitives
1073  * -------------------------------------------------------------------------- */
1074
1075 #ifdef REG_R1
1076 #define SP_OFF 0
1077 #define IF_NOT_REG_R1(x) 
1078 #else
1079 #define SP_OFF 1
1080 #define IF_NOT_REG_R1(x) x
1081 #endif
1082
1083 // Catch retry frame ------------------------------------------------------------
1084
1085 INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
1086 #if defined(PROFILING)
1087   W_ unused1, W_ unused2,
1088 #endif
1089   W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
1090 {
1091    W_ r, frame, trec, outer;
1092    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1093
1094    frame = Sp;
1095    trec = StgTSO_trec(CurrentTSO);
1096    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1097    (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1098    if (r != 0) {
1099      /* Succeeded (either first branch or second branch) */
1100      StgTSO_trec(CurrentTSO) = outer;
1101      Sp = Sp + SIZEOF_StgCatchRetryFrame;
1102      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1103      jump %ENTRY_CODE(Sp(SP_OFF));
1104    } else {
1105      /* Did not commit: re-execute */
1106      W_ new_trec;
1107      ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1108      StgTSO_trec(CurrentTSO) = new_trec;
1109      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1110        R1 = StgCatchRetryFrame_alt_code(frame);
1111      } else {
1112        R1 = StgCatchRetryFrame_first_code(frame);
1113      }
1114      jump stg_ap_v_fast;
1115    }
1116 }
1117
1118
1119 // Atomically frame ------------------------------------------------------------
1120
1121 INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
1122 #if defined(PROFILING)
1123   W_ unused1, W_ unused2,
1124 #endif
1125   "ptr" W_ unused3, "ptr" W_ unused4)
1126 {
1127   W_ frame, trec, valid, next_invariant, q, outer;
1128   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1129
1130   frame = Sp;
1131   trec = StgTSO_trec(CurrentTSO);
1132   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1133
1134   if (outer == NO_TREC) {
1135     /* First time back at the atomically frame -- pick up invariants */
1136     ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1137     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1138
1139   } else {
1140     /* Second/subsequent time back at the atomically frame -- abort the
1141      * tx that's checking the invariant and move on to the next one */
1142     StgTSO_trec(CurrentTSO) = outer;
1143     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1144     StgInvariantCheckQueue_my_execution(q) = trec;
1145     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1146     /* Don't free trec -- it's linked from q and will be stashed in the
1147      * invariant if we eventually commit. */
1148     q = StgInvariantCheckQueue_next_queue_entry(q);
1149     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1150     trec = outer;
1151   }
1152
1153   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1154
1155   if (q != END_INVARIANT_CHECK_QUEUE) {
1156     /* We can't commit yet: another invariant to check */
1157     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1158     StgTSO_trec(CurrentTSO) = trec;
1159
1160     next_invariant = StgInvariantCheckQueue_invariant(q);
1161     R1 = StgAtomicInvariant_code(next_invariant);
1162     jump stg_ap_v_fast;
1163
1164   } else {
1165
1166     /* We've got no more invariants to check, try to commit */
1167     (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1168     if (valid != 0) {
1169       /* Transaction was valid: commit succeeded */
1170       StgTSO_trec(CurrentTSO) = NO_TREC;
1171       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1172       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1173       jump %ENTRY_CODE(Sp(SP_OFF));
1174     } else {
1175       /* Transaction was not valid: try again */
1176       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1177       StgTSO_trec(CurrentTSO) = trec;
1178       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1179       R1 = StgAtomicallyFrame_code(frame);
1180       jump stg_ap_v_fast;
1181     }
1182   }
1183 }
1184
1185 INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
1186 #if defined(PROFILING)
1187   W_ unused1, W_ unused2,
1188 #endif
1189   "ptr" W_ unused3, "ptr" W_ unused4)
1190 {
1191   W_ frame, trec, valid;
1192   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1193
1194   frame = Sp;
1195
1196   /* The TSO is currently waiting: should we stop waiting? */
1197   (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1198   if (valid != 0) {
1199     /* Previous attempt is still valid: no point trying again yet */
1200           IF_NOT_REG_R1(Sp_adj(-2);
1201                         Sp(1) = stg_NO_FINALIZER_closure;
1202                         Sp(0) = stg_ut_1_0_unreg_info;)
1203     jump stg_block_noregs;
1204   } else {
1205     /* Previous attempt is no longer valid: try again */
1206     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1207     StgTSO_trec(CurrentTSO) = trec;
1208     StgHeader_info(frame) = stg_atomically_frame_info;
1209     R1 = StgAtomicallyFrame_code(frame);
1210     jump stg_ap_v_fast;
1211   }
1212 }
1213
1214 // STM catch frame --------------------------------------------------------------
1215
1216 #ifdef REG_R1
1217 #define SP_OFF 0
1218 #else
1219 #define SP_OFF 1
1220 #endif
1221
1222 /* Catch frames are very similar to update frames, but when entering
1223  * one we just pop the frame off the stack and perform the correct
1224  * kind of return to the activation record underneath us on the stack.
1225  */
1226
1227 INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
1228 #if defined(PROFILING)
1229   W_ unused1, W_ unused2,
1230 #endif
1231   "ptr" W_ unused3, "ptr" W_ unused4)
1232    {
1233       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1234       W_ r, frame, trec, outer;
1235       frame = Sp;
1236       trec = StgTSO_trec(CurrentTSO);
1237       ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1238       (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1239       if (r != 0) {
1240         /* Commit succeeded */
1241         StgTSO_trec(CurrentTSO) = outer;
1242         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1243         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1244         jump Sp(SP_OFF);
1245       } else {
1246         /* Commit failed */
1247         W_ new_trec;
1248         ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1249         StgTSO_trec(CurrentTSO) = new_trec;
1250         R1 = StgCatchSTMFrame_code(frame);
1251         jump stg_ap_v_fast;
1252       }
1253    }
1254
1255
1256 // Primop definition ------------------------------------------------------------
1257
1258 atomicallyzh_fast
1259 {
1260   W_ frame;
1261   W_ old_trec;
1262   W_ new_trec;
1263   
1264   // stmStartTransaction may allocate
1265   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1266
1267   /* Args: R1 = m :: STM a */
1268   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1269
1270   old_trec = StgTSO_trec(CurrentTSO);
1271
1272   /* Nested transactions are not allowed; raise an exception */
1273   if (old_trec != NO_TREC) {
1274      R1 = base_GHCziIOBase_NestedAtomically_closure;
1275      jump raisezh_fast;
1276   }
1277
1278   /* Set up the atomically frame */
1279   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1280   frame = Sp;
1281
1282   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1283   StgAtomicallyFrame_code(frame) = R1;
1284   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1285
1286   /* Start the memory transcation */
1287   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1288   StgTSO_trec(CurrentTSO) = new_trec;
1289
1290   /* Apply R1 to the realworld token */
1291   jump stg_ap_v_fast;
1292 }
1293
1294
1295 catchSTMzh_fast
1296 {
1297   W_ frame;
1298   
1299   /* Args: R1 :: STM a */
1300   /* Args: R2 :: Exception -> STM a */
1301   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1302
1303   /* Set up the catch frame */
1304   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1305   frame = Sp;
1306
1307   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1308   StgCatchSTMFrame_handler(frame) = R2;
1309   StgCatchSTMFrame_code(frame) = R1;
1310
1311   /* Start a nested transaction to run the body of the try block in */
1312   W_ cur_trec;  
1313   W_ new_trec;
1314   cur_trec = StgTSO_trec(CurrentTSO);
1315   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1316   StgTSO_trec(CurrentTSO) = new_trec;
1317
1318   /* Apply R1 to the realworld token */
1319   jump stg_ap_v_fast;
1320 }
1321
1322
1323 catchRetryzh_fast
1324 {
1325   W_ frame;
1326   W_ new_trec;
1327   W_ trec;
1328
1329   // stmStartTransaction may allocate
1330   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1331
1332   /* Args: R1 :: STM a */
1333   /* Args: R2 :: STM a */
1334   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1335
1336   /* Start a nested transaction within which to run the first code */
1337   trec = StgTSO_trec(CurrentTSO);
1338   ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1339   StgTSO_trec(CurrentTSO) = new_trec;
1340
1341   /* Set up the catch-retry frame */
1342   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1343   frame = Sp;
1344   
1345   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1346   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1347   StgCatchRetryFrame_first_code(frame) = R1;
1348   StgCatchRetryFrame_alt_code(frame) = R2;
1349
1350   /* Apply R1 to the realworld token */
1351   jump stg_ap_v_fast;
1352 }
1353
1354
1355 retryzh_fast
1356 {
1357   W_ frame_type;
1358   W_ frame;
1359   W_ trec;
1360   W_ outer;
1361   W_ r;
1362
1363   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1364
1365   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1366 retry_pop_stack:
1367   StgTSO_sp(CurrentTSO) = Sp;
1368   (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1369   Sp = StgTSO_sp(CurrentTSO);
1370   frame = Sp;
1371   trec = StgTSO_trec(CurrentTSO);
1372   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1373
1374   if (frame_type == CATCH_RETRY_FRAME) {
1375     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1376     ASSERT(outer != NO_TREC);
1377     // Abort the transaction attempting the current branch
1378     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1379     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1380     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1381       // Retry in the first branch: try the alternative
1382       ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1383       StgTSO_trec(CurrentTSO) = trec;
1384       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1385       R1 = StgCatchRetryFrame_alt_code(frame);
1386       jump stg_ap_v_fast;
1387     } else {
1388       // Retry in the alternative code: propagate the retry
1389       StgTSO_trec(CurrentTSO) = outer;
1390       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1391       goto retry_pop_stack;
1392     }
1393   }
1394
1395   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1396   ASSERT(frame_type == ATOMICALLY_FRAME);
1397   if (outer != NO_TREC) {
1398     // We called retry while checking invariants, so abort the current
1399     // invariant check (merging its TVar accesses into the parents read
1400     // set so we'll wait on them)
1401     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1402     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1403     trec = outer;
1404     StgTSO_trec(CurrentTSO) = trec;
1405     ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1406   }
1407   ASSERT(outer == NO_TREC);
1408
1409   (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1410   if (r != 0) {
1411     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1412     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1413     Sp = frame;
1414     // Fix up the stack in the unregisterised case: the return convention is different.
1415     IF_NOT_REG_R1(Sp_adj(-2); 
1416                   Sp(1) = stg_NO_FINALIZER_closure;
1417                   Sp(0) = stg_ut_1_0_unreg_info;)
1418     R3 = trec; // passing to stmWaitUnblock()
1419     jump stg_block_stmwait;
1420   } else {
1421     // Transaction was not valid: retry immediately
1422     ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1423     StgTSO_trec(CurrentTSO) = trec;
1424     R1 = StgAtomicallyFrame_code(frame);
1425     Sp = frame;
1426     jump stg_ap_v_fast;
1427   }
1428 }
1429
1430
1431 checkzh_fast
1432 {
1433   W_ trec, closure;
1434
1435   /* Args: R1 = invariant closure */
1436   MAYBE_GC (R1_PTR, checkzh_fast); 
1437
1438   trec = StgTSO_trec(CurrentTSO);
1439   closure = R1;
1440   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1441                                      trec "ptr",
1442                                      closure "ptr") [];
1443
1444   jump %ENTRY_CODE(Sp(0));
1445 }
1446
1447
1448 newTVarzh_fast
1449 {
1450   W_ tv;
1451   W_ new_value;
1452
1453   /* Args: R1 = initialisation value */
1454
1455   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1456   new_value = R1;
1457   ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1458   RET_P(tv);
1459 }
1460
1461
1462 readTVarzh_fast
1463 {
1464   W_ trec;
1465   W_ tvar;
1466   W_ result;
1467
1468   /* Args: R1 = TVar closure */
1469
1470   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1471   trec = StgTSO_trec(CurrentTSO);
1472   tvar = R1;
1473   ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1474
1475   RET_P(result);
1476 }
1477
1478
1479 writeTVarzh_fast
1480 {
1481   W_ trec;
1482   W_ tvar;
1483   W_ new_value;
1484   
1485   /* Args: R1 = TVar closure */
1486   /*       R2 = New value    */
1487
1488   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1489   trec = StgTSO_trec(CurrentTSO);
1490   tvar = R1;
1491   new_value = R2;
1492   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1493
1494   jump %ENTRY_CODE(Sp(0));
1495 }
1496
1497
1498 /* -----------------------------------------------------------------------------
1499  * MVar primitives
1500  *
1501  * take & putMVar work as follows.  Firstly, an important invariant:
1502  *
1503  *    If the MVar is full, then the blocking queue contains only
1504  *    threads blocked on putMVar, and if the MVar is empty then the
1505  *    blocking queue contains only threads blocked on takeMVar.
1506  *
1507  * takeMvar:
1508  *    MVar empty : then add ourselves to the blocking queue
1509  *    MVar full  : remove the value from the MVar, and
1510  *                 blocking queue empty     : return
1511  *                 blocking queue non-empty : perform the first blocked putMVar
1512  *                                            from the queue, and wake up the
1513  *                                            thread (MVar is now full again)
1514  *
1515  * putMVar is just the dual of the above algorithm.
1516  *
1517  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1518  * the stack of the thread waiting to do the putMVar.  See
1519  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1520  * the stack layout, and the PerformPut and PerformTake macros below.
1521  *
1522  * It is important that a blocked take or put is woken up with the
1523  * take/put already performed, because otherwise there would be a
1524  * small window of vulnerability where the thread could receive an
1525  * exception and never perform its take or put, and we'd end up with a
1526  * deadlock.
1527  *
1528  * -------------------------------------------------------------------------- */
1529
1530 isEmptyMVarzh_fast
1531 {
1532     /* args: R1 = MVar closure */
1533
1534     if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) {
1535         RET_N(1);
1536     } else {
1537         RET_N(0);
1538     }
1539 }
1540
1541 newMVarzh_fast
1542 {
1543     /* args: none */
1544     W_ mvar;
1545
1546     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1547   
1548     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1549     SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
1550         // MVARs start dirty: generation 0 has no mutable list
1551     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1552     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1553     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1554     RET_P(mvar);
1555 }
1556
1557
1558 /* If R1 isn't available, pass it on the stack */
1559 #ifdef REG_R1
1560 #define PerformTake(tso, value)                         \
1561     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1562     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1563 #else
1564 #define PerformTake(tso, value)                                 \
1565     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1566     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1567 #endif
1568
1569 #define PerformPut(tso,lval)                    \
1570     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1571     lval = W_[StgTSO_sp(tso) - WDS(1)];
1572
1573 takeMVarzh_fast
1574 {
1575     W_ mvar, val, info, tso;
1576
1577     /* args: R1 = MVar closure */
1578     mvar = R1;
1579
1580 #if defined(THREADED_RTS)
1581     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1582 #else
1583     info = GET_INFO(mvar);
1584 #endif
1585         
1586     if (info == stg_MVAR_CLEAN_info) {
1587         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") [];
1588     }
1589
1590     /* If the MVar is empty, put ourselves on its blocking queue,
1591      * and wait until we're woken up.
1592      */
1593     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1594         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1595             StgMVar_head(mvar) = CurrentTSO;
1596         } else {
1597             foreign "C" setTSOLink(MyCapability() "ptr", 
1598                                    StgMVar_tail(mvar) "ptr",
1599                                    CurrentTSO) [];
1600         }
1601         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1602         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1603         StgTSO_block_info(CurrentTSO)  = mvar;
1604         StgMVar_tail(mvar) = CurrentTSO;
1605         
1606         R1 = mvar;
1607         jump stg_block_takemvar;
1608   }
1609
1610   /* we got the value... */
1611   val = StgMVar_value(mvar);
1612
1613   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1614   {
1615       /* There are putMVar(s) waiting... 
1616        * wake up the first thread on the queue
1617        */
1618       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1619
1620       /* actually perform the putMVar for the thread that we just woke up */
1621       tso = StgMVar_head(mvar);
1622       PerformPut(tso,StgMVar_value(mvar));
1623
1624       if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1625           foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1626       }
1627
1628       ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1629                                             StgMVar_head(mvar) "ptr", 1) [];
1630       StgMVar_head(mvar) = tso;
1631
1632       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1633           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1634       }
1635
1636 #if defined(THREADED_RTS)
1637       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1638 #else
1639       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1640 #endif
1641       RET_P(val);
1642   } 
1643   else
1644   {
1645       /* No further putMVars, MVar is now empty */
1646       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1647  
1648 #if defined(THREADED_RTS)
1649       unlockClosure(mvar, stg_MVAR_DIRTY_info);
1650 #else
1651       SET_INFO(mvar,stg_MVAR_DIRTY_info);
1652 #endif
1653
1654       RET_P(val);
1655   }
1656 }
1657
1658
1659 tryTakeMVarzh_fast
1660 {
1661     W_ mvar, val, info, tso;
1662
1663     /* args: R1 = MVar closure */
1664
1665     mvar = R1;
1666
1667 #if defined(THREADED_RTS)
1668     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1669 #else
1670     info = GET_INFO(mvar);
1671 #endif
1672
1673     if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) {
1674 #if defined(THREADED_RTS)
1675         unlockClosure(mvar, info);
1676 #endif
1677         /* HACK: we need a pointer to pass back, 
1678          * so we abuse NO_FINALIZER_closure
1679          */
1680         RET_NP(0, stg_NO_FINALIZER_closure);
1681     }
1682
1683     if (info == stg_MVAR_CLEAN_info) {
1684         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1685     }
1686
1687     /* we got the value... */
1688     val = StgMVar_value(mvar);
1689
1690     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1691
1692         /* There are putMVar(s) waiting... 
1693          * wake up the first thread on the queue
1694          */
1695         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1696
1697         /* actually perform the putMVar for the thread that we just woke up */
1698         tso = StgMVar_head(mvar);
1699         PerformPut(tso,StgMVar_value(mvar));
1700         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1701             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1702         }
1703
1704         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1705                                               StgMVar_head(mvar) "ptr", 1) [];
1706         StgMVar_head(mvar) = tso;
1707
1708         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1709             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1710         }
1711 #if defined(THREADED_RTS)
1712         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1713 #else
1714         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1715 #endif
1716     }
1717     else 
1718     {
1719         /* No further putMVars, MVar is now empty */
1720         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1721 #if defined(THREADED_RTS)
1722         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1723 #else
1724         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1725 #endif
1726     }
1727     
1728     RET_NP(1, val);
1729 }
1730
1731
1732 putMVarzh_fast
1733 {
1734     W_ mvar, val, info, tso;
1735
1736     /* args: R1 = MVar, R2 = value */
1737     mvar = R1;
1738     val  = R2;
1739
1740 #if defined(THREADED_RTS)
1741     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
1742 #else
1743     info = GET_INFO(mvar);
1744 #endif
1745
1746     if (info == stg_MVAR_CLEAN_info) {
1747         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1748     }
1749
1750     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1751         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1752             StgMVar_head(mvar) = CurrentTSO;
1753         } else {
1754             foreign "C" setTSOLink(MyCapability() "ptr", 
1755                                    StgMVar_tail(mvar) "ptr",
1756                                    CurrentTSO) [];
1757         }
1758         StgTSO__link(CurrentTSO)       = stg_END_TSO_QUEUE_closure;
1759         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1760         StgTSO_block_info(CurrentTSO)  = mvar;
1761         StgMVar_tail(mvar) = CurrentTSO;
1762         
1763         R1 = mvar;
1764         R2 = val;
1765         jump stg_block_putmvar;
1766     }
1767   
1768     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1769
1770         /* There are takeMVar(s) waiting: wake up the first one
1771          */
1772         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1773
1774         /* actually perform the takeMVar */
1775         tso = StgMVar_head(mvar);
1776         PerformTake(tso, val);
1777         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1778             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1779         }
1780       
1781         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1782                                               StgMVar_head(mvar) "ptr", 1) [];
1783         StgMVar_head(mvar) = tso;
1784
1785         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1786             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1787         }
1788
1789 #if defined(THREADED_RTS)
1790         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1791 #else
1792         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1793 #endif
1794         jump %ENTRY_CODE(Sp(0));
1795     }
1796     else
1797     {
1798         /* No further takes, the MVar is now full. */
1799         StgMVar_value(mvar) = val;
1800
1801 #if defined(THREADED_RTS)
1802         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1803 #else
1804         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1805 #endif
1806         jump %ENTRY_CODE(Sp(0));
1807     }
1808     
1809     /* ToDo: yield afterward for better communication performance? */
1810 }
1811
1812
1813 tryPutMVarzh_fast
1814 {
1815     W_ mvar, info, tso;
1816
1817     /* args: R1 = MVar, R2 = value */
1818     mvar = R1;
1819
1820 #if defined(THREADED_RTS)
1821     ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
1822 #else
1823     info = GET_INFO(mvar);
1824 #endif
1825
1826     if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) {
1827 #if defined(THREADED_RTS)
1828         unlockClosure(mvar, info);
1829 #endif
1830         RET_N(0);
1831     }
1832   
1833     if (info == stg_MVAR_CLEAN_info) {
1834         foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr");
1835     }
1836
1837     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1838
1839         /* There are takeMVar(s) waiting: wake up the first one
1840          */
1841         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1842         
1843         /* actually perform the takeMVar */
1844         tso = StgMVar_head(mvar);
1845         PerformTake(tso, R2);
1846         if (TO_W_(StgTSO_flags(tso)) & TSO_DIRTY == 0) {
1847             foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") [];
1848         }
1849       
1850         ("ptr" tso) = foreign "C" unblockOne_(MyCapability() "ptr", 
1851                                               StgMVar_head(mvar) "ptr", 1) [];
1852         StgMVar_head(mvar) = tso;
1853
1854         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1855             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1856         }
1857
1858 #if defined(THREADED_RTS)
1859         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1860 #else
1861         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1862 #endif
1863     }
1864     else
1865     {
1866         /* No further takes, the MVar is now full. */
1867         StgMVar_value(mvar) = R2;
1868
1869 #if defined(THREADED_RTS)
1870         unlockClosure(mvar, stg_MVAR_DIRTY_info);
1871 #else
1872         SET_INFO(mvar,stg_MVAR_DIRTY_info);
1873 #endif
1874     }
1875     
1876     RET_N(1);
1877     /* ToDo: yield afterward for better communication performance? */
1878 }
1879
1880
1881 /* -----------------------------------------------------------------------------
1882    Stable pointer primitives
1883    -------------------------------------------------------------------------  */
1884
1885 makeStableNamezh_fast
1886 {
1887     W_ index, sn_obj;
1888
1889     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1890   
1891     (index) = foreign "C" lookupStableName(R1 "ptr") [];
1892
1893     /* Is there already a StableName for this heap object?
1894      *  stable_ptr_table is a pointer to an array of snEntry structs.
1895      */
1896     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1897         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1898         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1899         StgStableName_sn(sn_obj) = index;
1900         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1901     } else {
1902         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1903     }
1904     
1905     RET_P(sn_obj);
1906 }
1907
1908
1909 makeStablePtrzh_fast
1910 {
1911     /* Args: R1 = a */
1912     W_ sp;
1913     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1914     ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
1915     RET_N(sp);
1916 }
1917
1918 deRefStablePtrzh_fast
1919 {
1920     /* Args: R1 = the stable ptr */
1921     W_ r, sp;
1922     sp = R1;
1923     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1924     RET_P(r);
1925 }
1926
1927 /* -----------------------------------------------------------------------------
1928    Bytecode object primitives
1929    -------------------------------------------------------------------------  */
1930
1931 newBCOzh_fast
1932 {
1933     /* R1 = instrs
1934        R2 = literals
1935        R3 = ptrs
1936        R4 = arity
1937        R5 = bitmap array
1938     */
1939     W_ bco, bitmap_arr, bytes, words;
1940     
1941     bitmap_arr = R5;
1942
1943     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1944     bytes = WDS(words);
1945
1946     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1947
1948     bco = Hp - bytes + WDS(1);
1949     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1950     
1951     StgBCO_instrs(bco)     = R1;
1952     StgBCO_literals(bco)   = R2;
1953     StgBCO_ptrs(bco)       = R3;
1954     StgBCO_arity(bco)      = HALF_W_(R4);
1955     StgBCO_size(bco)       = HALF_W_(words);
1956     
1957     // Copy the arity/bitmap info into the BCO
1958     W_ i;
1959     i = 0;
1960 for:
1961     if (i < StgArrWords_words(bitmap_arr)) {
1962         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1963         i = i + 1;
1964         goto for;
1965     }
1966     
1967     RET_P(bco);
1968 }
1969
1970
1971 mkApUpd0zh_fast
1972 {
1973     // R1 = the BCO# for the AP
1974     //  
1975     W_ ap;
1976
1977     // This function is *only* used to wrap zero-arity BCOs in an
1978     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1979     // saturated and always points directly to a FUN or BCO.
1980     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1981            StgBCO_arity(R1) == HALF_W_(0));
1982
1983     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1984     TICK_ALLOC_UP_THK(0, 0);
1985     CCCS_ALLOC(SIZEOF_StgAP);
1986
1987     ap = Hp - SIZEOF_StgAP + WDS(1);
1988     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1989     
1990     StgAP_n_args(ap) = HALF_W_(0);
1991     StgAP_fun(ap) = R1;
1992     
1993     RET_P(ap);
1994 }
1995
1996 unpackClosurezh_fast
1997 {
1998 /* args: R1 = closure to analyze */
1999 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
2000
2001     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
2002     info  = %GET_STD_INFO(UNTAG(R1));
2003
2004     // Some closures have non-standard layout, so we omit those here.
2005     W_ type;
2006     type = TO_W_(%INFO_TYPE(info));
2007     switch [0 .. N_CLOSURE_TYPES] type {
2008     case THUNK_SELECTOR : {
2009         ptrs = 1;
2010         nptrs = 0;
2011         goto out;
2012     }
2013     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
2014          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
2015         ptrs = 0;
2016         nptrs = 0;
2017         goto out;
2018     }
2019     default: {
2020         ptrs  = TO_W_(%INFO_PTRS(info)); 
2021         nptrs = TO_W_(%INFO_NPTRS(info));
2022         goto out;
2023     }}
2024 out:
2025
2026     W_ ptrs_arr_sz, nptrs_arr_sz;
2027     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
2028     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
2029
2030     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
2031
2032     W_ clos;
2033     clos = UNTAG(R1);
2034
2035     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
2036     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
2037
2038     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
2039     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
2040     p = 0;
2041 for:
2042     if(p < ptrs) {
2043          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
2044          p = p + 1;
2045          goto for;
2046     }
2047     
2048     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
2049     StgArrWords_words(nptrs_arr) = nptrs;
2050     p = 0;
2051 for2:
2052     if(p < nptrs) {
2053          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
2054          p = p + 1;
2055          goto for2;
2056     }
2057     RET_NPP(info, ptrs_arr, nptrs_arr);
2058 }
2059
2060 /* -----------------------------------------------------------------------------
2061    Thread I/O blocking primitives
2062    -------------------------------------------------------------------------- */
2063
2064 /* Add a thread to the end of the blocked queue. (C-- version of the C
2065  * macro in Schedule.h).
2066  */
2067 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
2068     ASSERT(StgTSO__link(tso) == END_TSO_QUEUE);         \
2069     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
2070       W_[blocked_queue_hd] = tso;                       \
2071     } else {                                            \
2072       foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \
2073     }                                                   \
2074     W_[blocked_queue_tl] = tso;
2075
2076 waitReadzh_fast
2077 {
2078     /* args: R1 */
2079 #ifdef THREADED_RTS
2080     foreign "C" barf("waitRead# on threaded RTS") never returns;
2081 #else
2082
2083     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2084     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2085     StgTSO_block_info(CurrentTSO) = R1;
2086     // No locking - we're not going to use this interface in the
2087     // threaded RTS anyway.
2088     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2089     jump stg_block_noregs;
2090 #endif
2091 }
2092
2093 waitWritezh_fast
2094 {
2095     /* args: R1 */
2096 #ifdef THREADED_RTS
2097     foreign "C" barf("waitWrite# on threaded RTS") never returns;
2098 #else
2099
2100     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2101     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2102     StgTSO_block_info(CurrentTSO) = R1;
2103     // No locking - we're not going to use this interface in the
2104     // threaded RTS anyway.
2105     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2106     jump stg_block_noregs;
2107 #endif
2108 }
2109
2110
2111 STRING(stg_delayzh_malloc_str, "delayzh_fast")
2112 delayzh_fast
2113 {
2114 #ifdef mingw32_HOST_OS
2115     W_ ares;
2116     CInt reqID;
2117 #else
2118     W_ t, prev, target;
2119 #endif
2120
2121 #ifdef THREADED_RTS
2122     foreign "C" barf("delay# on threaded RTS") never returns;
2123 #else
2124
2125     /* args: R1 (microsecond delay amount) */
2126     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2127     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2128
2129 #ifdef mingw32_HOST_OS
2130
2131     /* could probably allocate this on the heap instead */
2132     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2133                                             stg_delayzh_malloc_str);
2134     (reqID) = foreign "C" addDelayRequest(R1);
2135     StgAsyncIOResult_reqID(ares)   = reqID;
2136     StgAsyncIOResult_len(ares)     = 0;
2137     StgAsyncIOResult_errCode(ares) = 0;
2138     StgTSO_block_info(CurrentTSO)  = ares;
2139
2140     /* Having all async-blocked threads reside on the blocked_queue
2141      * simplifies matters, so change the status to OnDoProc put the
2142      * delayed thread on the blocked_queue.
2143      */
2144     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2145     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2146     jump stg_block_async_void;
2147
2148 #else
2149
2150     W_ time;
2151     W_ divisor;
2152     (time) = foreign "C" getourtimeofday() [R1];
2153     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags));
2154     if (divisor == 0) {
2155         divisor = 50;
2156     }
2157     divisor = divisor * 1000;
2158     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2159            + time + 1; /* Add 1 as getourtimeofday rounds down */
2160     StgTSO_block_info(CurrentTSO) = target;
2161
2162     /* Insert the new thread in the sleeping queue. */
2163     prev = NULL;
2164     t = W_[sleeping_queue];
2165 while:
2166     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2167         prev = t;
2168         t = StgTSO__link(t);
2169         goto while;
2170     }
2171
2172     StgTSO__link(CurrentTSO) = t;
2173     if (prev == NULL) {
2174         W_[sleeping_queue] = CurrentTSO;
2175     } else {
2176         foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) [];
2177     }
2178     jump stg_block_noregs;
2179 #endif
2180 #endif /* !THREADED_RTS */
2181 }
2182
2183
2184 #ifdef mingw32_HOST_OS
2185 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2186 asyncReadzh_fast
2187 {
2188     W_ ares;
2189     CInt reqID;
2190
2191 #ifdef THREADED_RTS
2192     foreign "C" barf("asyncRead# on threaded RTS") never returns;
2193 #else
2194
2195     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2196     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2197     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2198
2199     /* could probably allocate this on the heap instead */
2200     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2201                                             stg_asyncReadzh_malloc_str)
2202                         [R1,R2,R3,R4];
2203     (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2204     StgAsyncIOResult_reqID(ares)   = reqID;
2205     StgAsyncIOResult_len(ares)     = 0;
2206     StgAsyncIOResult_errCode(ares) = 0;
2207     StgTSO_block_info(CurrentTSO)  = ares;
2208     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2209     jump stg_block_async;
2210 #endif
2211 }
2212
2213 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2214 asyncWritezh_fast
2215 {
2216     W_ ares;
2217     CInt reqID;
2218
2219 #ifdef THREADED_RTS
2220     foreign "C" barf("asyncWrite# on threaded RTS") never returns;
2221 #else
2222
2223     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2224     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2225     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2226
2227     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2228                                             stg_asyncWritezh_malloc_str)
2229                         [R1,R2,R3,R4];
2230     (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2231
2232     StgAsyncIOResult_reqID(ares)   = reqID;
2233     StgAsyncIOResult_len(ares)     = 0;
2234     StgAsyncIOResult_errCode(ares) = 0;
2235     StgTSO_block_info(CurrentTSO)  = ares;
2236     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2237     jump stg_block_async;
2238 #endif
2239 }
2240
2241 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2242 asyncDoProczh_fast
2243 {
2244     W_ ares;
2245     CInt reqID;
2246
2247 #ifdef THREADED_RTS
2248     foreign "C" barf("asyncDoProc# on threaded RTS") never returns;
2249 #else
2250
2251     /* args: R1 = proc, R2 = param */
2252     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2253     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2254
2255     /* could probably allocate this on the heap instead */
2256     ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2257                                             stg_asyncDoProczh_malloc_str) 
2258                                 [R1,R2];
2259     (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2260     StgAsyncIOResult_reqID(ares)   = reqID;
2261     StgAsyncIOResult_len(ares)     = 0;
2262     StgAsyncIOResult_errCode(ares) = 0;
2263     StgTSO_block_info(CurrentTSO) = ares;
2264     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2265     jump stg_block_async;
2266 #endif
2267 }
2268 #endif
2269
2270 // noDuplicate# tries to ensure that none of the thunks under
2271 // evaluation by the current thread are also under evaluation by
2272 // another thread.  It relies on *both* threads doing noDuplicate#;
2273 // the second one will get blocked if they are duplicating some work.
2274 noDuplicatezh_fast
2275 {
2276     SAVE_THREAD_STATE();
2277     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2278     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2279     
2280     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2281         jump stg_threadFinished;
2282     } else {
2283         LOAD_THREAD_STATE();
2284         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2285         jump %ENTRY_CODE(Sp(0));
2286     }
2287 }
2288
2289 getApStackValzh_fast
2290 {
2291    W_ ap_stack, offset, val, ok;
2292
2293    /* args: R1 = AP_STACK, R2 = offset */
2294    ap_stack = R1;
2295    offset   = R2;
2296
2297    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2298         ok = 1;
2299         val = StgAP_STACK_payload(ap_stack,offset); 
2300    } else {
2301         ok = 0;
2302         val = R1;
2303    }
2304    RET_NP(ok,val);
2305 }