[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
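/* As a purely illustrative sketch of these conventions (the primop below is
 * hypothetical and does not exist in this file): a primop taking one Int# in
 * R1 and returning it unchanged would look roughly like
 *
 *    identityzh_fast                 // hypothetical name
 *    {
 *        W_ x;
 *        x = R1;                     // argument arrives in an Stg register
 *        RET_N(x);                   // unboxed-tuple return via a RET_xxxx macro
 *    }
 */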
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check: the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
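/* For example, assuming 8-byte words: a request for 3 bytes still gets one
 * full payload word, since ROUNDUP_BYTES_TO_WDS(3) = 1, so the array body
 * occupies one word (8 bytes) in addition to the StgArrWords header.
 */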
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
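/* Worked example of the adjustment above: if SIZEOF_StgArrWords happened to
 * be 12 bytes (not a multiple of 8), we would allocate one extra word and
 * advance p by one word, so the payload starts 16 bytes into the (8-byte
 * aligned) allocation and is therefore itself 8-byte aligned.  The figure 12
 * is purely illustrative; the real header size depends on the word size and
 * on whether profiling headers are compiled in.
 */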
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = number of elements (n), R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is still on the mutable list (info is MUT_ARR_PTRS_FROZEN0,
134   // nothing more to do), or it isn't (plain MUT_ARR_PTRS_FROZEN), in which
135   // case we must put it back on the mutable list ourselves, as below.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then 
170       we want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178       Obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
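    /* At the Haskell level the effect is roughly the following (a non-atomic
       sketch only: readMutVar/writeMutVar stand in for the corresponding
       primops, and the real code below builds explicit shared thunks under a
       lock rather than forcing anything):

          atomicModifyMutVar mv f = do
              x <- readMutVar mv           -- x ~ StgMutVar_var(R1)
              writeMutVar mv (fst (f x))   -- y ~ [stg_sel_0 z]
              return (snd (f x))           -- r ~ [stg_sel_1 z]

       where both uses of (f x) share the single thunk z allocated below.
    */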
184
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
246 mkForeignObjzh_fast
247 {
248   /* R1 = ptr to foreign object,
249   */
250   W_ result;
251
252   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
253
254   result = Hp - SIZEOF_StgForeignObj + WDS(1);
255   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
256   StgForeignObj_data(result) = R1;
257
258   /* returns (# s#, ForeignObj# #) */
259   RET_P(result);
260 }
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
266 STRING(stg_weak_msg,"New weak pointer at %p\n")
267
268 mkWeakzh_fast
269 {
270   /* R1 = key
271      R2 = value
272      R3 = finalizer (or NULL)
273   */
274   W_ w;
275
276   if (R3 == NULL) {
277     R3 = stg_NO_FINALIZER_closure;
278   }
279
280   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
281
282   w = Hp - SIZEOF_StgWeak + WDS(1);
283   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
284
285   StgWeak_key(w)       = R1;
286   StgWeak_value(w)     = R2;
287   StgWeak_finalizer(w) = R3;
288
289   StgWeak_link(w)       = W_[weak_ptr_list];
290   W_[weak_ptr_list]     = w;
291
292   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));
293
294   RET_P(w);
295 }
296
297
298 finalizzeWeakzh_fast
299 {
300   /* R1 = weak ptr
301    */
302   W_ w, f;
303
304   w = R1;
305
306   // already dead?
307   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
308       RET_NP(0,stg_NO_FINALIZER_closure);
309   }
310
311   // kill it
312 #ifdef PROFILING
313   // @LDV profiling
314   // A weak pointer is inherently used, so we do not need to call
315   // LDV_recordDead_FILL_SLOP_DYNAMIC():
316   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
317   // or, LDV_recordDead():
318   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
319   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
320   // large as weak pointers, so there is no need to fill the slop, either.
321   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
322 #endif
323
324   //
325   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
326   //
327   SET_INFO(w,stg_DEAD_WEAK_info);
328   LDV_RECORD_CREATE(w);
329
330   f = StgWeak_finalizer(w);
331   StgDeadWeak_link(w) = StgWeak_link(w);
332
333   /* return the finalizer */
334   if (f == stg_NO_FINALIZER_closure) {
335       RET_NP(0,stg_NO_FINALIZER_closure);
336   } else {
337       RET_NP(1,f);
338   }
339 }
340
341 deRefWeakzh_fast
342 {
343   /* R1 = weak ptr */
344   W_ w, code, val;
345
346   w = R1;
347   if (GET_INFO(w) == stg_WEAK_info) {
348     code = 1;
349     val = StgWeak_value(w);
350   } else {
351     code = 0;
352     val = w;
353   }
354   RET_NP(code,val);
355 }
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
364 int2Integerzh_fast
365 {
366    /* arguments: R1 = Int# */
367
368    W_ val, s, p;        /* to avoid aliasing */
369
370    val = R1;
371    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
372
373    p = Hp - SIZEOF_StgArrWords;
374    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
375    StgArrWords_words(p) = 1;
376
377    /* mpz_set_si is inlined here, makes things simpler */
378    if (%lt(val,0)) { 
379         s  = -1;
380         Hp(0) = -val;
381    } else { 
382      if (%gt(val,0)) {
383         s = 1;
384         Hp(0) = val;
385      } else {
386         s = 0;
387      }
388   }
389
390    /* returns (# size  :: Int#, 
391                  data  :: ByteArray# 
392                #)
393    */
394    RET_NP(s,p);
395 }
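/* Worked example of the sign-magnitude result built above (assuming
 * word-sized limbs, as noted at the start of this section):
 *
 *    int2Integer# (-5#)  ==>  s = -1, one-word ByteArray# holding the limb 5
 *    int2Integer#   0#   ==>  s =  0, limb left unwritten (ignored when s == 0)
 */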
396
397 word2Integerzh_fast
398 {
399    /* arguments: R1 = Word# */
400
401    W_ val, s, p;        /* to avoid aliasing */
402
403    val = R1;
404
405    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
406
407    p = Hp - SIZEOF_StgArrWords;
408    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
409    StgArrWords_words(p) = 1;
410
411    if (val != 0) {
412         s = 1;
413         W_[Hp] = val;
414    } else {
415         s = 0;
416    }
417
418    /* returns (# size  :: Int#, 
419                  data  :: ByteArray# #)
420    */
421    RET_NP(s,p);
422 }
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
431 int64ToIntegerzh_fast
432 {
433    /* arguments: L1 = Int64# */
434
435    L_ val;
436    W_ hi, s, neg, words_needed, p;
437
438    val = L1;
439    neg = 0;
440
441    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
442        words_needed = 2;
443    } else { 
444        // minimum is one word
445        words_needed = 1;
446    }
447
448    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
449                NO_PTRS, int64ToIntegerzh_fast );
450
451    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
452    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
453    StgArrWords_words(p) = words_needed;
454
455    if ( %lt(val,0::L_) ) {
456      neg = 1;
457      val = -val;
458    }
459
460    hi = TO_W_(val >> 32);
461
462    if ( words_needed == 2 )  { 
463       s = 2;
464       Hp(-1) = TO_W_(val);
465       Hp(0) = hi;
466    } else { 
467        if ( val != 0::L_ ) {
468            s = 1;
469            Hp(0) = TO_W_(val);
470        } else /* val==0 */  {
471            s = 0;
472        }
473    }
474    if ( neg != 0 ) {
475         s = -s;
476    }
477
478    /* returns (# size  :: Int#, 
479                  data  :: ByteArray# #)
480    */
481    RET_NP(s,p);
482 }
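/* For instance, with 32-bit words:
 *
 *    int64ToInteger# of 2^32  ==>  s =  2, limbs (low, high) = (0, 1)
 *    int64ToInteger# of -1    ==>  s = -1, single limb 1
 */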
483
484 word64ToIntegerzh_fast
485 {
486    /* arguments: L1 = Word64# */
487
488    L_ val;
489    W_ hi, s, words_needed, p;
490
491    val = L1;
492    if ( val >= 0x100000000::L_ ) {
493       words_needed = 2;
494    } else {
495       words_needed = 1;
496    }
497
498    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
499                NO_PTRS, word64ToIntegerzh_fast );
500
501    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
502    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
503    StgArrWords_words(p) = words_needed;
504
505    hi = TO_W_(val >> 32);
506    if ( val >= 0x100000000::L_ ) { 
507      s = 2;
508      Hp(-1) = TO_W_(val);
509      Hp(0)  = hi;
510    } else {
511        if ( val != 0::L_ ) {
512            s = 1;
513            Hp(0) = TO_W_(val);
514        } else /* val==0 */  {
515            s = 0;
516        }
517    }
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# #)
521    */
522    RET_NP(s,p);
523 }
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
528 /* ToDo: this is shockingly inefficient */
529
530 section "bss" {
531   mp_tmp1:
532     bits8 [SIZEOF_MP_INT];
533 }
534
535 section "bss" {
536   mp_tmp2:
537     bits8 [SIZEOF_MP_INT];
538 }
539
540 section "bss" {
541   result1:
542     bits8 [SIZEOF_MP_INT];
543 }
544
545 section "bss" {
546   result2:
547     bits8 [SIZEOF_MP_INT];
548 }
549
550 #define GMP_TAKE2_RET1(name,mp_fun)                     \
551 name                                                    \
552 {                                                       \
553   CInt s1, s2;                                          \
554   W_ d1, d2;                                            \
555                                                         \
556   /* call doYouWantToGC() */                            \
557   MAYBE_GC(R2_PTR & R4_PTR, name);                      \
558                                                         \
559   s1 = W_TO_INT(R1);                                    \
560   d1 = R2;                                              \
561   s2 = W_TO_INT(R3);                                    \
562   d2 = R4;                                              \
563                                                         \
564   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));  \
565   MP_INT__mp_size(mp_tmp1)  = (s1);                     \
566   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);         \
567   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));  \
568   MP_INT__mp_size(mp_tmp2)  = (s2);                     \
569   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);         \
570                                                         \
571   foreign "C" mpz_init(result1);                        \
572                                                         \
573   /* Perform the operation */                           \
574   foreign "C" mp_fun(result1,mp_tmp1,mp_tmp2);          \
575                                                         \
576   RET_NP(TO_W_(MP_INT__mp_size(result1)),               \
577          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);   \
578 }
579
580 #define GMP_TAKE1_RET1(name,mp_fun)                             \
581 name                                                            \
582 {                                                               \
583   CInt s1;                                                      \
584   W_ d1;                                                        \
585                                                                 \
586   /* call doYouWantToGC() */                                    \
587   MAYBE_GC(R2_PTR, name);                                       \
588                                                                 \
589   d1 = R2;                                                      \
590   s1 = W_TO_INT(R1);                                            \
591                                                                 \
592   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
593   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
594   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
595                                                                 \
596   foreign "C" mpz_init(result1);                                \
597                                                                 \
598   /* Perform the operation */                                   \
599   foreign "C" mp_fun(result1,mp_tmp1);                          \
600                                                                 \
601   RET_NP(TO_W_(MP_INT__mp_size(result1)),                       \
602          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);           \
603 }
604
605 #define GMP_TAKE2_RET2(name,mp_fun)                             \
606 name                                                            \
607 {                                                               \
608   CInt s1, s2;                                                  \
609   W_ d1, d2;                                                    \
610                                                                 \
611   /* call doYouWantToGC() */                                    \
612   MAYBE_GC(R2_PTR & R4_PTR, name);                              \
613                                                                 \
614   s1 = W_TO_INT(R1);                                            \
615   d1 = R2;                                                      \
616   s2 = W_TO_INT(R3);                                            \
617   d2 = R4;                                                      \
618                                                                 \
619   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
620   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
621   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
622   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));      \
623   MP_INT__mp_size(mp_tmp2)      = (s2);                         \
624   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);             \
625                                                                 \
626   foreign "C" mpz_init(result1);                                \
627   foreign "C" mpz_init(result2);                                \
628                                                                 \
629   /* Perform the operation */                                   \
630   foreign "C" mp_fun(result1,result2,mp_tmp1,mp_tmp2);          \
631                                                                 \
632   RET_NPNP(TO_W_(MP_INT__mp_size(result1)),                     \
633            MP_INT__mp_d(result1) - SIZEOF_StgArrWords,          \
634            TO_W_(MP_INT__mp_size(result2)),                     \
635            MP_INT__mp_d(result2) - SIZEOF_StgArrWords);         \
636 }
637
638 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
639 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
640 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
641 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
642 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
643 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
644 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
645 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
646 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
647 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
648 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
649
650 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
651 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
652
653 section "bss" {
654   aa:  W_; // NB. aa is really an mp_limb_t
655 }
656
657 gcdIntzh_fast
658 {
659     /* R1 = the first Int#; R2 = the second Int# */
660     W_ r; 
661
662     W_[aa] = R1;
663     r = foreign "C" mpn_gcd_1(aa, 1, R2);
664
665     R1 = r;
666     /* Result parked in R1, return via info-pointer at TOS */
667     jump %ENTRY_CODE(Sp(0));
668 }
669
670
671 gcdIntegerIntzh_fast
672 {
673     /* R1 = s1; R2 = d1; R3 = the int */
674     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
675     
676     /* Result parked in R1, return via info-pointer at TOS */
677     jump %ENTRY_CODE(Sp(0));
678 }
679
680
681 cmpIntegerIntzh_fast
682 {
683     /* R1 = s1; R2 = d1; R3 = the int */
684     W_ usize, vsize, v_digit, u_digit;
685
686     usize = R1;
687     vsize = 0;
688     v_digit = R3;
689
690     // paraphrased from mpz_cmp_si() in the GMP sources
691     if (%gt(v_digit,0)) {
692         vsize = 1;
693     } else { 
694         if (%lt(v_digit,0)) {
695             vsize = -1;
696             v_digit = -v_digit;
697         }
698     }
699
700     if (usize != vsize) {
701         R1 = usize - vsize; 
702         jump %ENTRY_CODE(Sp(0));
703     }
704
705     if (usize == 0) {
706         R1 = 0; 
707         jump %ENTRY_CODE(Sp(0));
708     }
709
710     u_digit = W_[BYTE_ARR_CTS(R2)];
711
712     if (u_digit == v_digit) {
713         R1 = 0; 
714         jump %ENTRY_CODE(Sp(0));
715     }
716
717     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
718         R1 = usize; 
719     } else {
720         R1 = -usize; 
721     }
722
723     jump %ENTRY_CODE(Sp(0));
724 }
725
726 cmpIntegerzh_fast
727 {
728     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
729     W_ usize, vsize, size, up, vp;
730     CInt cmp;
731
732     // paraphrased from mpz_cmp() in the GMP sources
733     usize = R1;
734     vsize = R3;
735
736     if (usize != vsize) {
737         R1 = usize - vsize; 
738         jump %ENTRY_CODE(Sp(0));
739     }
740
741     if (usize == 0) {
742         R1 = 0; 
743         jump %ENTRY_CODE(Sp(0));
744     }
745
746     if (%lt(usize,0)) { // NB. not <, which is unsigned
747         size = -usize;
748     } else {
749         size = usize;
750     }
751
752     up = BYTE_ARR_CTS(R2);
753     vp = BYTE_ARR_CTS(R4);
754
755     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
756
757     if (cmp == 0 :: CInt) {
758         R1 = 0; 
759         jump %ENTRY_CODE(Sp(0));
760     }
761
762     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
763         R1 = 1;
764     } else {
765         R1 = (-1); 
766     }
767     /* Result parked in R1, return via info-pointer at TOS */
768     jump %ENTRY_CODE(Sp(0));
769 }
770
771 integer2Intzh_fast
772 {
773     /* R1 = s; R2 = d */
774     W_ r, s;
775
776     s = R1;
777     if (s == 0) {
778         r = 0;
779     } else {
780         r = W_[R2 + SIZEOF_StgArrWords];
781         if (%lt(s,0)) {
782             r = -r;
783         }
784     }
785     /* Result parked in R1, return via info-pointer at TOS */
786     R1 = r;
787     jump %ENTRY_CODE(Sp(0));
788 }
789
790 integer2Wordzh_fast
791 {
792   /* R1 = s; R2 = d */
793   W_ r, s;
794
795   s = R1;
796   if (s == 0) {
797     r = 0;
798   } else {
799     r = W_[R2 + SIZEOF_StgArrWords];
800     if (%lt(s,0)) {
801         r = -r;
802     }
803   }
804   /* Result parked in R1, return via info-pointer at TOS */
805   R1 = r;
806   jump %ENTRY_CODE(Sp(0));
807 }
808
809 section "bss" {
810   exponent:  W_;
811 }
812
813 decodeFloatzh_fast
814 {
815     W_ p;
816     F_ arg;
817     
818     /* arguments: F1 = Float# */
819     arg = F1;
820     
821     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
822     
823     /* Be prepared to tell Lennart-coded __decodeFloat
824        where mantissa._mp_d can be put (it does not care about the rest) */
825     p = Hp - SIZEOF_StgArrWords;
826     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
827     StgArrWords_words(p) = 1;
828     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
829     
830     /* Perform the operation */
831     foreign "C" __decodeFloat(mp_tmp1,exponent,arg);
832     
833     /* returns: (Int# (expn), Int#, ByteArray#) */
834     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
835 }
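/* This is the machinery underneath the Haskell-level decodeFloat.  As a
 * worked example, decodeFloat (0.5 :: Float) = (8388608, -24), i.e.
 * 0.5 = 8388608 * 2^(-24), where the mantissa 8388608 = 2^23 is delivered in
 * the ByteArray# returned here and -24 comes back via the 'exponent' static.
 */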
836
837 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
838 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
839
840 decodeDoublezh_fast
841 {
842     D_ arg;
843     W_ p;
844
845     /* arguments: D1 = Double# */
846     arg = D1;
847
848     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
849     
850     /* Be prepared to tell Lennart-coded __decodeDouble
851        where mantissa.d can be put (it does not care about the rest) */
852     p = Hp - ARR_SIZE + WDS(1);
853     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
854     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
855     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
856
857     /* Perform the operation */
858     foreign "C" __decodeDouble(mp_tmp1,exponent,arg);
859     
860     /* returns: (Int# (expn), Int#, ByteArray#) */
861     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
862 }
863
864 /* -----------------------------------------------------------------------------
865  * Concurrency primitives
866  * -------------------------------------------------------------------------- */
867
868 forkzh_fast
869 {
870   /* args: R1 = closure to spark */
871   
872   MAYBE_GC(R1_PTR, forkzh_fast);
873
874   // create it right now, return ThreadID in R1
875   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
876                                    R1 "ptr");
877   foreign "C" scheduleThread(R1 "ptr");
878
879   // switch at the earliest opportunity
880   CInt[context_switch] = 1 :: CInt;
881   
882   RET_P(R1);
883 }
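/* This is the primop underneath forkIO.  Roughly (a sketch that omits the
 * exception handler the library wrapper installs around the child action):
 *
 *    forkIO (IO action) = IO $ \ s ->
 *       case fork# action s of (# s', tid #) -> (# s', ThreadId tid #)
 */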
884
885 yieldzh_fast
886 {
887   jump stg_yield_noregs;
888 }
889
890 myThreadIdzh_fast
891 {
892   /* no args. */
893   RET_P(CurrentTSO);
894 }
895
896 labelThreadzh_fast
897 {
898   /* args: 
899         R1 = ThreadId#
900         R2 = Addr# */
901 #ifdef DEBUG
902   foreign "C" labelThread(R1 "ptr", R2 "ptr");
903 #endif
904   jump %ENTRY_CODE(Sp(0));
905 }
906
907 isCurrentThreadBoundzh_fast
908 {
909   /* no args */
910   W_ r;
911   r = foreign "C" isThreadBound(CurrentTSO);
912   RET_N(r);
913 }
914
915
916 /* -----------------------------------------------------------------------------
917  * TVar primitives
918  * -------------------------------------------------------------------------- */
919
920 #ifdef REG_R1
921 #define SP_OFF 0
922 #define IF_NOT_REG_R1(x) 
923 #else
924 #define SP_OFF 1
925 #define IF_NOT_REG_R1(x) x
926 #endif
927
928 // Catch retry frame ------------------------------------------------------------
929
930 #define CATCH_RETRY_FRAME_ERROR(label) \
931   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
932
933 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
934 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
935 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
936 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
937 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
938 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
939 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
940 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
941
942 #if MAX_VECTORED_RTN > 8
943 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
944 #endif
945
946 #if defined(PROFILING)
947 #define CATCH_RETRY_FRAME_BITMAP 7
948 #define CATCH_RETRY_FRAME_WORDS  6
949 #else
950 #define CATCH_RETRY_FRAME_BITMAP 1
951 #define CATCH_RETRY_FRAME_WORDS  4
952 #endif
953
954 INFO_TABLE_RET(stg_catch_retry_frame,
955                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
956                CATCH_RETRY_FRAME,
957                stg_catch_retry_frame_0_ret,
958                stg_catch_retry_frame_1_ret,
959                stg_catch_retry_frame_2_ret,
960                stg_catch_retry_frame_3_ret,
961                stg_catch_retry_frame_4_ret,
962                stg_catch_retry_frame_5_ret,
963                stg_catch_retry_frame_6_ret,
964                stg_catch_retry_frame_7_ret)
965 {
966    W_ r, frame, trec, outer;
967    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
968
969    frame = Sp;
970    trec = StgTSO_trec(CurrentTSO);
971    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
972    r = foreign "C" stmCommitTransaction(trec "ptr");
973    if (r) {
974      /* Succeeded (either first branch or second branch) */
975      StgTSO_trec(CurrentTSO) = outer;
976      Sp = Sp + SIZEOF_StgCatchRetryFrame;
977      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
978      jump %ENTRY_CODE(Sp(SP_OFF));
979    } else {
980      /* Did not commit: retry */
981      W_ new_trec;
982      "ptr" new_trec = foreign "C" stmStartTransaction(outer "ptr");
983      StgTSO_trec(CurrentTSO) = new_trec;
984      if (StgCatchRetryFrame_running_alt_code(frame)) {
985        R1 = StgCatchRetryFrame_alt_code(frame);
986      } else {
987        R1 = StgCatchRetryFrame_first_code(frame);
988        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
989      }
990      Sp_adj(-1);
991      jump RET_LBL(stg_ap_v);
992    }
993 }
994
995
996 // Atomically frame -------------------------------------------------------------
997
998
999 #define ATOMICALLY_FRAME_ERROR(label) \
1000   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1001
1002 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1003 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1004 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1005 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1006 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1007 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1008 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1010
1011 #if MAX_VECTORED_RTN > 8
1012 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1013 #endif
1014
1015 #if defined(PROFILING)
1016 #define ATOMICALLY_FRAME_BITMAP 7
1017 #define ATOMICALLY_FRAME_WORDS  4
1018 #else
1019 #define ATOMICALLY_FRAME_BITMAP 1
1020 #define ATOMICALLY_FRAME_WORDS  2
1021 #endif
1022
1023
1024 INFO_TABLE_RET(stg_atomically_frame,
1025                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1026                ATOMICALLY_FRAME,
1027                stg_atomically_frame_0_ret,
1028                stg_atomically_frame_1_ret,
1029                stg_atomically_frame_2_ret,
1030                stg_atomically_frame_3_ret,
1031                stg_atomically_frame_4_ret,
1032                stg_atomically_frame_5_ret,
1033                stg_atomically_frame_6_ret,
1034                stg_atomically_frame_7_ret)
1035 {
1036    W_ frame, trec, valid;
1037    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1038
1039    frame = Sp;
1040    trec = StgTSO_trec(CurrentTSO);
1041    if (StgAtomicallyFrame_waiting(frame)) {
1042      /* The TSO is currently waiting: should we stop waiting? */
1043      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1044      if (valid) {
1045        /* Previous attempt is still valid: no point trying again yet */
1046           IF_NOT_REG_R1(Sp_adj(-2);
1047                         Sp(1) = stg_NO_FINALIZER_closure;
1048                         Sp(0) = stg_ut_1_0_unreg_info;)
1049        jump stg_block_noregs;
1050      } else {
1051        /* Previous attempt is no longer valid: try again */
1052        "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
1053        StgTSO_trec(CurrentTSO) = trec;
1054        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1055        R1 = StgAtomicallyFrame_code(frame);
1056        Sp_adj(-1);
1057        jump RET_LBL(stg_ap_v);
1058      }
1059    } else {
1060      /* The TSO is not currently waiting: try to commit the transaction */
1061      valid = foreign "C" stmCommitTransaction(trec "ptr");
1062      if (valid) {
1063        /* Transaction was valid: commit succeeded */
1064        StgTSO_trec(CurrentTSO) = NO_TREC;
1065        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1066        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1067        jump %ENTRY_CODE(Sp(SP_OFF));
1068      } else {
1069        /* Transaction was not valid: try again */
1070        "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
1071        StgTSO_trec(CurrentTSO) = trec;
1072        R1 = StgAtomicallyFrame_code(frame);
1073        Sp_adj(-1);
1074        jump RET_LBL(stg_ap_v);
1075      }
1076    }
1077 }
1078
1079
1080 // STM catch frame --------------------------------------------------------------
1081
1082 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1083    label                                                   \
1084    {                                                       \
1085       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1086       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1087       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1088       jump ret;                                            \
1089    }
1090
1091 #ifdef REG_R1
1092 #define SP_OFF 0
1093 #else
1094 #define SP_OFF 1
1095 #endif
1096
1097 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1098 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1099 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1100 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1101 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1102 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1103 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1104 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1105
1106 #if MAX_VECTORED_RTN > 8
1107 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1108 #endif
1109
1110 #if defined(PROFILING)
1111 #define CATCH_STM_FRAME_BITMAP 3
1112 #define CATCH_STM_FRAME_WORDS  3
1113 #else
1114 #define CATCH_STM_FRAME_BITMAP 0
1115 #define CATCH_STM_FRAME_WORDS  1
1116 #endif
1117
1118 /* Catch frames are very similar to update frames, but when entering
1119  * one we just pop the frame off the stack and perform the correct
1120  * kind of return to the activation record underneath us on the stack.
1121  */
1122
1123 INFO_TABLE_RET(stg_catch_stm_frame,
1124                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1125                CATCH_STM_FRAME,
1126                stg_catch_stm_frame_0_ret,
1127                stg_catch_stm_frame_1_ret,
1128                stg_catch_stm_frame_2_ret,
1129                stg_catch_stm_frame_3_ret,
1130                stg_catch_stm_frame_4_ret,
1131                stg_catch_stm_frame_5_ret,
1132                stg_catch_stm_frame_6_ret,
1133                stg_catch_stm_frame_7_ret)
1134 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1135
1136
1137 // Primop definition ------------------------------------------------------------
1138
1139 atomicallyzh_fast
1140 {
1141   W_ frame;
1142   W_ old_trec;
1143   W_ new_trec;
1144   
1145   /* Args: R1 = m :: STM a */
1146   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1147
1148   /* Set up the atomically frame */
1149   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1150   frame = Sp;
1151
1152   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1153   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1154   StgAtomicallyFrame_code(frame) = R1;
1155
1156   /* Start the memory transaction */
1157   old_trec = StgTSO_trec(CurrentTSO);
1158   "ptr" new_trec = foreign "C" stmStartTransaction(old_trec "ptr");
1159   StgTSO_trec(CurrentTSO) = new_trec;
1160
1161   /* Apply R1 to the realworld token */
1162   Sp_adj(-1);
1163   jump RET_LBL(stg_ap_v);
1164 }
1165
1166
1167 catchSTMzh_fast
1168 {
1169   W_ frame;
1170   
1171   /* Args: R1 :: STM a */
1172   /* Args: R2 :: Exception -> STM a */
1173   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1174
1175   /* Set up the catch frame */
1176   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1177   frame = Sp;
1178
1179   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1180   StgCatchSTMFrame_handler(frame) = R2;
1181
1182   /* Apply R1 to the realworld token */
1183   Sp_adj(-1);
1184   jump RET_LBL(stg_ap_v);
1185 }
1186
1187
1188 catchRetryzh_fast
1189 {
1190   W_ frame;
1191   W_ new_trec;
1192   W_ trec;
1193
1194   /* Args: R1 :: STM a */
1195   /* Args: R2 :: STM a */
1196   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1197
1198   /* Start a nested transaction within which to run the first code */
1199   trec = StgTSO_trec(CurrentTSO);
1200   "ptr" new_trec = foreign "C" stmStartTransaction(trec "ptr");
1201   StgTSO_trec(CurrentTSO) = new_trec;
1202
1203   /* Set up the catch-retry frame */
1204   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1205   frame = Sp;
1206   
1207   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1208   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1209   StgCatchRetryFrame_first_code(frame) = R1;
1210   StgCatchRetryFrame_alt_code(frame) = R2;
1211   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1212
1213   /* Apply R1 to the realworld token */
1214   Sp_adj(-1);
1215   jump RET_LBL(stg_ap_v);  
1216 }
1217
1218
1219 retryzh_fast
1220 {
1221   W_ frame_type;
1222   W_ frame;
1223   W_ trec;
1224   W_ outer;
1225   W_ r;
1226
1227   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1228
1229   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1230 retry_pop_stack:
1231   trec = StgTSO_trec(CurrentTSO);
1232   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1233   StgTSO_sp(CurrentTSO) = Sp;
1234   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1235   Sp = StgTSO_sp(CurrentTSO);
1236   frame = Sp;
1237
1238   if (frame_type == CATCH_RETRY_FRAME) {
1239     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1240     ASSERT(outer != NO_TREC);
1241     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1242       // Retry in the first code: try the alternative
1243       "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1244       StgTSO_trec(CurrentTSO) = trec;
1245       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1246       R1 = StgCatchRetryFrame_alt_code(frame);
1247       Sp_adj(-1);
1248       jump RET_LBL(stg_ap_v);
1249     } else {
1250       // Retry in the alternative code: propagate
1251       W_ other_trec;
1252       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1253       r = foreign "C" stmMergeForWaiting(trec "ptr", other_trec "ptr");
1254       if (r) {
1255         r = foreign "C" stmCommitTransaction(trec "ptr");
1256       }
1257       if (r) {
1258         // Merge between siblings succeeded: commit it back to enclosing transaction
1259         // and then propagate the retry
1260         StgTSO_trec(CurrentTSO) = outer;
1261         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1262         goto retry_pop_stack;
1263       } else {
1264         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1265         "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1266         StgCatchRetryFrame_first_code_trec(frame) = trec;
1267         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1268         StgTSO_trec(CurrentTSO) = trec;
1269         R1 = StgCatchRetryFrame_first_code(frame);
1270         Sp_adj(-1);
1271         jump RET_LBL(stg_ap_v);
1272       }
1273     }
1274   }
1275
1276   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1277   ASSERT(frame_type == ATOMICALLY_FRAME);
1278   ASSERT(outer == NO_TREC);
1279   r = foreign "C" stmWait(CurrentTSO "ptr", trec "ptr");
1280   if (r) {
1281     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1282     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1283     Sp = frame;
1284     // Fix up the stack in the unregisterised case: the return convention is different.
1285     IF_NOT_REG_R1(Sp_adj(-2); 
1286                   Sp(1) = stg_NO_FINALIZER_closure;
1287                   Sp(0) = stg_ut_1_0_unreg_info;)
1288     jump stg_block_noregs;
1289   } else {
1290     // Transaction was not valid: retry immediately
1291     "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1292     StgTSO_trec(CurrentTSO) = trec;
1293     R1 = StgAtomicallyFrame_code(frame);
1294     Sp = frame;
1295     Sp_adj(-1);
1296     jump RET_LBL(stg_ap_v);
1297   }
1298 }
1299
1300
1301 newTVarzh_fast
1302 {
1303   W_ tv;
1304
1305   /* Args: R1 = initialisation value */
1306
1307   ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
1308   tv = Hp - SIZEOF_StgTVar + WDS(1);
1309   SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
1310   StgTVar_current_value(tv) = R1;
1311   StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
1312     
1313   RET_P(tv);
1314 }
1315
1316
1317 readTVarzh_fast
1318 {
1319   W_ trec;
1320   W_ tvar;
1321   W_ result;
1322
1323   /* Args: R1 = TVar closure */
1324
1325   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1326   trec = StgTSO_trec(CurrentTSO);
1327   tvar = R1;
1328   "ptr" result = foreign "C" stmReadTVar(trec "ptr", tvar "ptr");
1329
1330   RET_P(result);
1331 }
1332
1333
1334 writeTVarzh_fast
1335 {
1336   W_ trec;
1337   W_ tvar;
1338   W_ new_value;
1339   
1340   /* Args: R1 = TVar closure */
1341   /*       R2 = New value    */
1342
1343   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1344   trec = StgTSO_trec(CurrentTSO);
1345   tvar = R1;
1346   new_value = R2;
1347   foreign "C" stmWriteTVar(trec "ptr", tvar "ptr", new_value "ptr");
1348
1349   jump %ENTRY_CODE(Sp(0));
1350 }
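/* Taken together, these primops implement the STM interface exposed by the
 * libraries.  A typical Haskell-level use (a sketch; 'count' is a hypothetical
 * TVar Int) is
 *
 *    atomically $ do { n <- readTVar count
 *                    ; if n == 0 then retry else writeTVar count (n - 1) }
 *
 * where readTVar/writeTVar map onto readTVarzh/writeTVarzh, retry onto
 * retryzh, and atomically onto atomicallyzh above.
 */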
1351
1352
1353 /* -----------------------------------------------------------------------------
1354  * MVar primitives
1355  *
1356  * take & putMVar work as follows.  Firstly, an important invariant:
1357  *
1358  *    If the MVar is full, then the blocking queue contains only
1359  *    threads blocked on putMVar, and if the MVar is empty then the
1360  *    blocking queue contains only threads blocked on takeMVar.
1361  *
1362  * takeMVar:
1363  *    MVar empty : then add ourselves to the blocking queue
1364  *    MVar full  : remove the value from the MVar, and
1365  *                 blocking queue empty     : return
1366  *                 blocking queue non-empty : perform the first blocked putMVar
1367  *                                            from the queue, and wake up the
1368  *                                            thread (MVar is now full again)
1369  *
1370  * putMVar is just the dual of the above algorithm.
1371  *
1372  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1373  * the stack of the thread waiting to do the putMVar.  See
1374  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1375  * the stack layout, and the PerformPut and PerformTake macros below.
1376  *
1377  * It is important that a blocked take or put is woken up with the
1378  * take/put already performed, because otherwise there would be a
1379  * small window of vulnerability where the thread could receive an
1380  * exception and never perform its take or put, and we'd end up with a
1381  * deadlock.
1382  *
1383  * -------------------------------------------------------------------------- */
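/* A rough pseudocode sketch of the takeMVar half of that algorithm (the real
 * code below works on TSO queues and performs the pending putMVar by patching
 * the blocked thread's stack with PerformPut):
 *
 *    takeMVar m
 *      | isEmptyMVar m = block the current thread on m's queue
 *      | otherwise     = take the value; if a putMVar is blocked on m,
 *                        perform it and wake that thread, else mark m empty
 */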
1384
1385 isEmptyMVarzh_fast
1386 {
1387     /* args: R1 = MVar closure */
1388
1389     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1390         RET_N(1);
1391     } else {
1392         RET_N(0);
1393     }
1394 }
1395
1396 newMVarzh_fast
1397 {
1398     /* args: none */
1399     W_ mvar;
1400
1401     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1402   
1403     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1404     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1405     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1406     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1407     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1408     RET_P(mvar);
1409 }
1410
1411
1412 /* If R1 isn't available, pass it on the stack */
1413 #ifdef REG_R1
1414 #define PerformTake(tso, value)                         \
1415     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1416     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1417 #else
1418 #define PerformTake(tso, value)                                 \
1419     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1420     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1421 #endif
1422
1423 #define PerformPut(tso,lval)                    \
1424     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1425     lval = W_[StgTSO_sp(tso) - WDS(1)];
1426
1427 /*
1428  * Only in threaded mode: we have to be careful when manipulating another thread's TSO,
1429  * because the scheduler might also be manipulating it.
1430  */
1431 #if defined(RTS_SUPPORTS_THREADS)
1432 #define ACQUIRE_SCHED_LOCK   foreign "C" ACQUIRE_LOCK(sched_mutex "ptr");
1433 #define RELEASE_SCHED_LOCK   foreign "C" RELEASE_LOCK(sched_mutex "ptr");
1434 #else
1435 #define ACQUIRE_SCHED_LOCK
1436 #define RELEASE_SCHED_LOCK
1437 #endif
1438
1439 takeMVarzh_fast
1440 {
1441     W_ mvar, val, info, tso;
1442
1443     /* args: R1 = MVar closure */
1444     mvar = R1;
1445
1446 #if defined(SMP)
1447     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1448 #else
1449     info = GET_INFO(mvar);
1450 #endif
1451
1452     /* If the MVar is empty, put ourselves on its blocking queue,
1453      * and wait until we're woken up.
1454      */
1455     if (info == stg_EMPTY_MVAR_info) {
1456         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1457             StgMVar_head(mvar) = CurrentTSO;
1458         } else {
1459             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1460         }
1461         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1462         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1463         StgTSO_block_info(CurrentTSO)  = mvar;
1464         StgMVar_tail(mvar) = CurrentTSO;
1465         
1466         jump stg_block_takemvar;
1467   }
1468
1469   /* we got the value... */
1470   val = StgMVar_value(mvar);
1471
1472   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1473   {
1474       ACQUIRE_SCHED_LOCK;
1475
1476       /* There are putMVar(s) waiting... 
1477        * wake up the first thread on the queue
1478        */
1479       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1480
1481       /* actually perform the putMVar for the thread that we just woke up */
1482       tso = StgMVar_head(mvar);
1483       PerformPut(tso,StgMVar_value(mvar));
1484
1485 #if defined(GRAN) || defined(PAR)
1486       /* ToDo: check 2nd arg (mvar) is right */
1487       "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar),mvar);
1488       StgMVar_head(mvar) = tso;
1489 #else
1490       "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr");
1491       StgMVar_head(mvar) = tso;
1492 #endif
1493
1494       RELEASE_SCHED_LOCK;
1495
1496       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1497           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1498       }
1499
1500 #if defined(SMP)
1501       foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1502 #endif
1503       RET_P(val);
1504   } 
1505   else
1506   {
1507       /* No further putMVars, MVar is now empty */
1508       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1509  
1510 #if defined(SMP)
1511       foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1512 #else
1513       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1514 #endif
1515
1516       RET_P(val);
1517   }
1518 }
1519
1520
1521 tryTakeMVarzh_fast
1522 {
1523     W_ mvar, val, info, tso;
1524
1525     /* args: R1 = MVar closure */
1526
1527     mvar = R1;
1528
1529 #if defined(SMP)
1530     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1531 #else
1532     info = GET_INFO(mvar);
1533 #endif
1534
1535     if (info == stg_EMPTY_MVAR_info) {
1536 #if defined(SMP)
1537         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1538 #endif
1539         /* HACK: we need a pointer to pass back, 
1540          * so we abuse NO_FINALIZER_closure
1541          */
1542         RET_NP(0, stg_NO_FINALIZER_closure);
1543     }
1544
1545     /* we got the value... */
1546     val = StgMVar_value(mvar);
1547
1548     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1549
1550         ACQUIRE_SCHED_LOCK;
1551
1552         /* There are putMVar(s) waiting... 
1553          * wake up the first thread on the queue
1554          */
1555         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1556
1557         /* actually perform the putMVar for the thread that we just woke up */
1558         tso = StgMVar_head(mvar);
1559         PerformPut(tso,StgMVar_value(mvar));
1560
1561 #if defined(GRAN) || defined(PAR)
1562         /* ToDo: check 2nd arg (mvar) is right */
1563         "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr", mvar "ptr");
1564         StgMVar_head(mvar) = tso;
1565 #else
1566         "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr");
1567         StgMVar_head(mvar) = tso;
1568 #endif
1569
1570         RELEASE_SCHED_LOCK;
1571
1572         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1573             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1574         }
1575 #if defined(SMP)
1576         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1577 #endif
1578     }
1579     else 
1580     {
1581         /* No further putMVars, MVar is now empty */
1582         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1583 #if defined(SMP)
1584         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1585 #else
1586         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1587 #endif
1588     }
1589     
1590     RET_NP(1, val);
1591 }
1592
1593
1594 putMVarzh_fast
1595 {
1596     W_ mvar, info, tso;
1597
1598     /* args: R1 = MVar, R2 = value */
1599     mvar = R1;
1600
1601 #if defined(SMP)
1602     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1603 #else
1604     info = GET_INFO(mvar);
1605 #endif
1606
1607     if (info == stg_FULL_MVAR_info) {
1608         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1609             StgMVar_head(mvar) = CurrentTSO;
1610         } else {
1611             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1612         }
1613         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1614         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1615         StgTSO_block_info(CurrentTSO)  = mvar;
1616         StgMVar_tail(mvar) = CurrentTSO;
1617         
1618         jump stg_block_putmvar;
1619     }
1620   
1621     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1622
1623         ACQUIRE_SCHED_LOCK;
1624
1625         /* There are takeMVar(s) waiting: wake up the first one
1626          */
1627         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1628
1629         /* actually perform the takeMVar */
1630         tso = StgMVar_head(mvar);
1631         PerformTake(tso, R2);
1632       
1633 #if defined(GRAN) || defined(PAR)
1634         /* ToDo: check 2nd arg (mvar) is right */
1635         "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr",mvar "ptr");
1636         StgMVar_head(mvar) = tso;
1637 #else
1638         "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr");
1639         StgMVar_head(mvar) = tso;
1640 #endif
1641
1642         RELEASE_SCHED_LOCK;
1643
1644         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1645             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1646         }
1647
1648 #if defined(SMP)
1649         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1650 #endif
1651         jump %ENTRY_CODE(Sp(0));
1652     }
1653     else
1654     {
1655         /* No further takes, the MVar is now full. */
1656         StgMVar_value(mvar) = R2;
1657
1658 #if defined(SMP)
1659         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1660 #else
1661         SET_INFO(mvar,stg_FULL_MVAR_info);
1662 #endif
1663         jump %ENTRY_CODE(Sp(0));
1664     }
1665     
1666     /* ToDo: yield afterward for better communication performance? */
1667 }
1668
1669
1670 tryPutMVarzh_fast
1671 {
1672     W_ mvar, info, tso;
1673
1674     /* args: R1 = MVar, R2 = value */
1675     mvar = R1;
1676
1677 #if defined(SMP)
1678     "ptr" info = foreign "C" lockClosure(mvar "ptr");
1679 #else
1680     info = GET_INFO(mvar);
1681 #endif
1682
1683     if (info == stg_FULL_MVAR_info) {
1684 #if defined(SMP)
1685         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1686 #endif
1687         RET_N(0);
1688     }
1689   
1690     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1691
1692         ACQUIRE_SCHED_LOCK;
1693
1694         /* There are takeMVar(s) waiting: wake up the first one
1695          */
1696         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1697         
1698         /* actually perform the takeMVar */
1699         tso = StgMVar_head(mvar);
1700         PerformTake(tso, R2);
1701       
1702 #if defined(GRAN) || defined(PAR)
1703         /* ToDo: check 2nd arg (mvar) is right */
1704         "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr",mvar "ptr");
1705         StgMVar_head(mvar) = tso;
1706 #else
1707         "ptr" tso = foreign "C" unblockOneLocked(StgMVar_head(mvar) "ptr");
1708         StgMVar_head(mvar) = tso;
1709 #endif
1710
1711         RELEASE_SCHED_LOCK;
1712
1713         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1714             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1715         }
1716
1717 #if defined(SMP)
1718         foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
1719 #endif
1720         jump %ENTRY_CODE(Sp(0));
1721     }
1722     else
1723     {
1724         /* No further takes, the MVar is now full. */
1725         StgMVar_value(mvar) = R2;
1726
1727 #if defined(SMP)
1728         foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
1729 #else
1730         SET_INFO(mvar,stg_FULL_MVAR_info);
1731 #endif
1732         jump %ENTRY_CODE(Sp(0));
    }

    /* ToDo: yield afterward for better communication performance? */
}


/* -----------------------------------------------------------------------------
   Stable pointer primitives
   -------------------------------------------------------------------------  */

makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );

    index = foreign "C" lookupStableName(R1 "ptr");

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }

    RET_P(sn_obj);
}
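
/* A minimal Haskell-level sketch of how this primop is reached, via the
 * System.Mem.StableName wrapper (the imports below are standard library
 * functions, not part of this file).  Because the StgStableName object
 * is cached in the snEntry above, repeated calls on the same heap
 * object return the same StableName:
 *
 *   import Control.Exception (evaluate)
 *   import System.Mem.StableName (makeStableName, hashStableName)
 *
 *   demo :: IO ()
 *   demo = do
 *     x   <- evaluate ([1,2,3] :: [Int])
 *     sn1 <- makeStableName x
 *     sn2 <- makeStableName x
 *     print (sn1 == sn2)          -- True: same snEntry, same sn_obj
 *     print (hashStableName sn1)
 */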


makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
    RET_N(sp);
}

deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
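
/* A short Haskell-level usage sketch for the two primops above, via the
 * Foreign.StablePtr wrapper (library functions, not defined here):
 *
 *   import Foreign.StablePtr (newStablePtr, deRefStablePtr, freeStablePtr)
 *
 *   demo :: IO ()
 *   demo = do
 *     sp <- newStablePtr "hello"   -- makeStablePtrzh_fast
 *     s  <- deRefStablePtr sp      -- deRefStablePtrzh_fast: snEntry_addr lookup
 *     putStrLn s
 *     freeStablePtr sp             -- releases the table entry (RTS C code)
 */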

/* -----------------------------------------------------------------------------
   Bytecode object primitives
   -------------------------------------------------------------------------  */

newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = itbls
       R5 = arity
       R6 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;

    bitmap_arr = R6;
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);
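    /* The BCO carries its bitmap inline: the total size is the fixed
     * StgBCO fields plus one word per word of the bitmap array's
     * payload, which the loop below copies in so that it travels with
     * the BCO itself rather than being reached through R6.
     */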

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);

    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_itbls(bco)      = R4;
    StgBCO_arity(bco)      = HALF_W_(R5);
    StgBCO_size(bco)       = HALF_W_(words);

    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }

    RET_P(bco);
}


mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);

    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
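    // The result is an ordinary updatable AP thunk with no pending
    // arguments: entering it pushes an update frame and runs the
    // zero-arity BCO, and the AP is then updated with the result, so a
    // bytecode thunk behaves like a compiled one.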

    RET_P(ap);
}

/* -----------------------------------------------------------------------------
   Thread I/O blocking primitives
   -------------------------------------------------------------------------- */

/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
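
/* Callers are expected to have set why_blocked and block_info on the
 * TSO before appending it; in the non-threaded RTS the scheduler's
 * awaitEvent() later walks blocked_queue_hd and wakes threads whose
 * file descriptors are ready (or whose async request has completed).
 */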

waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
}

waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
}
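
/* A Haskell-level sketch of how waitRead#/waitWrite# are reached in the
 * non-threaded RTS, via Control.Concurrent (a library function, not
 * part of this file):
 *
 *   import Control.Concurrent (threadWaitRead)
 *
 *   waitStdin :: IO ()
 *   waitStdin = threadWaitRead 0   -- block this thread until fd 0 is readable
 */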


STRING(stg_delayzh_malloc_str, "delayzh_fast")
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS");
#endif

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    reqID = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to BlockedOnDoProc and
     * put the delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

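    /* R1 is the requested delay in microseconds and TICK_MILLISECS is
     * the length of one RTS timer tick in milliseconds, so
     * R1 / (TICK_MILLISECS*1000) converts the delay into ticks;
     * getourtimeofday() (RTS C code) is assumed to report the current
     * time in the same tick units, giving an absolute wake-up time to
     * store in block_info.
     */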
    W_ time;
    time = foreign "C" getourtimeofday();
    target = (R1 / (TICK_MILLISECS*1000)) + time;
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }
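
    /* prev and t now bracket the insertion point: the sleeping queue is
     * kept sorted by wake-up time, so the scheduler can stop scanning
     * at the first thread whose target time has not yet been reached.
     */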

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
}


#ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS");
#endif

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str);
    reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
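
/* For asyncRead#/asyncWrite#/asyncDoProc# the malloc'd StgAsyncIOResult
 * is the rendezvous with the Win32 I/O worker threads: when the request
 * identified by reqID completes, len and errCode are filled in, and
 * stg_block_async later returns those fields as the primop's result and
 * frees the structure.
 */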

STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS");
#endif

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str);
    reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}

STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    W_ ares;
    CInt reqID;

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str);
    reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
#endif

/* -----------------------------------------------------------------------------
  ** temporary **

   classes CCallable and CReturnable don't really exist, but the
   compiler insists on generating dictionaries containing references
   to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
   for these.  Some C compilers can't cope with zero-length static arrays,
   so we have to make these one element long.
  --------------------------------------------------------------------------- */

section "rodata" {
  GHC_ZCCCallable_static_info:   W_ 0;
}

section "rodata" {
  GHC_ZCCReturnable_static_info: W_ 0;
}