1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
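/* Illustration (not part of this file): at the Haskell level, a primop
 * implemented here typically has an unboxed-tuple result, e.g.
 *
 *   newMutVar# :: a -> State# s -> (# State# s, MutVar# s a #)
 *
 * The RET_P()/RET_NP()/RET_NNP() macros used below produce exactly this
 * kind of return; see ghc/compiler/prelude/primops.txt.pp for the
 * authoritative types.
 */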
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* Round the requested size in bytes up to a whole number of words, so
42  * that even requests *smaller* than a word get a full word of storage.
43  */
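/* Worked example (assuming ROUNDUP_BYTES_TO_WDS is the usual
 * (n + SIZEOF_W - 1) / SIZEOF_W ceiling division): on a 64-bit platform
 * a request for 10 bytes gives payload_words = 2, so the ByteArray#
 * occupies SIZEOF_StgArrWords + 16 bytes.
 */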
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocate(words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
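/* Worked alignment example (hypothetical sizes, for illustration only):
 * on a 32-bit build where SIZEOF_StgArrWords happened to be 12 bytes,
 * allocatePinned() would return an 8-byte aligned p, but p+12 is only
 * 4-byte aligned; allocating one extra word and bumping p by WDS(1)
 * makes the payload start at p+4+12 = p+16, 8-byte aligned again.
 */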
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocate(words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
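/* Haskell-level view (illustrative): the primop built on this code is
 *
 *   newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #)
 *
 * e.g. newArray# 8# Nothing s# yields an 8-element array with every slot
 * initialised to Nothing, matching the initialisation loop above.
 */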
118
119 unsafeThawArrayzh_fast
120 {
121   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
122
123   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
124   //
125   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
126   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
127   // it on the mutable list for the GC to remove (removing something from
128   // the mutable list is not easy, because the mut_list is only singly-linked).
129   // 
130   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
131   // either it is on a mut_list, or it isn't.  We adopt the convention that
132   // the mut_link field is NULL if it isn't on a mut_list, and the GC
133   // maintains this invariant.
134   //
135   if (StgMutClosure_mut_link(R1) == NULL) {
136         foreign "C" recordMutable(R1 "ptr");
137   }
138
139   RET_P(R1);
140 }
141
142 /* -----------------------------------------------------------------------------
143    MutVar primitives
144    -------------------------------------------------------------------------- */
145
146 newMutVarzh_fast
147 {
148     W_ mv;
149     /* Args: R1 = initialisation value */
150
151     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
152
153     mv = Hp - SIZEOF_StgMutVar + WDS(1);
154     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
155     StgMutVar_var(mv) = R1;
156     
157     RET_P(mv);
158 }
159
160 atomicModifyMutVarzh_fast
161 {
162     W_ mv, z, x, y, r;
163     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
164
165    /* If x is the current contents of the MutVar#, then
166       we want to make the new contents point to
167
168         (sel_0 (f x))
169
170       and the return value is
171
172         (sel_1 (f x))
173
174        Obviously we can share (f x).
175
176          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
177          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
178          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
179     */
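    /* A non-atomic Haskell sketch of the semantics being implemented
       (ignoring the thunk bookkeeping described above):

         atomicModifyIORef r f = do x <- readIORef r
                                    let z = f x
                                    writeIORef r (fst z)
                                    return (snd z)
    */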
180
181 #if MIN_UPD_SIZE > 1
182 #define THUNK_1_SIZE (SIZEOF_StgHeader + WDS(MIN_UPD_SIZE))
183 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
184 #else
185 #define THUNK_1_SIZE (SIZEOF_StgHeader + WDS(1))
186 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
187 #endif
188
189 #if MIN_UPD_SIZE > 2
190 #define THUNK_2_SIZE (SIZEOF_StgHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
192 #else
193 #define THUNK_2_SIZE (SIZEOF_StgHeader + WDS(2))
194 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
195 #endif
196
197 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
198
199    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
200
201    x = StgMutVar_var(R1);
202
203    TICK_ALLOC_THUNK_2();
204    CCCS_ALLOC(THUNK_2_SIZE);
205    z = Hp - THUNK_2_SIZE + WDS(1);
206    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
207    LDV_RECORD_CREATE(z);
208    StgClosure_payload(z,0) = R2;
209    StgClosure_payload(z,1) = x;
210
211    TICK_ALLOC_THUNK_1();
212    CCCS_ALLOC(THUNK_1_SIZE);
213    y = z - THUNK_1_SIZE;
214    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(y);
216    StgClosure_payload(y,0) = z;
217
218    StgMutVar_var(R1) = y;
219
220    TICK_ALLOC_THUNK_1();
221    CCCS_ALLOC(THUNK_1_SIZE);
222    r = y - THUNK_1_SIZE;
223    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
224    LDV_RECORD_CREATE(r);
225    StgClosure_payload(r,0) = z;
226
227    RET_P(r);
228 }
229
230 /* -----------------------------------------------------------------------------
231    Foreign Object Primitives
232    -------------------------------------------------------------------------- */
233
234 mkForeignObjzh_fast
235 {
236   /* R1 = ptr to foreign object,
237   */
238   W_ result;
239
240   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
241
242   result = Hp - SIZEOF_StgForeignObj + WDS(1);
243   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
244   StgForeignObj_data(result) = R1;
245
246   /* returns (# s#, ForeignObj# #) */
247   RET_P(result);
248 }
249
250 /* -----------------------------------------------------------------------------
251    Weak Pointer Primitives
252    -------------------------------------------------------------------------- */
253
254 STRING(stg_weak_msg,"New weak pointer at %p\n")
255
256 mkWeakzh_fast
257 {
258   /* R1 = key
259      R2 = value
260      R3 = finalizer (or NULL)
261   */
262   W_ w;
263
264   if (R3 == NULL) {
265     R3 = stg_NO_FINALIZER_closure;
266   }
267
268   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
269
270   w = Hp - SIZEOF_StgWeak + WDS(1);
271   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
272
273   StgWeak_key(w)       = R1;
274   StgWeak_value(w)     = R2;
275   StgWeak_finalizer(w) = R3;
276
277   StgWeak_link(w)       = W_[weak_ptr_list];
278   W_[weak_ptr_list]     = w;
279
280   IF_DEBUG(weak, foreign "C" fprintf(stderr,stg_weak_msg,w));
281
282   RET_P(w);
283 }
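/* Library-level view (illustrative): System.Mem.Weak provides
 *
 *   mkWeak :: k -> v -> Maybe (IO ()) -> IO (Weak v)
 *
 * and, judging by the NULL test above, a Nothing finalizer reaches this
 * primop as NULL and is replaced by stg_NO_FINALIZER_closure.
 */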
284
285
286 finalizzeWeakzh_fast
287 {
288   /* R1 = weak ptr
289    */
290   W_ w, f;
291
292   w = R1;
293
294   // already dead?
295   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
296       RET_NP(0,stg_NO_FINALIZER_closure);
297   }
298
299   // kill it
300 #ifdef PROFILING
301   // @LDV profiling
302   // A weak pointer is inherently used, so we do not need to call
303   // LDV_recordDead_FILL_SLOP_DYNAMIC():
304   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
305   // or, LDV_recordDead():
306   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
307   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
308   // large as weak pointers, so there is no need to fill the slop, either.
309   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
310 #endif
311
312   //
313   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
314   //
315   SET_INFO(w,stg_DEAD_WEAK_info);
316   LDV_RECORD_CREATE(w);
317
318   f = StgWeak_finalizer(w);
319   StgDeadWeak_link(w) = StgWeak_link(w);
320
321   /* return the finalizer */
322   if (f == stg_NO_FINALIZER_closure) {
323       RET_NP(0,stg_NO_FINALIZER_closure);
324   } else {
325       RET_NP(1,f);
326   }
327 }
328
329 deRefWeakzh_fast
330 {
331   /* R1 = weak ptr */
332   W_ w, code, val;
333
334   w = R1;
335   if (GET_INFO(w) == stg_WEAK_info) {
336     code = 1;
337     val = StgWeak_value(w);
338   } else {
339     code = 0;
340     val = w;
341   }
342   RET_NP(code,val);
343 }
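/* The (code, val) pair returned above is what the library-level
 *
 *   deRefWeak :: Weak v -> IO (Maybe v)
 *
 * turns into Nothing (code == 0) or Just val (code == 1).
 */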
344
345 /* -----------------------------------------------------------------------------
346    Arbitrary-precision Integer operations.
347
348    There are some assumptions in this code that mp_limb_t == W_.  This is
349    the case for all the platforms that GHC supports, currently.
350    -------------------------------------------------------------------------- */
351
352 int2Integerzh_fast
353 {
354    /* arguments: R1 = Int# */
355
356    W_ val, s, p;        /* to avoid aliasing */
357
358    val = R1;
359    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
360
361    p = Hp - SIZEOF_StgArrWords;
362    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
363    StgArrWords_words(p) = 1;
364
365    /* mpz_set_si is inlined here, makes things simpler */
366    if (%lt(val,0)) { 
367         s  = -1;
368         Hp(0) = -val;
369    } else { 
370      if (%gt(val,0)) {
371         s = 1;
372         Hp(0) = val;
373      } else {
374         s = 0;
375      }
376   }
377
378    /* returns (# size  :: Int#, 
379                  data  :: ByteArray# 
380                #)
381    */
382    RET_NP(s,p);
383 }
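/* Worked examples, reading off the code above: int2Integer# (-7#) returns
 * s = -1 with a one-word ByteArray# holding 7 (sign-magnitude, as GMP
 * expects); int2Integer# 0# returns s = 0 and the payload word is unused.
 */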
384
385 word2Integerzh_fast
386 {
387    /* arguments: R1 = Word# */
388
389    W_ val, s, p;        /* to avoid aliasing */
390
391    val = R1;
392
393    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
394
395    p = Hp - SIZEOF_StgArrWords;
396    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
397    StgArrWords_words(p) = 1;
398
399    if (val != 0) {
400         s = 1;
401         W_[Hp] = val;
402    } else {
403         s = 0;
404    }
405
406    /* returns (# size  :: Int#, 
407                  data  :: ByteArray# #)
408    */
409    RET_NP(s,p);
410 }
411
412
413 /*
414  * 'long long' primops for converting to/from Integers.
415  */
416
417 #ifdef SUPPORT_LONG_LONGS
418
419 int64ToIntegerzh_fast
420 {
421    /* arguments: L1 = Int64# */
422
423    L_ val;
424    W_ hi, s, neg, words_needed, p;
425
426    val = L1;
427    neg = 0;
428
429    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
430        words_needed = 2;
431    } else { 
432        // minimum is one word
433        words_needed = 1;
434    }
435
436    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
437                NO_PTRS, int64ToIntegerzh_fast );
438
439    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
440    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
441    StgArrWords_words(p) = words_needed;
442
443    if ( %lt(val,0::L_) ) {
444      neg = 1;
445      val = -val;
446    }
447
448    hi = TO_W_(val >> 32);
449
450    if ( words_needed == 2 )  { 
451       s = 2;
452       Hp(-1) = TO_W_(val);
453       Hp(0) = hi;
454    } else { 
455        if ( val != 0::L_ ) {
456            s = 1;
457            Hp(0) = TO_W_(val);
458        } else /* val==0 */  {
459            s = 0;
460        }
461    }
462    if ( neg != 0 ) {
463         s = -s;
464    }
465
466    /* returns (# size  :: Int#, 
467                  data  :: ByteArray# #)
468    */
469    RET_NP(s,p);
470 }
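/* Worked example (32-bit platform): val = 2^32 needs two limbs, so
 * s = 2 and the payload holds [0, 1], least-significant limb first
 * (Hp(-1) gets the low word, Hp(0) the high word).
 */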
471
472 word64ToIntegerzh_fast
473 {
474    /* arguments: L1 = Word64# */
475
476    L_ val;
477    W_ hi, s, words_needed, p;
478
479    val = L1;
480    if ( val >= 0x100000000::L_ ) {
481       words_needed = 2;
482    } else {
483       words_needed = 1;
484    }
485
486    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
487                NO_PTRS, word64ToIntegerzh_fast );
488
489    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
490    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
491    StgArrWords_words(p) = words_needed;
492
493    hi = TO_W_(val >> 32);
494    if ( val >= 0x100000000::L_ ) { 
495      s = 2;
496      Hp(-1) = TO_W_(val);
497      Hp(0)  = hi;
498    } else {
499        if ( val != 0::L_ ) {
500            s = 1;
501            Hp(0) = TO_W_(val);
502        } else /* val==0 */  {
503            s = 0;
504        }
505    }
506
507    /* returns (# size  :: Int#, 
508                  data  :: ByteArray# #)
509    */
510    RET_NP(s,p);
511 }
512
513
514 #endif /* SUPPORT_LONG_LONGS */
515
516 /* ToDo: this is shockingly inefficient */
517
518 section "bss" {
519   mp_tmp1:
520     bits8 [SIZEOF_MP_INT];
521 }
522
523 section "bss" {
524   mp_tmp2:
525     bits8 [SIZEOF_MP_INT];
526 }
527
528 section "bss" {
529   result1:
530     bits8 [SIZEOF_MP_INT];
531 }
532
533 section "bss" {
534   result2:
535     bits8 [SIZEOF_MP_INT];
536 }
537
538 #define GMP_TAKE2_RET1(name,mp_fun)                     \
539 name                                                    \
540 {                                                       \
541   CInt s1, s2;                                          \
542   W_ d1, d2;                                            \
543                                                         \
544   /* call doYouWantToGC() */                            \
545   MAYBE_GC(R2_PTR & R4_PTR, name);                      \
546                                                         \
547   s1 = W_TO_INT(R1);                                    \
548   d1 = R2;                                              \
549   s2 = W_TO_INT(R3);                                    \
550   d2 = R4;                                              \
551                                                         \
552   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));  \
553   MP_INT__mp_size(mp_tmp1)  = (s1);                     \
554   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);         \
555   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));  \
556   MP_INT__mp_size(mp_tmp2)  = (s2);                     \
557   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);         \
558                                                         \
559   foreign "C" mpz_init(result1);                        \
560                                                         \
561   /* Perform the operation */                           \
562   foreign "C" mp_fun(result1,mp_tmp1,mp_tmp2);          \
563                                                         \
564   RET_NP(TO_W_(MP_INT__mp_size(result1)),               \
565          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);   \
566 }
567
568 #define GMP_TAKE1_RET1(name,mp_fun)                             \
569 name                                                            \
570 {                                                               \
571   CInt s1;                                                      \
572   W_ d1;                                                        \
573                                                                 \
574   /* call doYouWantToGC() */                                    \
575   MAYBE_GC(R2_PTR, name);                                       \
576                                                                 \
577   d1 = R2;                                                      \
578   s1 = W_TO_INT(R1);                                            \
579                                                                 \
580   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
581   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
582   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
583                                                                 \
584   foreign "C" mpz_init(result1);                                \
585                                                                 \
586   /* Perform the operation */                                   \
587   foreign "C" mp_fun(result1,mp_tmp1);                          \
588                                                                 \
589   RET_NP(TO_W_(MP_INT__mp_size(result1)),                       \
590          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);           \
591 }
592
593 #define GMP_TAKE2_RET2(name,mp_fun)                             \
594 name                                                            \
595 {                                                               \
596   CInt s1, s2;                                                  \
597   W_ d1, d2;                                                    \
598                                                                 \
599   /* call doYouWantToGC() */                                    \
600   MAYBE_GC(R2_PTR & R4_PTR, name);                              \
601                                                                 \
602   s1 = W_TO_INT(R1);                                            \
603   d1 = R2;                                                      \
604   s2 = W_TO_INT(R3);                                            \
605   d2 = R4;                                                      \
606                                                                 \
607   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
608   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
609   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
610   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));      \
611   MP_INT__mp_size(mp_tmp2)      = (s2);                         \
612   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);             \
613                                                                 \
614   foreign "C" mpz_init(result1);                                \
615   foreign "C" mpz_init(result2);                                \
616                                                                 \
617   /* Perform the operation */                                   \
618   foreign "C" mp_fun(result1,result2,mp_tmp1,mp_tmp2);          \
619                                                                 \
620   RET_NPNP(TO_W_(MP_INT__mp_size(result1)),                     \
621            MP_INT__mp_d(result1) - SIZEOF_StgArrWords,          \
622            TO_W_(MP_INT__mp_size(result2)),                     \
623            MP_INT__mp_d(result2) - SIZEOF_StgArrWords);         \
624 }
625
626 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
627 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
628 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
629 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
630 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
631 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
632 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
633 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
634 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
635 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
636 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
637
638 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
639 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
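/* The tdiv/fdiv split above mirrors Haskell's quotRem vs divMod:
 * truncated division gives (-7) `quot` 2 == -3, while floor division
 * gives (-7) `div` 2 == -4.
 */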
640
641 section "bss" {
642   aa:  W_; // NB. aa is really an mp_limb_t
643 }
644
645 gcdIntzh_fast
646 {
647     /* R1 = the first Int#; R2 = the second Int# */
648     W_ r; 
649
650     W_[aa] = R1;
651     r = foreign "C" mpn_gcd_1(aa, 1, R2);
652
653     R1 = r;
654     /* Result parked in R1, return via info-pointer at TOS */
655     jump %ENTRY_CODE(Sp(0));
656 }
657
658
659 gcdIntegerIntzh_fast
660 {
661     /* R1 = s1; R2 = d1; R3 = the int */
662     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
663     
664     /* Result parked in R1, return via info-pointer at TOS */
665     jump %ENTRY_CODE(Sp(0));
666 }
667
668
669 cmpIntegerIntzh_fast
670 {
671     /* R1 = s1; R2 = d1; R3 = the int */
672     W_ usize, vsize, v_digit, u_digit;
673
674     usize = R1;
675     vsize = 0;
676     v_digit = R3;
677
678     // paraphrased from mpz_cmp_si() in the GMP sources
679     if (%gt(v_digit,0)) {
680         vsize = 1;
681     } else { 
682         if (%lt(v_digit,0)) {
683             vsize = -1;
684             v_digit = -v_digit;
685         }
686     }
687
688     if (usize != vsize) {
689         R1 = usize - vsize; 
690         jump %ENTRY_CODE(Sp(0));
691     }
692
693     if (usize == 0) {
694         R1 = 0; 
695         jump %ENTRY_CODE(Sp(0));
696     }
697
698     u_digit = W_[BYTE_ARR_CTS(R2)];
699
700     if (u_digit == v_digit) {
701         R1 = 0; 
702         jump %ENTRY_CODE(Sp(0));
703     }
704
705     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
706         R1 = usize; 
707     } else {
708         R1 = -usize; 
709     }
710
711     jump %ENTRY_CODE(Sp(0));
712 }
713
714 cmpIntegerzh_fast
715 {
716     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
717     W_ usize, vsize, size, up, vp;
718     CInt cmp;
719
720     // paraphrased from mpz_cmp() in the GMP sources
721     usize = R1;
722     vsize = R3;
723
724     if (usize != vsize) {
725         R1 = usize - vsize; 
726         jump %ENTRY_CODE(Sp(0));
727     }
728
729     if (usize == 0) {
730         R1 = 0; 
731         jump %ENTRY_CODE(Sp(0));
732     }
733
734     if (%lt(usize,0)) { // NB. not <, which is unsigned
735         size = -usize;
736     } else {
737         size = usize;
738     }
739
740     up = BYTE_ARR_CTS(R2);
741     vp = BYTE_ARR_CTS(R4);
742
743     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
744
745     if (cmp == 0 :: CInt) {
746         R1 = 0; 
747         jump %ENTRY_CODE(Sp(0));
748     }
749
750     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
751         R1 = 1;
752     } else {
753         R1 = (-1); 
754     }
755     /* Result parked in R1, return via info-pointer at TOS */
756     jump %ENTRY_CODE(Sp(0));
757 }
758
759 integer2Intzh_fast
760 {
761     /* R1 = s; R2 = d */
762     W_ r, s;
763
764     s = R1;
765     if (s == 0) {
766         r = 0;
767     } else {
768         r = W_[R2 + SIZEOF_StgArrWords];
769         if (%lt(s,0)) {
770             r = -r;
771         }
772     }
773     /* Result parked in R1, return via info-pointer at TOS */
774     R1 = r;
775     jump %ENTRY_CODE(Sp(0));
776 }
777
778 integer2Wordzh_fast
779 {
780   /* R1 = s; R2 = d */
781   W_ r, s;
782
783   s = R1;
784   if (s == 0) {
785     r = 0;
786   } else {
787     r = W_[R2 + SIZEOF_StgArrWords];
788     if (%lt(s,0)) {
789         r = -r;
790     }
791   }
792   /* Result parked in R1, return via info-pointer at TOS */
793   R1 = r;
794   jump %ENTRY_CODE(Sp(0));
795 }
796
797 section "bss" {
798   exponent:  W_;
799 }
800
801 decodeFloatzh_fast
802 {
803     W_ p;
804     F_ arg;
805     
806     /* arguments: F1 = Float# */
807     arg = F1;
808     
809     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
810     
811     /* Be prepared to tell Lennart-coded __decodeFloat
812        where mantissa._mp_d can be put (it does not care about the rest) */
813     p = Hp - SIZEOF_StgArrWords;
814     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
815     StgArrWords_words(p) = 1;
816     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
817     
818     /* Perform the operation */
819     foreign "C" __decodeFloat(mp_tmp1,exponent,arg);
820     
821     /* returns: (Int# (expn), Int#, ByteArray#) */
822     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
823 }
824
825 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
826 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
827
828 decodeDoublezh_fast
829 {
830     D_ arg;
831     W_ p;
832
833     /* arguments: D1 = Double# */
834     arg = D1;
835
836     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
837     
838     /* Be prepared to tell Lennart-coded __decodeDouble
839        where mantissa.d can be put (it does not care about the rest) */
840     p = Hp - ARR_SIZE + WDS(1);
841     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
842     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
843     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
844
845     /* Perform the operation */
846     foreign "C" __decodeDouble(mp_tmp1,exponent,arg);
847     
848     /* returns: (Int# (expn), Int#, ByteArray#) */
849     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
850 }
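/* Library-level example (illustrative): the (exponent, mantissa) pair
 * produced here is what decodeFloat exposes, with x = m * 2^e, e.g.
 *
 *   decodeFloat (0.5 :: Double) == (4503599627370496, -53)   -- 2^52 * 2^-53
 */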
851
852 /* -----------------------------------------------------------------------------
853  * Concurrency primitives
854  * -------------------------------------------------------------------------- */
855
856 forkzh_fast
857 {
858   /* args: R1 = closure to spark */
859   
860   MAYBE_GC(R1_PTR, forkzh_fast);
861
862   // create it right now, return ThreadID in R1
863   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
864                                    R1 "ptr");
865   foreign "C" scheduleThread(R1 "ptr");
866
867   // switch at the earliest opportunity
868   CInt[context_switch] = 1 :: CInt;
869   
870   RET_P(R1);
871 }
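/* Library-level view (illustrative): forkIO :: IO () -> IO ThreadId is
 * built on this primop; setting context_switch above is what lets the
 * freshly created thread get scheduled at the earliest opportunity.
 */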
872
873 yieldzh_fast
874 {
875   jump stg_yield_noregs;
876 }
877
878 myThreadIdzh_fast
879 {
880   /* no args. */
881   RET_P(CurrentTSO);
882 }
883
884 labelThreadzh_fast
885 {
886   /* args: 
887         R1 = ThreadId#
888         R2 = Addr# */
889 #ifdef DEBUG
890   foreign "C" labelThread(R1 "ptr", R2 "ptr");
891 #endif
892   jump %ENTRY_CODE(Sp(0));
893 }
894
895 isCurrentThreadBoundzh_fast
896 {
897   /* no args */
898   W_ r;
899   r = foreign "C" isThreadBound(CurrentTSO);
900   RET_N(r);
901 }
902
903
904 /* -----------------------------------------------------------------------------
905  * TVar primitives
906  * -------------------------------------------------------------------------- */
907
908 #ifdef REG_R1
909 #define SP_OFF 0
910 #define IF_NOT_REG_R1(x) 
911 #else
912 #define SP_OFF 1
913 #define IF_NOT_REG_R1(x) x
914 #endif
915
916 // Catch retry frame ------------------------------------------------------------
917
918 #define CATCH_RETRY_FRAME_ERROR(label) \
919   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
920
921 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
922 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
923 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
924 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
925 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
926 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
927 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
928 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
929
930 #if MAX_VECTORED_RTN > 8
931 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
932 #endif
933
934 #if defined(PROFILING)
935 #define CATCH_RETRY_FRAME_BITMAP 7
936 #define CATCH_RETRY_FRAME_WORDS  6
937 #else
938 #define CATCH_RETRY_FRAME_BITMAP 1
939 #define CATCH_RETRY_FRAME_WORDS  4
940 #endif
941
942 INFO_TABLE_RET(stg_catch_retry_frame,
943                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
944                CATCH_RETRY_FRAME,
945                stg_catch_retry_frame_0_ret,
946                stg_catch_retry_frame_1_ret,
947                stg_catch_retry_frame_2_ret,
948                stg_catch_retry_frame_3_ret,
949                stg_catch_retry_frame_4_ret,
950                stg_catch_retry_frame_5_ret,
951                stg_catch_retry_frame_6_ret,
952                stg_catch_retry_frame_7_ret)
953 {
954    W_ r, frame, trec, outer;
955    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
956
957    frame = Sp;
958    trec = StgTSO_trec(CurrentTSO);
959    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
960    r = foreign "C" stmCommitTransaction(trec "ptr");
961    if (r) {
962      /* Succeeded (either first branch or second branch) */
963      StgTSO_trec(CurrentTSO) = outer;
964      Sp = Sp + SIZEOF_StgCatchRetryFrame;
965      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
966      jump %ENTRY_CODE(Sp(SP_OFF));
967    } else {
968      /* Did not commit: retry */
969      W_ new_trec;
970      "ptr" new_trec = foreign "C" stmStartTransaction(outer "ptr");
971      StgTSO_trec(CurrentTSO) = new_trec;
972      if (StgCatchRetryFrame_running_alt_code(frame)) {
973        R1 = StgCatchRetryFrame_alt_code(frame);
974      } else {
975        R1 = StgCatchRetryFrame_first_code(frame);
976        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
977      }
978      Sp_adj(-1);
979      jump RET_LBL(stg_ap_v);
980    }
981 }
982
983
984 // Atomically frame -------------------------------------------------------------
985
986
987 #define ATOMICALLY_FRAME_ERROR(label) \
988   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
989
990 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
991 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
992 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
993 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
994 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
995 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
996 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
997 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
998
999 #if MAX_VECTORED_RTN > 8
1000 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1001 #endif
1002
1003 #if defined(PROFILING)
1004 #define ATOMICALLY_FRAME_BITMAP 7
1005 #define ATOMICALLY_FRAME_WORDS  4
1006 #else
1007 #define ATOMICALLY_FRAME_BITMAP 1
1008 #define ATOMICALLY_FRAME_WORDS  2
1009 #endif
1010
1011
1012 INFO_TABLE_RET(stg_atomically_frame,
1013                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1014                ATOMICALLY_FRAME,
1015                stg_atomically_frame_0_ret,
1016                stg_atomically_frame_1_ret,
1017                stg_atomically_frame_2_ret,
1018                stg_atomically_frame_3_ret,
1019                stg_atomically_frame_4_ret,
1020                stg_atomically_frame_5_ret,
1021                stg_atomically_frame_6_ret,
1022                stg_atomically_frame_7_ret)
1023 {
1024    W_ frame, trec, valid;
1025    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1026
1027    frame = Sp;
1028    trec = StgTSO_trec(CurrentTSO);
1029    if (StgAtomicallyFrame_waiting(frame)) {
1030      /* The TSO is currently waiting: should we stop waiting? */
1031      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1032      if (valid) {
1033        /* Previous attempt is still valid: no point trying again yet */
1034           IF_NOT_REG_R1(Sp_adj(-2);
1035                         Sp(1) = stg_NO_FINALIZER_closure;
1036                         Sp(0) = stg_ut_1_0_unreg_info;)
1037        jump stg_block_noregs;
1038      } else {
1039        /* Previous attempt is no longer valid: try again */
1040        "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
1041        StgTSO_trec(CurrentTSO) = trec;
1042        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1043        R1 = StgAtomicallyFrame_code(frame);
1044        Sp_adj(-1);
1045        jump RET_LBL(stg_ap_v);
1046      }
1047    } else {
1048      /* The TSO is not currently waiting: try to commit the transaction */
1049      valid = foreign "C" stmCommitTransaction(trec "ptr");
1050      if (valid) {
1051        /* Transaction was valid: commit succeeded */
1052        StgTSO_trec(CurrentTSO) = NO_TREC;
1053        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1054        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1055        jump %ENTRY_CODE(Sp(SP_OFF));
1056      } else {
1057        /* Transaction was not valid: try again */
1058        "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
1059        StgTSO_trec(CurrentTSO) = trec;
1060        R1 = StgAtomicallyFrame_code(frame);
1061        Sp_adj(-1);
1062        jump RET_LBL(stg_ap_v);
1063      }
1064    }
1065 }
1066
1067
1068 // STM catch frame --------------------------------------------------------------
1069
1070 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1071    label                                                   \
1072    {                                                       \
1073       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1074       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1075       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1076       jump ret;                                            \
1077    }
1078
1079 #ifdef REG_R1
1080 #define SP_OFF 0
1081 #else
1082 #define SP_OFF 1
1083 #endif
1084
1085 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1086 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1087 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1088 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1089 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1090 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1091 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1092 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1093
1094 #if MAX_VECTORED_RTN > 8
1095 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1096 #endif
1097
1098 #if defined(PROFILING)
1099 #define CATCH_STM_FRAME_BITMAP 3
1100 #define CATCH_STM_FRAME_WORDS  3
1101 #else
1102 #define CATCH_STM_FRAME_BITMAP 0
1103 #define CATCH_STM_FRAME_WORDS  1
1104 #endif
1105
1106 /* Catch frames are very similar to update frames, but when entering
1107  * one we just pop the frame off the stack and perform the correct
1108  * kind of return to the activation record underneath us on the stack.
1109  */
1110
1111 INFO_TABLE_RET(stg_catch_stm_frame,
1112                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1113                CATCH_STM_FRAME,
1114                stg_catch_stm_frame_0_ret,
1115                stg_catch_stm_frame_1_ret,
1116                stg_catch_stm_frame_2_ret,
1117                stg_catch_stm_frame_3_ret,
1118                stg_catch_stm_frame_4_ret,
1119                stg_catch_stm_frame_5_ret,
1120                stg_catch_stm_frame_6_ret,
1121                stg_catch_stm_frame_7_ret)
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1123
1124
1125 // Primop definition ------------------------------------------------------------
1126
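/* Library-level view (illustrative): these primops underpin the STM API,
 * roughly  atomically :: STM a -> IO a           (atomicallyzh),
 *          retry      :: STM a                   (retryzh),
 *          orElse     :: STM a -> STM a -> STM a (catchRetryzh),
 * plus catchSTM for exceptions (catchSTMzh).
 */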
1127 atomicallyzh_fast
1128 {
1129   W_ frame;
1130   W_ old_trec;
1131   W_ new_trec;
1132   
1133   /* Args: R1 = m :: STM a */
1134   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1135
1136   /* Set up the atomically frame */
1137   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1138   frame = Sp;
1139
1140   SET_HDR(frame,stg_atomically_frame_info,CCCS);
1141   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1142   StgAtomicallyFrame_code(frame) = R1;
1143
1144   /* Start the memory transaction */
1145   old_trec = StgTSO_trec(CurrentTSO);
1146   "ptr" new_trec = foreign "C" stmStartTransaction(old_trec "ptr");
1147   StgTSO_trec(CurrentTSO) = new_trec;
1148
1149   /* Apply R1 to the realworld token */
1150   Sp_adj(-1);
1151   jump RET_LBL(stg_ap_v);
1152 }
1153
1154
1155 catchSTMzh_fast
1156 {
1157   W_ frame;
1158   
1159   /* Args: R1 :: STM a */
1160   /* Args: R2 :: Exception -> STM a */
1161   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1162
1163   /* Set up the catch frame */
1164   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1165   frame = Sp;
1166
1167   SET_HDR(frame, stg_catch_stm_frame_info, CCCS);
1168   StgCatchSTMFrame_handler(frame) = R2;
1169
1170   /* Apply R1 to the realworld token */
1171   Sp_adj(-1);
1172   jump RET_LBL(stg_ap_v);
1173 }
1174
1175
1176 catchRetryzh_fast
1177 {
1178   W_ frame;
1179   W_ new_trec;
1180   W_ trec;
1181
1182   /* Args: R1 :: STM a */
1183   /* Args: R2 :: STM a */
1184   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1185
1186   /* Start a nested transaction within which to run the first code */
1187   trec = StgTSO_trec(CurrentTSO);
1188   "ptr" new_trec = foreign "C" stmStartTransaction(trec "ptr");
1189   StgTSO_trec(CurrentTSO) = new_trec;
1190
1191   /* Set up the catch-retry frame */
1192   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1193   frame = Sp;
1194   
1195   SET_HDR(frame, stg_catch_retry_frame_info, CCCS);
1196   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1197   StgCatchRetryFrame_first_code(frame) = R1;
1198   StgCatchRetryFrame_alt_code(frame) = R2;
1199   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1200
1201   /* Apply R1 to the realworld token */
1202   Sp_adj(-1);
1203   jump RET_LBL(stg_ap_v);  
1204 }
1205
1206
1207 retryzh_fast
1208 {
1209   W_ frame_type;
1210   W_ frame;
1211   W_ trec;
1212   W_ outer;
1213   W_ r;
1214
1215   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1216
1217   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1218 retry_pop_stack:
1219   trec = StgTSO_trec(CurrentTSO);
1220   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1221   StgTSO_sp(CurrentTSO) = Sp;
1222   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1223   Sp = StgTSO_sp(CurrentTSO);
1224   frame = Sp;
1225
1226   if (frame_type == CATCH_RETRY_FRAME) {
1227     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1228     ASSERT(outer != NO_TREC);
1229     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1230       // Retry in the first code: try the alternative
1231       "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1232       StgTSO_trec(CurrentTSO) = trec;
1233       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1234       R1 = StgCatchRetryFrame_alt_code(frame);
1235       Sp_adj(-1);
1236       jump RET_LBL(stg_ap_v);
1237     } else {
1238       // Retry in the alternative code: propagate
1239       W_ other_trec;
1240       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1241       r = foreign "C" stmMergeForWaiting(trec "ptr", other_trec "ptr");
1242       if (r) {
1243         r = foreign "C" stmCommitTransaction(trec "ptr");
1244       }
1245       if (r) {
1246         // Merge between siblings succeeded: commit it back to enclosing transaction
1247         // and then propagate the retry
1248         StgTSO_trec(CurrentTSO) = outer;
1249         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1250         goto retry_pop_stack;
1251       } else {
1252         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1253         "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1254         StgCatchRetryFrame_first_code_trec(frame) = trec;
1255         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1256         StgTSO_trec(CurrentTSO) = trec;
1257         R1 = StgCatchRetryFrame_first_code(frame);
1258         Sp_adj(-1);
1259         jump RET_LBL(stg_ap_v);
1260       }
1261     }
1262   }
1263
1264   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1265   ASSERT(frame_type == ATOMICALLY_FRAME);
1266   ASSERT(outer == NO_TREC);
1267   r = foreign "C" stmWait(CurrentTSO "ptr", trec "ptr");
1268   if (r) {
1269     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1270     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1271     Sp = frame;
1272     // Fix up the stack in the unregisterised case: the return convention is different.
1273     IF_NOT_REG_R1(Sp_adj(-2); 
1274                   Sp(1) = stg_NO_FINALIZER_closure;
1275                   Sp(0) = stg_ut_1_0_unreg_info;)
1276     jump stg_block_noregs;
1277   } else {
1278     // Transaction was not valid: retry immediately
1279     "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1280     StgTSO_trec(CurrentTSO) = trec;
1281     R1 = StgAtomicallyFrame_code(frame);
1282     Sp = frame;
1283     Sp_adj(-1);
1284     jump RET_LBL(stg_ap_v);
1285   }
1286 }
1287
1288
1289 newTVarzh_fast
1290 {
1291   W_ tv;
1292
1293   /* Args: R1 = initialisation value */
1294
1295   ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
1296   tv = Hp - SIZEOF_StgTVar + WDS(1);
1297   SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
1298   StgTVar_current_value(tv) = R1;
1299   StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
1300     
1301   RET_P(tv);
1302 }
1303
1304
1305 readTVarzh_fast
1306 {
1307   W_ trec;
1308   W_ tvar;
1309   W_ result;
1310
1311   /* Args: R1 = TVar closure */
1312
1313   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1314   trec = StgTSO_trec(CurrentTSO);
1315   tvar = R1;
1316   "ptr" result = foreign "C" stmReadTVar(trec "ptr", tvar "ptr");
1317
1318   RET_P(result);
1319 }
1320
1321
1322 writeTVarzh_fast
1323 {
1324   W_ trec;
1325   W_ tvar;
1326   W_ new_value;
1327   
1328   /* Args: R1 = TVar closure */
1329   /*       R2 = New value    */
1330
1331   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1332   trec = StgTSO_trec(CurrentTSO);
1333   tvar = R1;
1334   new_value = R2;
1335   foreign "C" stmWriteTVar(trec "ptr", tvar "ptr", new_value "ptr");
1336
1337   jump %ENTRY_CODE(Sp(0));
1338 }
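/* Usage sketch at the library level (illustrative):
 *
 *   incr :: TVar Int -> STM ()
 *   incr tv = do { x <- readTVar tv; writeTVar tv (x + 1) }
 *
 * atomically (incr tv) runs the read and write in one transaction, using
 * the trec manipulated by the primops above.
 */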
1339
1340
1341 /* -----------------------------------------------------------------------------
1342  * MVar primitives
1343  *
1344  * take & putMVar work as follows.  Firstly, an important invariant:
1345  *
1346  *    If the MVar is full, then the blocking queue contains only
1347  *    threads blocked on putMVar, and if the MVar is empty then the
1348  *    blocking queue contains only threads blocked on takeMVar.
1349  *
1350  * takeMVar:
1351  *    MVar empty : then add ourselves to the blocking queue
1352  *    MVar full  : remove the value from the MVar, and
1353  *                 blocking queue empty     : return
1354  *                 blocking queue non-empty : perform the first blocked putMVar
1355  *                                            from the queue, and wake up the
1356  *                                            thread (MVar is now full again)
1357  *
1358  * putMVar is just the dual of the above algorithm.
1359  *
1360  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1361  * the stack of the thread waiting to do the putMVar.  See
1362  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1363  * the stack layout, and the PerformPut and PerformTake macros below.
1364  *
1365  * It is important that a blocked take or put is woken up with the
1366  * take/put already performed, because otherwise there would be a
1367  * small window of vulnerability where the thread could receive an
1368  * exception and never perform its take or put, and we'd end up with a
1369  * deadlock.
1370  *
1371  * -------------------------------------------------------------------------- */
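/* Library-level sketch of the algorithm above (illustrative):
 *
 *   do m <- newEmptyMVar
 *      forkIO (putMVar m 42)   -- blocks if m is already full
 *      x <- takeMVar m         -- blocks until a value is available
 *
 * takeMVar :: MVar a -> IO a and putMVar :: MVar a -> a -> IO () map onto
 * takeMVarzh_fast and putMVarzh_fast below.
 */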
1372
1373 isEmptyMVarzh_fast
1374 {
1375     /* args: R1 = MVar closure */
1376
1377     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1378         RET_N(1);
1379     } else {
1380         RET_N(0);
1381     }
1382 }
1383
1384 newMVarzh_fast
1385 {
1386     /* args: none */
1387     W_ mvar;
1388
1389     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1390   
1391     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1392     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1393     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1394     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1395     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1396     RET_P(mvar);
1397 }
1398
1399
1400 /* If R1 isn't available, pass it on the stack */
1401 #ifdef REG_R1
1402 #define PerformTake(tso, value)                         \
1403     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1404     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1405 #else
1406 #define PerformTake(tso, value)                                 \
1407     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1408     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1409 #endif
1410
1411 #define PerformPut(tso,lval)                    \
1412     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1413     lval = W_[StgTSO_sp(tso) - WDS(1)];
1414
1415
1416 takeMVarzh_fast
1417 {
1418     W_ mvar, val, info, tso;
1419
1420     /* args: R1 = MVar closure */
1421     mvar = R1;
1422
1423     info = GET_INFO(mvar);
1424
1425     /* If the MVar is empty, put ourselves on its blocking queue,
1426      * and wait until we're woken up.
1427      */
1428     if (info == stg_EMPTY_MVAR_info) {
1429         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1430             StgMVar_head(mvar) = CurrentTSO;
1431         } else {
1432             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1433         }
1434         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1435         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1436         StgTSO_block_info(CurrentTSO)  = mvar;
1437         StgMVar_tail(mvar) = CurrentTSO;
1438         
1439         jump stg_block_takemvar;
1440   }
1441
1442   /* we got the value... */
1443   val = StgMVar_value(mvar);
1444
1445   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1446   {
1447       /* There are putMVar(s) waiting... 
1448        * wake up the first thread on the queue
1449        */
1450       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1451
1452       /* actually perform the putMVar for the thread that we just woke up */
1453       tso = StgMVar_head(mvar);
1454       PerformPut(tso,StgMVar_value(mvar));
1455
1456 #if defined(GRAN) || defined(PAR)
1457       /* ToDo: check 2nd arg (mvar) is right */
1458       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1459       StgMVar_head(mvar) = tso;
1460 #else
1461       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1462       StgMVar_head(mvar) = tso;
1463 #endif
1464       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1465           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1466       }
1467       RET_P(val);
1468   } 
1469   else
1470   {
1471       /* No further putMVars, MVar is now empty */
1472       
1473       /* do this last... we might have locked the MVar in the SMP case,
1474        * and writing the info pointer will unlock it.
1475        */
1476       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1477       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1478       RET_P(val);
1479   }
1480 }
1481
1482
1483 tryTakeMVarzh_fast
1484 {
1485     W_ mvar, val, info, tso;
1486
1487     /* args: R1 = MVar closure */
1488
1489     mvar = R1;
1490
1491     info = GET_INFO(mvar);
1492
1493     if (info == stg_EMPTY_MVAR_info) {
1494         /* HACK: we need a pointer to pass back, 
1495          * so we abuse NO_FINALIZER_closure
1496          */
1497         RET_NP(0, stg_NO_FINALIZER_closure);
1498     }
1499
1500     /* we got the value... */
1501     val = StgMVar_value(mvar);
1502
1503     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1504         /* There are putMVar(s) waiting... 
1505          * wake up the first thread on the queue
1506          */
1507         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1508
1509         /* actually perform the putMVar for the thread that we just woke up */
1510         tso = StgMVar_head(mvar);
1511         PerformPut(tso,StgMVar_value(mvar));
1512
1513 #if defined(GRAN) || defined(PAR)
1514         /* ToDo: check 2nd arg (mvar) is right */
1515         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1516         StgMVar_head(mvar) = tso;
1517 #else
1518         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1519         StgMVar_head(mvar) = tso;
1520 #endif
1521
1522         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1523             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1524         }
1525     }
1526     else 
1527     {
1528         /* No further putMVars, MVar is now empty */
1529         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1530         
1531         /* do this last... we might have locked the MVar in the SMP case,
1532          * and writing the info pointer will unlock it.
1533          */
1534         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1535     }
1536     
1537     RET_NP(1, val);
1538 }
1539
1540
1541 putMVarzh_fast
1542 {
1543     W_ mvar, info, tso;
1544
1545     /* args: R1 = MVar, R2 = value */
1546     mvar = R1;
1547
1548     info = GET_INFO(mvar);
1549
1550     if (info == stg_FULL_MVAR_info) {
1551         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1552             StgMVar_head(mvar) = CurrentTSO;
1553         } else {
1554             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1555         }
1556         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1557         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1558         StgTSO_block_info(CurrentTSO)  = mvar;
1559         StgMVar_tail(mvar) = CurrentTSO;
1560         
1561         jump stg_block_putmvar;
1562     }
1563   
1564     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1565         /* There are takeMVar(s) waiting: wake up the first one
1566          */
1567         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1568
1569         /* actually perform the takeMVar */
1570         tso = StgMVar_head(mvar);
1571         PerformTake(tso, R2);
1572       
1573 #if defined(GRAN) || defined(PAR)
1574         /* ToDo: check 2nd arg (mvar) is right */
1575         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1576         StgMVar_head(mvar) = tso;
1577 #else
1578         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1579         StgMVar_head(mvar) = tso;
1580 #endif
1581
1582         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1583             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1584         }
1585
1586         jump %ENTRY_CODE(Sp(0));
1587     }
1588     else
1589     {
1590         /* No further takes, the MVar is now full. */
1591         StgMVar_value(mvar) = R2;
1592         /* unlocks the MVar in the SMP case */
1593         SET_INFO(mvar,stg_FULL_MVAR_info);
1594         jump %ENTRY_CODE(Sp(0));
1595     }
1596     
1597     /* ToDo: yield afterward for better communication performance? */
1598 }
1599
1600
1601 tryPutMVarzh_fast
1602 {
1603     W_ mvar, info, tso;
1604
1605     /* args: R1 = MVar, R2 = value */
1606     mvar = R1;
1607
1608     info = GET_INFO(mvar);
1609
1610     if (info == stg_FULL_MVAR_info) {
1611         RET_N(0);
1612     }
1613   
1614     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1615         /* There are takeMVar(s) waiting: wake up the first one
1616          */
1617         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1618         
1619         /* actually perform the takeMVar */
1620         tso = StgMVar_head(mvar);
1621         PerformTake(tso, R2);
1622       
1623 #if defined(GRAN) || defined(PAR)
1624         /* ToDo: check 2nd arg (mvar) is right */
1625         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1626         StgMVar_head(mvar) = tso;
1627 #else
1628         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1629         StgMVar_head(mvar) = tso;
1630 #endif
1631
1632         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1633             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1634         }
1635
1636         jump %ENTRY_CODE(Sp(0));
1637     }
1638     else
1639     {
1640         /* No further takes, the MVar is now full. */
1641         StgMVar_value(mvar) = R2;
1642         /* unlocks the MVar in the SMP case */
1643         SET_INFO(mvar,stg_FULL_MVAR_info);
1644         jump %ENTRY_CODE(Sp(0));
1645     }
1646     
1647     /* ToDo: yield afterward for better communication performance? */
1648 }
1649
1650
1651 /* -----------------------------------------------------------------------------
1652    Stable pointer primitives
1653    -------------------------------------------------------------------------  */
1654
1655 makeStableNamezh_fast
1656 {
1657     W_ index, sn_obj;
1658
1659     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1660   
1661     index = foreign "C" lookupStableName(R1 "ptr");
1662
1663     /* Is there already a StableName for this heap object?
1664      *  stable_ptr_table is an array of snEntry structs.
1665      */
1666     if ( snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry) == NULL ) {
1667         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1668         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1669         StgStableName_sn(sn_obj) = index;
1670         snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry) = sn_obj;
1671     } else {
1672         sn_obj = snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry);
1673     }
1674     
1675     RET_P(sn_obj);
1676 }
1677
1678
1679 makeStablePtrzh_fast
1680 {
1681     /* Args: R1 = a */
1682     W_ sp;
1683     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1684     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1685     RET_N(sp);
1686 }
1687
1688 deRefStablePtrzh_fast
1689 {
1690     /* Args: R1 = the stable ptr */
1691     W_ r, sp;
1692     sp = R1;
1693     r = snEntry_addr(stable_ptr_table + sp*SIZEOF_snEntry);
1694     RET_P(r);
1695 }
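/* Library-level view (illustrative): Foreign.StablePtr's
 *   newStablePtr   :: a -> IO (StablePtr a)
 *   deRefStablePtr :: StablePtr a -> IO a
 * and System.Mem.StableName's
 *   makeStableName :: a -> IO (StableName a)
 * sit on top of these primops; the snEntry table above is shared by both.
 */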
1696
1697 /* -----------------------------------------------------------------------------
1698    Bytecode object primitives
1699    -------------------------------------------------------------------------  */
1700
1701 newBCOzh_fast
1702 {
1703     /* R1 = instrs
1704        R2 = literals
1705        R3 = ptrs
1706        R4 = itbls
1707        R5 = arity
1708        R6 = bitmap array
1709     */
1710     W_ bco, bitmap_arr, bytes, words;
1711     
1712     bitmap_arr = R6;
1713     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1714     bytes = WDS(words);
1715
1716     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1717
1718     bco = Hp - bytes + WDS(1);
1719     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1720     
1721     StgBCO_instrs(bco)     = R1;
1722     StgBCO_literals(bco)   = R2;
1723     StgBCO_ptrs(bco)       = R3;
1724     StgBCO_itbls(bco)      = R4;
1725     StgBCO_arity(bco)      = HALF_W_(R5);
1726     StgBCO_size(bco)       = HALF_W_(words);
1727     
1728     // Copy the arity/bitmap info into the BCO
1729     W_ i;
1730     i = 0;
1731 for:
1732     if (i < StgArrWords_words(bitmap_arr)) {
1733         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1734         i = i + 1;
1735         goto for;
1736     }
1737     
1738     RET_P(bco);
1739 }
1740
1741
1742 mkApUpd0zh_fast
1743 {
1744     // R1 = the BCO# for the AP
1745     //  
1746     W_ ap;
1747
1748     // This function is *only* used to wrap zero-arity BCOs in an
1749     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1750     // saturated and always points directly to a FUN or BCO.
1751     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1752            StgBCO_arity(R1) == HALF_W_(0));
1753
1754     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1755     TICK_ALLOC_UP_THK(0, 0);
1756     CCCS_ALLOC(SIZEOF_StgAP);
1757
1758     ap = Hp - SIZEOF_StgAP + WDS(1);
1759     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1760     
1761     StgAP_n_args(ap) = HALF_W_(0);
1762     StgAP_fun(ap) = R1;
1763     
1764     RET_P(ap);
1765 }
1766
1767 /* -----------------------------------------------------------------------------
1768    Thread I/O blocking primitives
1769    -------------------------------------------------------------------------- */
1770
1771 /* Add a thread to the end of the blocked queue. (C-- version of the C
1772  * macro in Schedule.h).
1773  */
1774 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1775     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1776     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1777       W_[blocked_queue_hd] = tso;                       \
1778     } else {                                            \
1779       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1780     }                                                   \
1781     W_[blocked_queue_tl] = tso;
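/* A minimal model of this append, in Haskell rather than C-- (illustrative
 * only, not RTS code): the queue keeps both a head and a tail pointer, so
 * appending is O(1).  Node, Queue, append and walk are names invented for
 * the sketch.
 *
 *   import Data.IORef
 *
 *   data Node a  = Node a (IORef (Maybe (Node a)))
 *   data Queue a = Queue (IORef (Maybe (Node a)))   -- head
 *                        (IORef (Maybe (Node a)))   -- tail
 *
 *   append :: Queue a -> a -> IO ()
 *   append (Queue hd tl) x = do
 *     node <- Node x <$> newIORef Nothing
 *     h <- readIORef hd
 *     case h of
 *       Nothing -> writeIORef hd (Just node)        -- queue was empty
 *       Just _  -> do
 *         Just (Node _ oldLink) <- readIORef tl     -- link old tail to new node
 *         writeIORef oldLink (Just node)
 *     writeIORef tl (Just node)                     -- new node is always the tail
 *
 *   main :: IO ()
 *   main = do
 *     q@(Queue hd _) <- Queue <$> newIORef Nothing <*> newIORef Nothing
 *     mapM_ (append q) "abc"
 *     let walk Nothing              = return ()
 *         walk (Just (Node c next)) = print c >> (walk =<< readIORef next)
 *     walk =<< readIORef hd
 */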
1782
1783 waitReadzh_fast
1784 {
1785     /* args: R1 */
1786 #ifdef THREADED_RTS
1787     foreign "C" barf("waitRead# on threaded RTS");
1788 #endif
1789
1790     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1791     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1792     StgTSO_block_info(CurrentTSO) = R1;
1793     // No locking - we're not going to use this interface in the
1794     // threaded RTS anyway.
1795     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1796     jump stg_block_noregs;
1797 }
1798
1799 waitWritezh_fast
1800 {
1801     /* args: R1 */
1802 #ifdef THREADED_RTS
1803     foreign "C" barf("waitWrite# on threaded RTS");
1804 #endif
1805
1806     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1807     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1808     StgTSO_block_info(CurrentTSO) = R1;
1809     // No locking - we're not going to use this interface in the
1810     // threaded RTS anyway.
1811     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1812     jump stg_block_noregs;
1813 }
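/* In the non-threaded RTS these primops correspond to the user-level
 * threadWaitRead / threadWaitWrite from Control.Concurrent.  Illustrative
 * use (file descriptor 0 = stdin):
 *
 *   import Control.Concurrent (threadWaitRead)
 *   import System.Posix.Types (Fd(..))
 *
 *   main :: IO ()
 *   main = do
 *     putStrLn "waiting for input on stdin..."
 *     threadWaitRead (Fd 0)   -- blocks this thread, not the whole program
 *     putStrLn "stdin is readable"
 */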
1814
1815
1816 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1817 delayzh_fast
1818 {
1819 #ifdef mingw32_HOST_OS
1820     W_ ares;
1821     CInt reqID;
1822 #else
1823     W_ t, prev, target;
1824 #endif
1825
1826 #ifdef THREADED_RTS
1827     foreign "C" barf("delay# on threaded RTS");
1828 #endif
1829
1830     /* args: R1 (microsecond delay amount) */
1831     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1832     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1833
1834 #ifdef mingw32_HOST_OS
1835
1836     /* could probably allocate this on the heap instead */
1837     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1838                                             stg_delayzh_malloc_str);
1839     reqID = foreign "C" addDelayRequest(R1);
1840     StgAsyncIOResult_reqID(ares)   = reqID;
1841     StgAsyncIOResult_len(ares)     = 0;
1842     StgAsyncIOResult_errCode(ares) = 0;
1843     StgTSO_block_info(CurrentTSO)  = ares;
1844
1845     /* Having all async-blocked threads reside on the blocked_queue
1846      * simplifies matters, so change the status to BlockedOnDoProc and
1847      * put the delayed thread on the blocked_queue.
1848      */
1849     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1850     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1851     jump stg_block_async_void;
1852
1853 #else
1854
1855     W_ time;
1856     time = foreign "C" getourtimeofday();
1857     target = (R1 / (TICK_MILLISECS*1000)) + time;
1858     StgTSO_block_info(CurrentTSO) = target;
1859
1860     /* Insert the new thread in the sleeping queue. */
1861     prev = NULL;
1862     t = W_[sleeping_queue];
1863 while:
1864     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1865         prev = t;
1866         t = StgTSO_link(t);
1867         goto while;
1868     }
1869
1870     StgTSO_link(CurrentTSO) = t;
1871     if (prev == NULL) {
1872         W_[sleeping_queue] = CurrentTSO;
1873     } else {
1874         StgTSO_link(prev) = CurrentTSO;
1875     }
1876     jump stg_block_noregs;
1877 #endif
1878 }
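/* delay# backs Control.Concurrent.threadDelay in the non-threaded RTS.  On
 * the non-mingw32 path above the target wake-up time is computed in ticks
 * and the TSO is inserted into sleeping_queue in wake-up order; on mingw32
 * it becomes a BlockedOnDoProc request instead.  Illustrative use:
 *
 *   import Control.Concurrent (forkIO, threadDelay)
 *   import Control.Monad (forM_)
 *
 *   main :: IO ()
 *   main = do
 *     -- threads go to sleep in submission order but wake in time order,
 *     -- mirroring the sorted insert into sleeping_queue above
 *     forM_ [300000, 100000, 200000] $ \us ->
 *       forkIO (threadDelay us >> print us)
 *     threadDelay 500000      -- (illustrative) wait for the children
 */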
1879
1880
1881 #ifdef mingw32_HOST_OS
1882 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1883 asyncReadzh_fast
1884 {
1885     W_ ares;
1886     CInt reqID;
1887
1888 #ifdef THREADED_RTS
1889     foreign "C" barf("asyncRead# on threaded RTS");
1890 #endif
1891
1892     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1893     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1894     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1895
1896     /* could probably allocate this on the heap instead */
1897     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1898                                             stg_asyncReadzh_malloc_str);
1899     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1900     StgAsyncIOResult_reqID(ares)   = reqID;
1901     StgAsyncIOResult_len(ares)     = 0;
1902     StgAsyncIOResult_errCode(ares) = 0;
1903     StgTSO_block_info(CurrentTSO)  = ares;
1904     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1905     jump stg_block_async;
1906 }
1907
1908 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1909 asyncWritezh_fast
1910 {
1911     W_ ares;
1912     CInt reqID;
1913
1914 #ifdef THREADED_RTS
1915     foreign "C" barf("asyncWrite# on threaded RTS");
1916 #endif
1917
1918     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1919     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1920     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1921
1922     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1923                                             stg_asyncWritezh_malloc_str);
1924     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
1925
1926     StgAsyncIOResult_reqID(ares)   = reqID;
1927     StgAsyncIOResult_len(ares)     = 0;
1928     StgAsyncIOResult_errCode(ares) = 0;
1929     StgTSO_block_info(CurrentTSO)  = ares;
1930     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1931     jump stg_block_async;
1932 }
1933
1934 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
1935 asyncDoProczh_fast
1936 {
1937     W_ ares;
1938     CInt reqID;
1939
1940     /* args: R1 = proc, R2 = param */
1941     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1942     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1943
1944     /* could probably allocate this on the heap instead */
1945     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1946                                             stg_asyncDoProczh_malloc_str);
1947     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
1948     StgAsyncIOResult_reqID(ares)   = reqID;
1949     StgAsyncIOResult_len(ares)     = 0;
1950     StgAsyncIOResult_errCode(ares) = 0;
1951     StgTSO_block_info(CurrentTSO) = ares;
1952     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1953     jump stg_block_async;
1954 }
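/* On Windows these primops sit behind the corresponding wrappers in GHC.Conc
 * (asyncRead, asyncWrite, asyncDoProc): the request is queued with the I/O
 * manager and the calling thread blocks until a worker fills in the
 * StgAsyncIOResult.  Sketch of a read via the wrapper; the module and
 * signature shown are assumptions that may differ between GHC versions, so
 * treat this as illustrative only:
 *
 *   import GHC.Conc (asyncRead)
 *   import Foreign.Marshal.Alloc (allocaBytes)
 *
 *   main :: IO ()
 *   main =
 *     allocaBytes 512 $ \buf -> do
 *       -- fd 0, not a socket, read up to 512 bytes
 *       (len, err) <- asyncRead 0 0 512 buf
 *       putStrLn ("read " ++ show len ++ " bytes, errCode " ++ show err)
 */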
1955 #endif
1956
1957 /* -----------------------------------------------------------------------------
1958   ** temporary **
1959
1960    classes CCallable and CReturnable don't really exist, but the
1961    compiler insists on generating dictionaries containing references
1962    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
1963    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
1964    so we have to make these one element long.
1965   --------------------------------------------------------------------------- */
1966
1967 section "rodata" {
1968   GHC_ZCCCallable_static_info:   W_ 0;
1969 }
1970
1971 section "rodata" {
1972   GHC_ZCCReturnable_static_info: W_ 0;
1973 }