1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
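/* For example, most two-argument primops below read their arguments from
 * R1 and R2 and finish with one of the RET_xxxx() macros -- e.g.
 * RET_NP(n,p) returns a non-pointer and a pointer as an unboxed pair,
 * while RET_P(p) returns a single pointer.
 */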
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *smaller* than the size of a word, make sure we
42  * round the size of the array up to the nearest whole word.
43  */
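/* Worked example (illustrative, assuming 8-byte words): a request for 10
 * bytes rounds up to ROUNDUP_BYTES_TO_WDS(10) = 2 payload words, so the
 * object occupies the StgArrWords header plus 2 words; a request for 0
 * bytes still gets a well-formed header with 0 payload words.
 */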
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocate(words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
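/* Worked example for the alignment fix-up above (illustrative numbers,
 * assuming 4-byte words, so a 12-byte header is not a multiple of 8): if
 * SIZEOF_StgArrWords were 12 bytes, the payload of an 8-byte aligned
 * allocation would start at offset 12.  Allocating one extra word and
 * advancing p by WDS(1) = 4 bytes moves the payload start to offset 16,
 * which is 8-byte aligned again.
 */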
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = number of elements, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocate(words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
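/* A rough Haskell-level sketch of what newArray# provides, seen through
 * the boxed Data.Array.ST layer (an assumption about the library surface,
 * not part of this file):
 *
 *     import Data.Array (Array)
 *     import Data.Array.ST (newArray, writeArray, runSTArray)
 *
 *     demo :: Array Int Char
 *     demo = runSTArray $ do
 *         arr <- newArray (0, 3) 'x'   -- every slot initialised to 'x'
 *         writeArray arr 2 'y'
 *         return arr
 */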
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the mut_link field is NULL if it isn't on a mut_list, and the GC
135   // maintains this invariant.
136   //
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutable(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169 #if defined(SMP)
170     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
171 #endif
172
173     /* If x is the current contents of the MutVar#, then we want the
174        new contents to point to
175
176          (sel_0 (f x))
177
178        and the return value to be
179
180          (sel_1 (f x))
181
182        Obviously we can share (f x):
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
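/* A hedged Haskell-level sketch of the same construction: the boxed
 * wrapper atomicModifyIORef (Data.IORef) hands a pair-returning function
 * to this primop, so fst of the pair becomes the new contents and snd
 * becomes the result, both built lazily as the thunks z, y and r below:
 *
 *     import Data.IORef (IORef, atomicModifyIORef)
 *
 *     -- increment a counter and return its old value
 *     bumpCounter :: IORef Int -> IO Int
 *     bumpCounter r = atomicModifyIORef r (\x -> (x + 1, x))
 */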
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209    x = StgMutVar_var(R1);
210
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
246 mkForeignObjzh_fast
247 {
248   /* R1 = ptr to foreign object
249   */
250   W_ result;
251
252   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
253
254   result = Hp - SIZEOF_StgForeignObj + WDS(1);
255   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
256   StgForeignObj_data(result) = R1;
257
258   /* returns (# s#, ForeignObj# #) */
259   RET_P(result);
260 }
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
266 STRING(stg_weak_msg,"New weak pointer at %p\n")
267
268 mkWeakzh_fast
269 {
270   /* R1 = key
271      R2 = value
272      R3 = finalizer (or NULL)
273   */
274   W_ w;
275
276   if (R3 == NULL) {
277     R3 = stg_NO_FINALIZER_closure;
278   }
279
280   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
281
282   w = Hp - SIZEOF_StgWeak + WDS(1);
283   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
284
285   StgWeak_key(w)       = R1;
286   StgWeak_value(w)     = R2;
287   StgWeak_finalizer(w) = R3;
288
289   StgWeak_link(w)       = W_[weak_ptr_list];
290   W_[weak_ptr_list]     = w;
291
292   IF_DEBUG(weak, foreign "C" fprintf(stderr,stg_weak_msg,w));
293
294   RET_P(w);
295 }
296
297
298 finalizzeWeakzh_fast
299 {
300   /* R1 = weak ptr
301    */
302   W_ w, f;
303
304   w = R1;
305
306   // already dead?
307   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
308       RET_NP(0,stg_NO_FINALIZER_closure);
309   }
310
311   // kill it
312 #ifdef PROFILING
313   // @LDV profiling
314   // A weak pointer is inherently used, so we do not need to call
315   // LDV_recordDead_FILL_SLOP_DYNAMIC():
316   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
317   // or, LDV_recordDead():
318   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
319   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
320   // large as weak pointers, so there is no need to fill the slop, either.
321   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
322 #endif
323
324   //
325   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
326   //
327   SET_INFO(w,stg_DEAD_WEAK_info);
328   LDV_RECORD_CREATE(w);
329
330   f = StgWeak_finalizer(w);
331   StgDeadWeak_link(w) = StgWeak_link(w);
332
333   /* return the finalizer */
334   if (f == stg_NO_FINALIZER_closure) {
335       RET_NP(0,stg_NO_FINALIZER_closure);
336   } else {
337       RET_NP(1,f);
338   }
339 }
340
341 deRefWeakzh_fast
342 {
343   /* R1 = weak ptr */
344   W_ w, code, val;
345
346   w = R1;
347   if (GET_INFO(w) == stg_WEAK_info) {
348     code = 1;
349     val = StgWeak_value(w);
350   } else {
351     code = 0;
352     val = w;
353   }
354   RET_NP(code,val);
355 }
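/* A rough sketch of how these primops surface in the boxed libraries
 * (assuming the System.Mem.Weak API; mkWeak, deRefWeak and finalize map
 * onto mkWeakzh, deRefWeakzh and finalizzeWeakzh respectively):
 *
 *     import System.Mem.Weak (mkWeak, deRefWeak, finalize)
 *
 *     demo :: IO ()
 *     demo = do
 *         let key = "some key"
 *         w  <- mkWeak key (42 :: Int) (Just (putStrLn "finalised"))
 *         v  <- deRefWeak w      -- Just 42 while the key is alive
 *         print v
 *         finalize w             -- run the finalizer, marking w dead
 *         v' <- deRefWeak w      -- Nothing once the weak pointer is dead
 *         print v'
 */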
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
364 int2Integerzh_fast
365 {
366    /* arguments: R1 = Int# */
367
368    W_ val, s, p;        /* to avoid aliasing */
369
370    val = R1;
371    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
372
373    p = Hp - SIZEOF_StgArrWords;
374    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
375    StgArrWords_words(p) = 1;
376
377    /* mpz_set_si is inlined here, makes things simpler */
378    if (%lt(val,0)) { 
379         s  = -1;
380         Hp(0) = -val;
381    } else { 
382      if (%gt(val,0)) {
383         s = 1;
384         Hp(0) = val;
385      } else {
386         s = 0;
387      }
388   }
389
390    /* returns (# size  :: Int#, 
391                  data  :: ByteArray# 
392                #)
393    */
394    RET_NP(s,p);
395 }
396
397 word2Integerzh_fast
398 {
399    /* arguments: R1 = Word# */
400
401    W_ val, s, p;        /* to avoid aliasing */
402
403    val = R1;
404
405    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
406
407    p = Hp - SIZEOF_StgArrWords;
408    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
409    StgArrWords_words(p) = 1;
410
411    if (val != 0) {
412         s = 1;
413         W_[Hp] = val;
414    } else {
415         s = 0;
416    }
417
418    /* returns (# size  :: Int#, 
419                  data  :: ByteArray# #)
420    */
421    RET_NP(s,p);
422 }
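/* The (size, ByteArray#) pairs returned here are consumed on the library
 * side by the GMP-backed Integer type.  A hypothetical mirror of that
 * representation, for illustration only (the real definition lives in the
 * base libraries, not here):
 *
 *     {-# LANGUAGE MagicHash #-}
 *     import GHC.Exts (Int#, ByteArray#)
 *
 *     data MyInteger = Small Int#             -- fits in one word
 *                    | Big   Int# ByteArray#  -- signed size plus GMP limbs
 */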
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
431 int64ToIntegerzh_fast
432 {
433    /* arguments: L1 = Int64# */
434
435    L_ val;
436    W_ hi, s, neg, words_needed, p;
437
438    val = L1;
439    neg = 0;
440
441    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
442        words_needed = 2;
443    } else { 
444        // minimum is one word
445        words_needed = 1;
446    }
447
448    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
449                NO_PTRS, int64ToIntegerzh_fast );
450
451    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
452    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
453    StgArrWords_words(p) = words_needed;
454
455    if ( %lt(val,0::L_) ) {
456      neg = 1;
457      val = -val;
458    }
459
460    hi = TO_W_(val >> 32);
461
462    if ( words_needed == 2 )  { 
463       s = 2;
464       Hp(-1) = TO_W_(val);
465       Hp(0) = hi;
466    } else { 
467        if ( val != 0::L_ ) {
468            s = 1;
469            Hp(0) = TO_W_(val);
470        } else /* val==0 */  {
471            s = 0;
472        }
473    }
474    if ( neg != 0 ) {
475         s = -s;
476    }
477
478    /* returns (# size  :: Int#, 
479                  data  :: ByteArray# #)
480    */
481    RET_NP(s,p);
482 }
483
484 word64ToIntegerzh_fast
485 {
486    /* arguments: L1 = Word64# */
487
488    L_ val;
489    W_ hi, s, words_needed, p;
490
491    val = L1;
492    if ( val >= 0x100000000::L_ ) {
493       words_needed = 2;
494    } else {
495       words_needed = 1;
496    }
497
498    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
499                NO_PTRS, word64ToIntegerzh_fast );
500
501    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
502    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
503    StgArrWords_words(p) = words_needed;
504
505    hi = TO_W_(val >> 32);
506    if ( val >= 0x100000000::L_ ) { 
507      s = 2;
508      Hp(-1) = TO_W_(val);
509      Hp(0)  = hi;
510    } else {
511        if ( val != 0::L_ ) {
512            s = 1;
513            Hp(0) = TO_W_(val);
514        } else /* val==0 */  {
515            s = 0;
516        }
517    }
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# #)
521    */
522    RET_NP(s,p);
523 }
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
528 /* ToDo: this is shockingly inefficient */
529
530 section "bss" {
531   mp_tmp1:
532     bits8 [SIZEOF_MP_INT];
533 }
534
535 section "bss" {
536   mp_tmp2:
537     bits8 [SIZEOF_MP_INT];
538 }
539
540 section "bss" {
541   result1:
542     bits8 [SIZEOF_MP_INT];
543 }
544
545 section "bss" {
546   result2:
547     bits8 [SIZEOF_MP_INT];
548 }
549
550 #define GMP_TAKE2_RET1(name,mp_fun)                     \
551 name                                                    \
552 {                                                       \
553   CInt s1, s2;                                          \
554   W_ d1, d2;                                            \
555                                                         \
556   /* call doYouWantToGC() */                            \
557   MAYBE_GC(R2_PTR & R4_PTR, name);                      \
558                                                         \
559   s1 = W_TO_INT(R1);                                    \
560   d1 = R2;                                              \
561   s2 = W_TO_INT(R3);                                    \
562   d2 = R4;                                              \
563                                                         \
564   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));  \
565   MP_INT__mp_size(mp_tmp1)  = (s1);                     \
566   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);         \
567   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));  \
568   MP_INT__mp_size(mp_tmp2)  = (s2);                     \
569   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);         \
570                                                         \
571   foreign "C" mpz_init(result1);                        \
572                                                         \
573   /* Perform the operation */                           \
574   foreign "C" mp_fun(result1,mp_tmp1,mp_tmp2);          \
575                                                         \
576   RET_NP(TO_W_(MP_INT__mp_size(result1)),               \
577          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);   \
578 }
579
580 #define GMP_TAKE1_RET1(name,mp_fun)                             \
581 name                                                            \
582 {                                                               \
583   CInt s1;                                                      \
584   W_ d1;                                                        \
585                                                                 \
586   /* call doYouWantToGC() */                                    \
587   MAYBE_GC(R2_PTR, name);                                       \
588                                                                 \
589   d1 = R2;                                                      \
590   s1 = W_TO_INT(R1);                                            \
591                                                                 \
592   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
593   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
594   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
595                                                                 \
596   foreign "C" mpz_init(result1);                                \
597                                                                 \
598   /* Perform the operation */                                   \
599   foreign "C" mp_fun(result1,mp_tmp1);                          \
600                                                                 \
601   RET_NP(TO_W_(MP_INT__mp_size(result1)),                       \
602          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);           \
603 }
604
605 #define GMP_TAKE2_RET2(name,mp_fun)                             \
606 name                                                            \
607 {                                                               \
608   CInt s1, s2;                                                  \
609   W_ d1, d2;                                                    \
610                                                                 \
611   /* call doYouWantToGC() */                                    \
612   MAYBE_GC(R2_PTR & R4_PTR, name);                              \
613                                                                 \
614   s1 = W_TO_INT(R1);                                            \
615   d1 = R2;                                                      \
616   s2 = W_TO_INT(R3);                                            \
617   d2 = R4;                                                      \
618                                                                 \
619   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
620   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
621   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
622   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));      \
623   MP_INT__mp_size(mp_tmp2)      = (s2);                         \
624   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);             \
625                                                                 \
626   foreign "C" mpz_init(result1);                                \
627   foreign "C" mpz_init(result2);                                \
628                                                                 \
629   /* Perform the operation */                                   \
630   foreign "C" mp_fun(result1,result2,mp_tmp1,mp_tmp2);          \
631                                                                 \
632   RET_NPNP(TO_W_(MP_INT__mp_size(result1)),                     \
633            MP_INT__mp_d(result1) - SIZEOF_StgArrWords,          \
634            TO_W_(MP_INT__mp_size(result2)),                     \
635            MP_INT__mp_d(result2) - SIZEOF_StgArrWords);         \
636 }
637
638 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
639 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
640 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
641 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
642 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
643 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
644 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
645 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
646 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
647 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
648 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
649
650 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
651 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
652
653 section "bss" {
654   aa:  W_; // NB. aa is really an mp_limb_t
655 }
656
657 gcdIntzh_fast
658 {
659     /* R1 = the first Int#; R2 = the second Int# */
660     W_ r; 
661
662     W_[aa] = R1;
663     r = foreign "C" mpn_gcd_1(aa, 1, R2);
664
665     R1 = r;
666     /* Result parked in R1, return via info-pointer at TOS */
667     jump %ENTRY_CODE(Sp(0));
668 }
669
670
671 gcdIntegerIntzh_fast
672 {
673     /* R1 = s1; R2 = d1; R3 = the int */
674     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
675     
676     /* Result parked in R1, return via info-pointer at TOS */
677     jump %ENTRY_CODE(Sp(0));
678 }
679
680
681 cmpIntegerIntzh_fast
682 {
683     /* R1 = s1; R2 = d1; R3 = the int */
684     W_ usize, vsize, v_digit, u_digit;
685
686     usize = R1;
687     vsize = 0;
688     v_digit = R3;
689
690     // paraphrased from mpz_cmp_si() in the GMP sources
691     if (%gt(v_digit,0)) {
692         vsize = 1;
693     } else { 
694         if (%lt(v_digit,0)) {
695             vsize = -1;
696             v_digit = -v_digit;
697         }
698     }
699
700     if (usize != vsize) {
701         R1 = usize - vsize; 
702         jump %ENTRY_CODE(Sp(0));
703     }
704
705     if (usize == 0) {
706         R1 = 0; 
707         jump %ENTRY_CODE(Sp(0));
708     }
709
710     u_digit = W_[BYTE_ARR_CTS(R2)];
711
712     if (u_digit == v_digit) {
713         R1 = 0; 
714         jump %ENTRY_CODE(Sp(0));
715     }
716
717     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
718         R1 = usize; 
719     } else {
720         R1 = -usize; 
721     }
722
723     jump %ENTRY_CODE(Sp(0));
724 }
725
726 cmpIntegerzh_fast
727 {
728     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
729     W_ usize, vsize, size, up, vp;
730     CInt cmp;
731
732     // paraphrased from mpz_cmp() in the GMP sources
733     usize = R1;
734     vsize = R3;
735
736     if (usize != vsize) {
737         R1 = usize - vsize; 
738         jump %ENTRY_CODE(Sp(0));
739     }
740
741     if (usize == 0) {
742         R1 = 0; 
743         jump %ENTRY_CODE(Sp(0));
744     }
745
746     if (%lt(usize,0)) { // NB. not <, which is unsigned
747         size = -usize;
748     } else {
749         size = usize;
750     }
751
752     up = BYTE_ARR_CTS(R2);
753     vp = BYTE_ARR_CTS(R4);
754
755     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
756
757     if (cmp == 0 :: CInt) {
758         R1 = 0; 
759         jump %ENTRY_CODE(Sp(0));
760     }
761
762     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
763         R1 = 1;
764     } else {
765         R1 = (-1); 
766     }
767     /* Result parked in R1, return via info-pointer at TOS */
768     jump %ENTRY_CODE(Sp(0));
769 }
770
771 integer2Intzh_fast
772 {
773     /* R1 = s; R2 = d */
774     W_ r, s;
775
776     s = R1;
777     if (s == 0) {
778         r = 0;
779     } else {
780         r = W_[R2 + SIZEOF_StgArrWords];
781         if (%lt(s,0)) {
782             r = -r;
783         }
784     }
785     /* Result parked in R1, return via info-pointer at TOS */
786     R1 = r;
787     jump %ENTRY_CODE(Sp(0));
788 }
789
790 integer2Wordzh_fast
791 {
792   /* R1 = s; R2 = d */
793   W_ r, s;
794
795   s = R1;
796   if (s == 0) {
797     r = 0;
798   } else {
799     r = W_[R2 + SIZEOF_StgArrWords];
800     if (%lt(s,0)) {
801         r = -r;
802     }
803   }
804   /* Result parked in R1, return via info-pointer at TOS */
805   R1 = r;
806   jump %ENTRY_CODE(Sp(0));
807 }
808
809 section "bss" {
810   exponent:  W_;
811 }
812
813 decodeFloatzh_fast
814 {
815     W_ p;
816     F_ arg;
817     
818     /* arguments: F1 = Float# */
819     arg = F1;
820     
821     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
822     
823     /* Be prepared to tell Lennart-coded __decodeFloat
824        where mantissa._mp_d can be put (it does not care about the rest) */
825     p = Hp - SIZEOF_StgArrWords;
826     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
827     StgArrWords_words(p) = 1;
828     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
829     
830     /* Perform the operation */
831     foreign "C" __decodeFloat(mp_tmp1,exponent,arg);
832     
833     /* returns: (Int# (expn), Int#, ByteArray#) */
834     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
835 }
836
837 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
838 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
839
840 decodeDoublezh_fast
841 {
842     D_ arg;
843     W_ p;
844
845     /* arguments: D1 = Double# */
846     arg = D1;
847
848     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
849     
850     /* Be prepared to tell Lennart-coded __decodeDouble
851        where mantissa.d can be put (it does not care about the rest) */
852     p = Hp - ARR_SIZE + WDS(1);
853     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
854     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
855     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
856
857     /* Perform the operation */
858     foreign "C" __decodeDouble(mp_tmp1,exponent,arg);
859     
860     /* returns: (Int# (expn), Int#, ByteArray#) */
861     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
862 }
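/* Boxed-level view (a sketch, not this file's code): the library
 * reassembles the exponent, mantissa size and mantissa limbs returned
 * above into the (mantissa, exponent) pair of Prelude.decodeFloat, where
 * x == fromInteger m * 2 ^^ e for finite non-zero x:
 *
 *     main :: IO ()
 *     main = do
 *         print (decodeFloat (0.5 :: Float))   -- (8388608,-24), i.e. 2^23 * 2^-24
 *         print (decodeFloat (0.5 :: Double))  -- (4503599627370496,-53), i.e. 2^52 * 2^-53
 */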
863
864 /* -----------------------------------------------------------------------------
865  * Concurrency primitives
866  * -------------------------------------------------------------------------- */
867
868 forkzh_fast
869 {
870   /* args: R1 = closure to spark */
871   
872   MAYBE_GC(R1_PTR, forkzh_fast);
873
874   // create it right now, return ThreadID in R1
875   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
876                                    R1 "ptr");
877   foreign "C" scheduleThread(R1 "ptr");
878
879   // switch at the earliest opportunity
880   CInt[context_switch] = 1 :: CInt;
881   
882   RET_P(R1);
883 }
884
885 yieldzh_fast
886 {
887   jump stg_yield_noregs;
888 }
889
890 myThreadIdzh_fast
891 {
892   /* no args. */
893   RET_P(CurrentTSO);
894 }
895
896 labelThreadzh_fast
897 {
898   /* args: 
899         R1 = ThreadId#
900         R2 = Addr# */
901 #ifdef DEBUG
902   foreign "C" labelThread(R1 "ptr", R2 "ptr");
903 #endif
904   jump %ENTRY_CODE(Sp(0));
905 }
906
907 isCurrentThreadBoundzh_fast
908 {
909   /* no args */
910   W_ r;
911   r = foreign "C" isThreadBound(CurrentTSO);
912   RET_N(r);
913 }
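/* A rough Haskell-level sketch of the boxed wrappers over these primops
 * (assuming the Control.Concurrent / GHC.Conc surface):
 *
 *     import Control.Concurrent (forkIO, myThreadId, isCurrentThreadBound)
 *     import GHC.Conc (labelThread)
 *
 *     demo :: IO ()
 *     demo = do
 *         tid <- forkIO $ do
 *             me <- myThreadId            -- myThreadIdzh: returns CurrentTSO
 *             labelThread me "worker"     -- labelThreadzh: no-op unless DEBUG
 *             bound <- isCurrentThreadBound
 *             print bound
 *         print tid
 */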
914
915
916 /* -----------------------------------------------------------------------------
917  * TVar primitives
918  * -------------------------------------------------------------------------- */
919
920 #ifdef REG_R1
921 #define SP_OFF 0
922 #define IF_NOT_REG_R1(x) 
923 #else
924 #define SP_OFF 1
925 #define IF_NOT_REG_R1(x) x
926 #endif
927
928 // Catch retry frame ------------------------------------------------------------
929
930 #define CATCH_RETRY_FRAME_ERROR(label) \
931   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
932
933 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
934 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
935 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
936 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
937 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
938 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
939 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
940 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
941
942 #if MAX_VECTORED_RTN > 8
943 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
944 #endif
945
946 #if defined(PROFILING)
947 #define CATCH_RETRY_FRAME_BITMAP 7
948 #define CATCH_RETRY_FRAME_WORDS  6
949 #else
950 #define CATCH_RETRY_FRAME_BITMAP 1
951 #define CATCH_RETRY_FRAME_WORDS  4
952 #endif
953
954 INFO_TABLE_RET(stg_catch_retry_frame,
955                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
956                CATCH_RETRY_FRAME,
957                stg_catch_retry_frame_0_ret,
958                stg_catch_retry_frame_1_ret,
959                stg_catch_retry_frame_2_ret,
960                stg_catch_retry_frame_3_ret,
961                stg_catch_retry_frame_4_ret,
962                stg_catch_retry_frame_5_ret,
963                stg_catch_retry_frame_6_ret,
964                stg_catch_retry_frame_7_ret)
965 {
966    W_ r, frame, trec, outer;
967    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
968
969    frame = Sp;
970    trec = StgTSO_trec(CurrentTSO);
971    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
972    r = foreign "C" stmCommitTransaction(trec "ptr");
973    if (r) {
974      /* Succeeded (either first branch or second branch) */
975      StgTSO_trec(CurrentTSO) = outer;
976      Sp = Sp + SIZEOF_StgCatchRetryFrame;
977      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
978      jump %ENTRY_CODE(Sp(SP_OFF));
979    } else {
980      /* Did not commit: retry */
981      W_ new_trec;
982      "ptr" new_trec = foreign "C" stmStartTransaction(outer "ptr");
983      StgTSO_trec(CurrentTSO) = new_trec;
984      if (StgCatchRetryFrame_running_alt_code(frame)) {
985        R1 = StgCatchRetryFrame_alt_code(frame);
986      } else {
987        R1 = StgCatchRetryFrame_first_code(frame);
988        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
989      }
990      Sp_adj(-1);
991      jump RET_LBL(stg_ap_v);
992    }
993 }
994
995
996 // Atomically frame -------------------------------------------------------------
997
998
999 #define ATOMICALLY_FRAME_ERROR(label) \
1000   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1001
1002 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1003 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1004 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1005 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1006 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1007 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1008 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1010
1011 #if MAX_VECTORED_RTN > 8
1012 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1013 #endif
1014
1015 #if defined(PROFILING)
1016 #define ATOMICALLY_FRAME_BITMAP 7
1017 #define ATOMICALLY_FRAME_WORDS  4
1018 #else
1019 #define ATOMICALLY_FRAME_BITMAP 1
1020 #define ATOMICALLY_FRAME_WORDS  2
1021 #endif
1022
1023
1024 INFO_TABLE_RET(stg_atomically_frame,
1025                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1026                ATOMICALLY_FRAME,
1027                stg_atomically_frame_0_ret,
1028                stg_atomically_frame_1_ret,
1029                stg_atomically_frame_2_ret,
1030                stg_atomically_frame_3_ret,
1031                stg_atomically_frame_4_ret,
1032                stg_atomically_frame_5_ret,
1033                stg_atomically_frame_6_ret,
1034                stg_atomically_frame_7_ret)
1035 {
1036    W_ frame, trec, valid;
1037    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1038
1039    frame = Sp;
1040    trec = StgTSO_trec(CurrentTSO);
1041    if (StgAtomicallyFrame_waiting(frame)) {
1042      /* The TSO is currently waiting: should we stop waiting? */
1043      valid = foreign "C" stmReWait(CurrentTSO "ptr");
1044      if (valid) {
1045        /* Previous attempt is still valid: no point trying again yet */
1046           IF_NOT_REG_R1(Sp_adj(-2);
1047                         Sp(1) = stg_NO_FINALIZER_closure;
1048                         Sp(0) = stg_ut_1_0_unreg_info;)
1049        jump stg_block_noregs;
1050      } else {
1051        /* Previous attempt is no longer valid: try again */
1052        "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
1053        StgTSO_trec(CurrentTSO) = trec;
1054        StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
1055        R1 = StgAtomicallyFrame_code(frame);
1056        Sp_adj(-1);
1057        jump RET_LBL(stg_ap_v);
1058      }
1059    } else {
1060      /* The TSO is not currently waiting: try to commit the transaction */
1061      valid = foreign "C" stmCommitTransaction(trec "ptr");
1062      if (valid) {
1063        /* Transaction was valid: commit succeeded */
1064        StgTSO_trec(CurrentTSO) = NO_TREC;
1065        Sp = Sp + SIZEOF_StgAtomicallyFrame;
1066        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1067        jump %ENTRY_CODE(Sp(SP_OFF));
1068      } else {
1069        /* Transaction was not valid: try again */
1070        "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
1071        StgTSO_trec(CurrentTSO) = trec;
1072        R1 = StgAtomicallyFrame_code(frame);
1073        Sp_adj(-1);
1074        jump RET_LBL(stg_ap_v);
1075      }
1076    }
1077 }
1078
1079
1080 // STM catch frame --------------------------------------------------------------
1081
1082 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1083    label                                                   \
1084    {                                                       \
1085       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1086       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1087       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1088       jump ret;                                            \
1089    }
1090
1091 #ifdef REG_R1
1092 #define SP_OFF 0
1093 #else
1094 #define SP_OFF 1
1095 #endif
1096
1097 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1098 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1099 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1100 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1101 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1102 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1103 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1104 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1105
1106 #if MAX_VECTORED_RTN > 8
1107 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1108 #endif
1109
1110 #if defined(PROFILING)
1111 #define CATCH_STM_FRAME_BITMAP 3
1112 #define CATCH_STM_FRAME_WORDS  3
1113 #else
1114 #define CATCH_STM_FRAME_BITMAP 0
1115 #define CATCH_STM_FRAME_WORDS  1
1116 #endif
1117
1118 /* Catch frames are very similar to update frames, but when entering
1119  * one we just pop the frame off the stack and perform the correct
1120  * kind of return to the activation record underneath us on the stack.
1121  */
1122
1123 INFO_TABLE_RET(stg_catch_stm_frame,
1124                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1125                CATCH_STM_FRAME,
1126                stg_catch_stm_frame_0_ret,
1127                stg_catch_stm_frame_1_ret,
1128                stg_catch_stm_frame_2_ret,
1129                stg_catch_stm_frame_3_ret,
1130                stg_catch_stm_frame_4_ret,
1131                stg_catch_stm_frame_5_ret,
1132                stg_catch_stm_frame_6_ret,
1133                stg_catch_stm_frame_7_ret)
1134 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1135
1136
1137 // Primop definition ------------------------------------------------------------
1138
1139 atomicallyzh_fast
1140 {
1141   W_ frame;
1142   W_ old_trec;
1143   W_ new_trec;
1144   
1145   /* Args: R1 = m :: STM a */
1146   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1147
1148   /* Set up the atomically frame */
1149   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1150   frame = Sp;
1151
1152   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1153   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1154   StgAtomicallyFrame_code(frame) = R1;
1155
1156   /* Start the memory transaction */
1157   old_trec = StgTSO_trec(CurrentTSO);
1158   "ptr" new_trec = foreign "C" stmStartTransaction(old_trec "ptr");
1159   StgTSO_trec(CurrentTSO) = new_trec;
1160
1161   /* Apply R1 to the realworld token */
1162   Sp_adj(-1);
1163   jump RET_LBL(stg_ap_v);
1164 }
1165
1166
1167 catchSTMzh_fast
1168 {
1169   W_ frame;
1170   
1171   /* Args: R1 :: STM a */
1172   /* Args: R2 :: Exception -> STM a */
1173   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1174
1175   /* Set up the catch frame */
1176   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1177   frame = Sp;
1178
1179   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1180   StgCatchSTMFrame_handler(frame) = R2;
1181
1182   /* Apply R1 to the realworld token */
1183   Sp_adj(-1);
1184   jump RET_LBL(stg_ap_v);
1185 }
1186
1187
1188 catchRetryzh_fast
1189 {
1190   W_ frame;
1191   W_ new_trec;
1192   W_ trec;
1193
1194   /* Args: R1 :: STM a */
1195   /* Args: R2 :: STM a */
1196   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1197
1198   /* Start a nested transaction within which to run the first code */
1199   trec = StgTSO_trec(CurrentTSO);
1200   "ptr" new_trec = foreign "C" stmStartTransaction(trec "ptr");
1201   StgTSO_trec(CurrentTSO) = new_trec;
1202
1203   /* Set up the catch-retry frame */
1204   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1205   frame = Sp;
1206   
1207   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1208   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1209   StgCatchRetryFrame_first_code(frame) = R1;
1210   StgCatchRetryFrame_alt_code(frame) = R2;
1211   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1212
1213   /* Apply R1 to the realworld token */
1214   Sp_adj(-1);
1215   jump RET_LBL(stg_ap_v);  
1216 }
1217
1218
1219 retryzh_fast
1220 {
1221   W_ frame_type;
1222   W_ frame;
1223   W_ trec;
1224   W_ outer;
1225   W_ r;
1226
1227   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1228
1229   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1230 retry_pop_stack:
1231   trec = StgTSO_trec(CurrentTSO);
1232   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1233   StgTSO_sp(CurrentTSO) = Sp;
1234   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1235   Sp = StgTSO_sp(CurrentTSO);
1236   frame = Sp;
1237
1238   if (frame_type == CATCH_RETRY_FRAME) {
1239     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1240     ASSERT(outer != NO_TREC);
1241     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1242       // Retry in the first code: try the alternative
1243       "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1244       StgTSO_trec(CurrentTSO) = trec;
1245       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1246       R1 = StgCatchRetryFrame_alt_code(frame);
1247       Sp_adj(-1);
1248       jump RET_LBL(stg_ap_v);
1249     } else {
1250       // Retry in the alternative code: propagate
1251       W_ other_trec;
1252       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1253       r = foreign "C" stmMergeForWaiting(trec "ptr", other_trec "ptr");
1254       if (r) {
1255         r = foreign "C" stmCommitTransaction(trec "ptr");
1256       }
1257       if (r) {
1258         // Merge between siblings succeeded: commit it back to enclosing transaction
1259         // and then propagate the retry
1260         StgTSO_trec(CurrentTSO) = outer;
1261         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1262         goto retry_pop_stack;
1263       } else {
1264         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1265         "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1266         StgCatchRetryFrame_first_code_trec(frame) = trec;
1267         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1268         StgTSO_trec(CurrentTSO) = trec;
1269         R1 = StgCatchRetryFrame_first_code(frame);
1270         Sp_adj(-1);
1271         jump RET_LBL(stg_ap_v);
1272       }
1273     }
1274   }
1275
1276   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1277   ASSERT(frame_type == ATOMICALLY_FRAME);
1278   ASSERT(outer == NO_TREC);
1279   r = foreign "C" stmWait(CurrentTSO "ptr", trec "ptr");
1280   if (r) {
1281     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1282     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1283     Sp = frame;
1284     // Fix up the stack in the unregisterised case: the return convention is different.
1285     IF_NOT_REG_R1(Sp_adj(-2); 
1286                   Sp(1) = stg_NO_FINALIZER_closure;
1287                   Sp(0) = stg_ut_1_0_unreg_info;)
1288     jump stg_block_noregs;
1289   } else {
1290     // Transaction was not valid: retry immediately
1291     "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1292     StgTSO_trec(CurrentTSO) = trec;
1293     R1 = StgAtomicallyFrame_code(frame);
1294     Sp = frame;
1295     Sp_adj(-1);
1296     jump RET_LBL(stg_ap_v);
1297   }
1298 }
1299
1300
1301 newTVarzh_fast
1302 {
1303   W_ tv;
1304
1305   /* Args: R1 = initialisation value */
1306
1307   ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
1308   tv = Hp - SIZEOF_StgTVar + WDS(1);
1309   SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
1310   StgTVar_current_value(tv) = R1;
1311   StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
1312     
1313   RET_P(tv);
1314 }
1315
1316
1317 readTVarzh_fast
1318 {
1319   W_ trec;
1320   W_ tvar;
1321   W_ result;
1322
1323   /* Args: R1 = TVar closure */
1324
1325   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1326   trec = StgTSO_trec(CurrentTSO);
1327   tvar = R1;
1328   "ptr" result = foreign "C" stmReadTVar(trec "ptr", tvar "ptr");
1329
1330   RET_P(result);
1331 }
1332
1333
1334 writeTVarzh_fast
1335 {
1336   W_ trec;
1337   W_ tvar;
1338   W_ new_value;
1339   
1340   /* Args: R1 = TVar closure */
1341   /*       R2 = New value    */
1342
1343   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1344   trec = StgTSO_trec(CurrentTSO);
1345   tvar = R1;
1346   new_value = R2;
1347   foreign "C" stmWriteTVar(trec "ptr", tvar "ptr", new_value "ptr");
1348
1349   jump %ENTRY_CODE(Sp(0));
1350 }
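/* A rough Haskell-level sketch of how these STM primops are used from the
 * boxed API (assuming the GHC.Conc surface: atomically, retry, orElse and
 * the TVar operations):
 *
 *     import GHC.Conc (STM, TVar, atomically, retry, orElse,
 *                      readTVar, writeTVar)
 *
 *     -- Move n units between accounts; blocks (retry) until funds exist.
 *     transfer :: TVar Int -> TVar Int -> Int -> STM ()
 *     transfer from to n = do
 *         bal <- readTVar from
 *         if bal < n
 *             then retry                     -- retryzh: wait on TVars read so far
 *             else do writeTVar from (bal - n)
 *                     toBal <- readTVar to
 *                     writeTVar to (toBal + n)
 *
 *     demo :: TVar Int -> TVar Int -> IO ()
 *     demo a b = atomically (transfer a b 10 `orElse` return ())
 */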
1351
1352
1353 /* -----------------------------------------------------------------------------
1354  * MVar primitives
1355  *
1356  * take & putMVar work as follows.  Firstly, an important invariant:
1357  *
1358  *    If the MVar is full, then the blocking queue contains only
1359  *    threads blocked on putMVar, and if the MVar is empty then the
1360  *    blocking queue contains only threads blocked on takeMVar.
1361  *
1362  * takeMVar:
1363  *    MVar empty : then add ourselves to the blocking queue
1364  *    MVar full  : remove the value from the MVar, and
1365  *                 blocking queue empty     : return
1366  *                 blocking queue non-empty : perform the first blocked putMVar
1367  *                                            from the queue, and wake up the
1368  *                                            thread (MVar is now full again)
1369  *
1370  * putMVar is just the dual of the above algorithm.
1371  *
1372  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1373  * the stack of the thread waiting to do the putMVar.  See
1374  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1375  * the stack layout, and the PerformPut and PerformTake macros below.
1376  *
1377  * It is important that a blocked take or put is woken up with the
1378  * take/put already performed, because otherwise there would be a
1379  * small window of vulnerability where the thread could receive an
1380  * exception and never perform its take or put, and we'd end up with a
1381  * deadlock.
1382  *
1383  * -------------------------------------------------------------------------- */
1384
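/* A minimal Haskell-level sketch of the behaviour described above
 * (assuming the Control.Concurrent.MVar API):
 *
 *     import Control.Concurrent (forkIO)
 *     import Control.Concurrent.MVar (newEmptyMVar, takeMVar, putMVar)
 *
 *     demo :: IO ()
 *     demo = do
 *         box <- newEmptyMVar
 *         _   <- forkIO (putMVar box "hello")  -- would block if box were full
 *         msg <- takeMVar box                  -- blocks until something is put
 *         putStrLn msg
 */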
1385 isEmptyMVarzh_fast
1386 {
1387     /* args: R1 = MVar closure */
1388
1389     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1390         RET_N(1);
1391     } else {
1392         RET_N(0);
1393     }
1394 }
1395
1396 newMVarzh_fast
1397 {
1398     /* args: none */
1399     W_ mvar;
1400
1401     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1402   
1403     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1404     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1405     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1406     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1407     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1408     RET_P(mvar);
1409 }
1410
1411
1412 /* If R1 isn't available, pass it on the stack */
1413 #ifdef REG_R1
1414 #define PerformTake(tso, value)                         \
1415     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1416     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1417 #else
1418 #define PerformTake(tso, value)                                 \
1419     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1420     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1421 #endif
1422
1423 #define PerformPut(tso,lval)                    \
1424     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1425     lval = W_[StgTSO_sp(tso) - WDS(1)];
1426
1427
1428 takeMVarzh_fast
1429 {
1430     W_ mvar, val, info, tso;
1431
1432     /* args: R1 = MVar closure */
1433     mvar = R1;
1434
1435     info = GET_INFO(mvar);
1436
1437     /* If the MVar is empty, put ourselves on its blocking queue,
1438      * and wait until we're woken up.
1439      */
1440     if (info == stg_EMPTY_MVAR_info) {
1441         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1442             StgMVar_head(mvar) = CurrentTSO;
1443         } else {
1444             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1445         }
1446         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1447         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1448         StgTSO_block_info(CurrentTSO)  = mvar;
1449         StgMVar_tail(mvar) = CurrentTSO;
1450         
1451         jump stg_block_takemvar;
1452   }
1453
1454   /* we got the value... */
1455   val = StgMVar_value(mvar);
1456
1457   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1458   {
1459       /* There are putMVar(s) waiting... 
1460        * wake up the first thread on the queue
1461        */
1462       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1463
1464       /* actually perform the putMVar for the thread that we just woke up */
1465       tso = StgMVar_head(mvar);
1466       PerformPut(tso,StgMVar_value(mvar));
1467
1468 #if defined(GRAN) || defined(PAR)
1469       /* ToDo: check 2nd arg (mvar) is right */
1470       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1471       StgMVar_head(mvar) = tso;
1472 #else
1473       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1474       StgMVar_head(mvar) = tso;
1475 #endif
1476       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1477           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1478       }
1479       RET_P(val);
1480   } 
1481   else
1482   {
1483       /* No further putMVars, MVar is now empty */
1484       
1485       /* do this last... we might have locked the MVar in the SMP case,
1486        * and writing the info pointer will unlock it.
1487        */
1488       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1489       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1490       RET_P(val);
1491   }
1492 }
1493
1494
1495 tryTakeMVarzh_fast
1496 {
1497     W_ mvar, val, info, tso;
1498
1499     /* args: R1 = MVar closure */
1500
1501     mvar = R1;
1502
1503     info = GET_INFO(mvar);
1504
1505     if (info == stg_EMPTY_MVAR_info) {
1506         /* HACK: we need a pointer to pass back, 
1507          * so we abuse NO_FINALIZER_closure
1508          */
1509         RET_NP(0, stg_NO_FINALIZER_closure);
1510     }
1511
1512     /* we got the value... */
1513     val = StgMVar_value(mvar);
1514
1515     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1516         /* There are putMVar(s) waiting... 
1517          * wake up the first thread on the queue
1518          */
1519         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1520
1521         /* actually perform the putMVar for the thread that we just woke up */
1522         tso = StgMVar_head(mvar);
1523         PerformPut(tso,StgMVar_value(mvar));
1524
1525 #if defined(GRAN) || defined(PAR)
1526         /* ToDo: check 2nd arg (mvar) is right */
1527         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1528         StgMVar_head(mvar) = tso;
1529 #else
1530         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1531         StgMVar_head(mvar) = tso;
1532 #endif
1533
1534         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1535             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1536         }
1537     }
1538     else 
1539     {
1540         /* No further putMVars, MVar is now empty */
1541         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1542         
1543         /* do this last... we might have locked the MVar in the SMP case,
1544          * and writing the info pointer will unlock it.
1545          */
1546         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1547     }
1548     
1549     RET_NP(1, val);
1550 }
1551
1552
1553 putMVarzh_fast
1554 {
1555     W_ mvar, info, tso;
1556
1557     /* args: R1 = MVar, R2 = value */
1558     mvar = R1;
1559
1560     info = GET_INFO(mvar);
1561
1562     if (info == stg_FULL_MVAR_info) {
1563         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1564             StgMVar_head(mvar) = CurrentTSO;
1565         } else {
1566             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1567         }
1568         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1569         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1570         StgTSO_block_info(CurrentTSO)  = mvar;
1571         StgMVar_tail(mvar) = CurrentTSO;
1572         
1573         jump stg_block_putmvar;
1574     }
1575   
1576     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1577         /* There are takeMVar(s) waiting: wake up the first one
1578          */
1579         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1580
1581         /* actually perform the takeMVar */
1582         tso = StgMVar_head(mvar);
1583         PerformTake(tso, R2);
1584       
1585 #if defined(GRAN) || defined(PAR)
1586         /* ToDo: check 2nd arg (mvar) is right */
1587         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1588         StgMVar_head(mvar) = tso;
1589 #else
1590         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1591         StgMVar_head(mvar) = tso;
1592 #endif
1593
1594         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1595             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1596         }
1597
1598         jump %ENTRY_CODE(Sp(0));
1599     }
1600     else
1601     {
1602         /* No further takes, the MVar is now full. */
1603         StgMVar_value(mvar) = R2;
1604         /* unlocks the MVar in the SMP case */
1605         SET_INFO(mvar,stg_FULL_MVAR_info);
1606         jump %ENTRY_CODE(Sp(0));
1607     }
1608     
1609     /* ToDo: yield afterward for better communication performance? */
1610 }
1611
1612
1613 tryPutMVarzh_fast
1614 {
1615     W_ mvar, info, tso;
1616
1617     /* args: R1 = MVar, R2 = value */
1618     mvar = R1;
1619
1620     info = GET_INFO(mvar);
1621
1622     if (info == stg_FULL_MVAR_info) {
1623         RET_N(0);
1624     }
1625   
1626     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1627         /* There are takeMVar(s) waiting: wake up the first one
1628          */
1629         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1630         
1631         /* actually perform the takeMVar */
1632         tso = StgMVar_head(mvar);
1633         PerformTake(tso, R2);
1634       
1635 #if defined(GRAN) || defined(PAR)
1636         /* ToDo: check 2nd arg (mvar) is right */
1637         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1638         StgMVar_head(mvar) = tso;
1639 #else
1640         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1641         StgMVar_head(mvar) = tso;
1642 #endif
1643
1644         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1645             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1646         }
1647
1648         jump %ENTRY_CODE(Sp(0));
1649     }
1650     else
1651     {
1652         /* No further takes, the MVar is now full. */
1653         StgMVar_value(mvar) = R2;
1654         /* unlocks the MVar in the SMP case */
1655         SET_INFO(mvar,stg_FULL_MVAR_info);
1656         jump %ENTRY_CODE(Sp(0));
1657     }
1658     
1659     /* ToDo: yield afterward for better communication performance? */
1660 }
1661
1662
1663 /* -----------------------------------------------------------------------------
1664    Stable pointer primitives
1665    -------------------------------------------------------------------------  */
1666
1667 makeStableNamezh_fast
1668 {
1669     W_ index, sn_obj;
1670
1671     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1672   
1673     index = foreign "C" lookupStableName(R1 "ptr");
1674
1675     /* Is there already a StableName for this heap object?
1676      *  stable_ptr_table is an array of snEntry structs.
1677      */
1678     if ( snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry) == NULL ) {
1679         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1680         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1681         StgStableName_sn(sn_obj) = index;
1682         snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry) = sn_obj;
1683     } else {
1684         sn_obj = snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry);
1685     }
1686     
1687     RET_P(sn_obj);
1688 }
1689
1690
1691 makeStablePtrzh_fast
1692 {
1693     /* Args: R1 = a */
1694     W_ sp;
1695     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1696     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1697     RET_N(sp);
1698 }
1699
1700 deRefStablePtrzh_fast
1701 {
1702     /* Args: R1 = the stable ptr */
1703     W_ r, sp;
1704     sp = R1;
1705     r = snEntry_addr(stable_ptr_table + sp*SIZEOF_snEntry);
1706     RET_P(r);
1707 }
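/* A rough sketch of the boxed interfaces over these primops (assuming the
 * Foreign.StablePtr and System.Mem.StableName APIs):
 *
 *     import Foreign.StablePtr (newStablePtr, deRefStablePtr, freeStablePtr)
 *     import System.Mem.StableName (makeStableName, hashStableName)
 *
 *     demo :: IO ()
 *     demo = do
 *         -- StablePtr: keeps the value alive until the pointer is freed
 *         sp <- newStablePtr "keep me"
 *         s  <- deRefStablePtr sp
 *         putStrLn s
 *         freeStablePtr sp
 *
 *         -- StableName: a stable, hashable identity for a heap object
 *         let xs = [1, 2, 3 :: Int]
 *         n1 <- makeStableName xs
 *         n2 <- makeStableName xs
 *         print (n1 == n2, hashStableName n1)  -- equal: both name the same object
 */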
1708
1709 /* -----------------------------------------------------------------------------
1710    Bytecode object primitives
1711    -------------------------------------------------------------------------  */
1712
1713 newBCOzh_fast
1714 {
1715     /* R1 = instrs
1716        R2 = literals
1717        R3 = ptrs
1718        R4 = itbls
1719        R5 = arity
1720        R6 = bitmap array
1721     */
1722     W_ bco, bitmap_arr, bytes, words;
1723     
1724     bitmap_arr = R6;
1725     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1726     bytes = WDS(words);
1727
1728     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1729
1730     bco = Hp - bytes + WDS(1);
1731     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1732     
1733     StgBCO_instrs(bco)     = R1;
1734     StgBCO_literals(bco)   = R2;
1735     StgBCO_ptrs(bco)       = R3;
1736     StgBCO_itbls(bco)      = R4;
1737     StgBCO_arity(bco)      = HALF_W_(R5);
1738     StgBCO_size(bco)       = HALF_W_(words);
1739     
1740     // Copy the arity/bitmap info into the BCO
1741     W_ i;
1742     i = 0;
1743 for:
1744     if (i < StgArrWords_words(bitmap_arr)) {
1745         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1746         i = i + 1;
1747         goto for;
1748     }
1749     
1750     RET_P(bco);
1751 }
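
/* Editorial note: newBCO# is reached from the byte-code linker (see
 * ByteCodeLink.lhs, also referenced in mkApUpd0zh_fast below).  The loop
 * above copies the arity/bitmap words out of the ByteArray# passed in R6
 * into the trailing words of the BCO itself, so that information travels
 * with the BCO rather than living in a separate ArrWords object.
 */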
1752
1753
1754 mkApUpd0zh_fast
1755 {
1756     // R1 = the BCO# for the AP
1757     //  
1758     W_ ap;
1759
1760     // This function is *only* used to wrap zero-arity BCOs in an
1761     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1762     // saturated and always points directly to a FUN or BCO.
1763     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1764            StgBCO_arity(R1) == HALF_W_(0));
1765
1766     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1767     TICK_ALLOC_UP_THK(0, 0);
1768     CCCS_ALLOC(SIZEOF_StgAP);
1769
1770     ap = Hp - SIZEOF_StgAP + WDS(1);
1771     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1772     
1773     StgAP_n_args(ap) = HALF_W_(0);
1774     StgAP_fun(ap) = R1;
1775     
1776     RET_P(ap);
1777 }
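
/* Editorial note: an AP is an updatable thunk, so the wrapper built here
 * gives the zero-arity BCO ordinary thunk behaviour: entering the AP pushes
 * an update frame and runs the BCO, and the result then overwrites the AP.
 */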
1778
1779 /* -----------------------------------------------------------------------------
1780    Thread I/O blocking primitives
1781    -------------------------------------------------------------------------- */
1782
1783 /* Add a thread to the end of the blocked queue. (C-- version of the C
1784  * macro in Schedule.h).
1785  */
1786 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1787     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1788     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1789       W_[blocked_queue_hd] = tso;                       \
1790     } else {                                            \
1791       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1792     }                                                   \
1793     W_[blocked_queue_tl] = tso;
1794
1795 waitReadzh_fast
1796 {
1797     /* args: R1 */
1798 #ifdef THREADED_RTS
1799     foreign "C" barf("waitRead# on threaded RTS");
1800 #endif
1801
1802     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1803     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1804     StgTSO_block_info(CurrentTSO) = R1;
1805     // No locking - we're not going to use this interface in the
1806     // threaded RTS anyway.
1807     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1808     jump stg_block_noregs;
1809 }
1810
1811 waitWritezh_fast
1812 {
1813     /* args: R1 */
1814 #ifdef THREADED_RTS
1815     foreign "C" barf("waitWrite# on threaded RTS");
1816 #endif
1817
1818     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1819     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1820     StgTSO_block_info(CurrentTSO) = R1;
1821     // No locking - we're not going to use this interface in the
1822     // threaded RTS anyway.
1823     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1824     jump stg_block_noregs;
1825 }
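
/* Editorial sketch of the non-threaded-RTS user view: Control.Concurrent's
 * threadWaitRead/threadWaitWrite compile down to waitRead#/waitWrite#,
 * parking the calling thread on blocked_queue until the scheduler's
 * select()-based awaitEvent sees the descriptor become ready.  (The argument
 * type has varied between Int and Fd across base versions; the numeric
 * literal below works with either.)
 *
 *   import Control.Concurrent (threadWaitRead)
 *
 *   main :: IO ()
 *   main = do
 *     putStrLn "waiting for input on stdin (fd 0)..."
 *     threadWaitRead 0          -- blocks only this Haskell thread
 *     s <- getLine
 *     putStrLn ("got: " ++ s)
 */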
1826
1827
1828 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1829 delayzh_fast
1830 {
1831 #ifdef mingw32_HOST_OS
1832     W_ ares;
1833     CInt reqID;
1834 #else
1835     W_ t, prev, target;
1836 #endif
1837
1838 #ifdef THREADED_RTS
1839     foreign "C" barf("delay# on threaded RTS");
1840 #endif
1841
1842     /* args: R1 (microsecond delay amount) */
1843     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1844     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1845
1846 #ifdef mingw32_HOST_OS
1847
1848     /* could probably allocate this on the heap instead */
1849     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1850                                             stg_delayzh_malloc_str);
1851     reqID = foreign "C" addDelayRequest(R1);
1852     StgAsyncIOResult_reqID(ares)   = reqID;
1853     StgAsyncIOResult_len(ares)     = 0;
1854     StgAsyncIOResult_errCode(ares) = 0;
1855     StgTSO_block_info(CurrentTSO)  = ares;
1856
1857     /* Having all async-blocked threads reside on the blocked_queue
1858      * simplifies matters, so change the status to BlockedOnDoProc and
1859      * put the delayed thread on the blocked_queue.
1860      */
1861     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1862     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1863     jump stg_block_async_void;
1864
1865 #else
1866
1867     W_ time;
1868     time = foreign "C" getourtimeofday();
1869     target = (R1 / (TICK_MILLISECS*1000)) + time;
1870     StgTSO_block_info(CurrentTSO) = target;
1871
1872     /* Insert the new thread in the sleeping queue. */
1873     prev = NULL;
1874     t = W_[sleeping_queue];
1875 while:
1876     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1877         prev = t;
1878         t = StgTSO_link(t);
1879         goto while;
1880     }
1881
1882     StgTSO_link(CurrentTSO) = t;
1883     if (prev == NULL) {
1884         W_[sleeping_queue] = CurrentTSO;
1885     } else {
1886         StgTSO_link(prev) = CurrentTSO;
1887     }
1888     jump stg_block_noregs;
1889 #endif
1890 }
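
/* Editorial sketch: delay# is the primop behind Control.Concurrent.threadDelay,
 * which takes the delay in microseconds (matching R1 above).  On the
 * non-threaded non-mingw32 RTS the calling thread is inserted into
 * sleeping_queue, ordered by wake-up tick, rather than busy-waiting.
 *
 *   import Control.Concurrent (forkIO, threadDelay)
 *
 *   main :: IO ()
 *   main = do
 *     _ <- forkIO (threadDelay 1000000 >> putStrLn "one second later")
 *     threadDelay 2000000       -- 2,000,000 microseconds = 2 seconds
 *     putStrLn "done"
 */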
1891
1892
1893 #ifdef mingw32_HOST_OS
1894 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1895 asyncReadzh_fast
1896 {
1897     W_ ares;
1898     CInt reqID;
1899
1900 #ifdef THREADED_RTS
1901     foreign "C" barf("asyncRead# on threaded RTS");
1902 #endif
1903
1904     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1905     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1906     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1907
1908     /* could probably allocate this on the heap instead */
1909     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1910                                             stg_asyncReadzh_malloc_str);
1911     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1912     StgAsyncIOResult_reqID(ares)   = reqID;
1913     StgAsyncIOResult_len(ares)     = 0;
1914     StgAsyncIOResult_errCode(ares) = 0;
1915     StgTSO_block_info(CurrentTSO)  = ares;
1916     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1917     jump stg_block_async;
1918 }
1919
1920 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1921 asyncWritezh_fast
1922 {
1923     W_ ares;
1924     CInt reqID;
1925
1926 #ifdef THREADED_RTS
1927     foreign "C" barf("asyncWrite# on threaded RTS");
1928 #endif
1929
1930     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1931     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1932     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1933
1934     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1935                                             stg_asyncWritezh_malloc_str);
1936     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
1937
1938     StgAsyncIOResult_reqID(ares)   = reqID;
1939     StgAsyncIOResult_len(ares)     = 0;
1940     StgAsyncIOResult_errCode(ares) = 0;
1941     StgTSO_block_info(CurrentTSO)  = ares;
1942     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1943     jump stg_block_async;
1944 }
1945
1946 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
1947 asyncDoProczh_fast
1948 {
1949     W_ ares;
1950     CInt reqID;
1951
1952     /* args: R1 = proc, R2 = param */
1953     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1954     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1955
1956     /* could probably allocate this on the heap instead */
1957     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1958                                             stg_asyncDoProczh_malloc_str);
1959     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
1960     StgAsyncIOResult_reqID(ares)   = reqID;
1961     StgAsyncIOResult_len(ares)     = 0;
1962     StgAsyncIOResult_errCode(ares) = 0;
1963     StgTSO_block_info(CurrentTSO) = ares;
1964     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1965     jump stg_block_async;
1966 }
1967 #endif
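
/* Editorial note: these three mingw32-only primops back the asyncRead#,
 * asyncWrite# and asyncDoProc# operations used by the Windows I/O support in
 * GHC.Conc.  Each one mallocs an StgAsyncIOResult, hands the request to the
 * Windows I/O manager via addIORequest/addDoProcRequest, parks the thread on
 * blocked_queue, and the (len, errCode) pair is read back out of the result
 * when the scheduler wakes the thread.
 */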
1968
1969 /* -----------------------------------------------------------------------------
1970   ** temporary **
1971
1972    classes CCallable and CReturnable don't really exist, but the
1973    compiler insists on generating dictionaries containing references
1974    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
1975    for these.  Some C compilers can't cope with zero-length static arrays,
1976    so we have to make these one element long.
1977   --------------------------------------------------------------------------- */
1978
1979 section "rodata" {
1980   GHC_ZCCCallable_static_info:   W_ 0;
1981 }
1982
1983 section "rodata" {
1984   GHC_ZCCReturnable_static_info: W_ 0;
1985 }