1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check; the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
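/* For example, assuming a 64-bit target where a word is 8 bytes: a request
 * for n = 13 bytes gives payload_words = ROUNDUP_BYTES_TO_WDS(13) = 2, so the
 * object occupies the ArrWords header plus two payload words (16 bytes).
 */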
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocate(words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
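/* Worked example of the alignment fix-up above (purely illustrative): suppose
 * SIZEOF_StgArrWords were 12 bytes on a 32-bit target.  12 & 7 is non-zero,
 * so we allocate one extra word and bump p by WDS(1) = 4 bytes; the payload
 * then starts 16 bytes into the pinned allocation, which is 8-byte aligned
 * as required.
 */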
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocate(words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
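/* Note: the loop above makes every slot of the new array point at the *same*
 * initialisation closure (R2); newArray# shares the value, it does not copy
 * it.
 */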
118
119 unsafeThawArrayzh_fast
120 {
121   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
122
123   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
124   //
125   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
126   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
127   // it on the mutable list for the GC to remove (removing something from
128   // the mutable list is not easy, because the mut_list is only singly-linked).
129   // 
130   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
131   // either it is on a mut_list, or it isn't.  We adopt the convention that
132   // the mut_link field is NULL if it isn't on a mut_list, and the GC
133   // maintains this invariant.
134   //
135   if (StgMutClosure_mut_link(R1) == NULL) {
136         foreign "C" recordMutable(R1 "ptr");
137   }
138
139   RET_P(R1);
140 }
141
142 /* -----------------------------------------------------------------------------
143    MutVar primitives
144    -------------------------------------------------------------------------- */
145
146 newMutVarzh_fast
147 {
148     W_ mv;
149     /* Args: R1 = initialisation value */
150
151     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
152
153     mv = Hp - SIZEOF_StgMutVar + WDS(1);
154     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
155     StgMutVar_var(mv) = R1;
156     
157     RET_P(mv);
158 }
159
160 atomicModifyMutVarzh_fast
161 {
162     W_ mv, z, x, y, r;
163     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
164
165     /* If x is the current contents of the MutVar#, then 
166        we want to make the new contents point to
167
168          (sel_0 (f x))
169  
170        and the return value is
171          
172          (sel_1 (f x))
173
174         Obviously, we can share (f x).
175
176          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
177          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
178          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
179     */
180
181 #if MIN_UPD_SIZE > 1
182 #define THUNK_1_SIZE (SIZEOF_StgHeader + WDS(MIN_UPD_SIZE))
183 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
184 #else
185 #define THUNK_1_SIZE (SIZEOF_StgHeader + WDS(1))
186 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
187 #endif
188
189 #if MIN_UPD_SIZE > 2
190 #define THUNK_2_SIZE (SIZEOF_StgHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
192 #else
193 #define THUNK_2_SIZE (SIZEOF_StgHeader + WDS(2))
194 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
195 #endif
196
197 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
198
199    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
200
201    x = StgMutVar_var(R1);
202
203    TICK_ALLOC_THUNK_2();
204    CCCS_ALLOC(THUNK_2_SIZE);
205    z = Hp - THUNK_2_SIZE + WDS(1);
206    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
207    LDV_RECORD_CREATE(z);
208    StgClosure_payload(z,0) = R2;
209    StgClosure_payload(z,1) = x;
210
211    TICK_ALLOC_THUNK_1();
212    CCCS_ALLOC(THUNK_1_SIZE);
213    y = z - THUNK_1_SIZE;
214    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(y);
216    StgClosure_payload(y,0) = z;
217
218    StgMutVar_var(R1) = y;
219
220    TICK_ALLOC_THUNK_1();
221    CCCS_ALLOC(THUNK_1_SIZE);
222    r = y - THUNK_1_SIZE;
223    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
224    LDV_RECORD_CREATE(r);
225    StgClosure_payload(r,0) = z;
226
227    RET_P(r);
228 }
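/* A rough Haskell-level reading of the three thunks built above (a sketch of
 * the intended semantics, not code that exists anywhere in the compiler):
 *
 *     let z = f x                      -- the stg_ap_2 thunk
 *         y = case z of (a, _) -> a    -- stg_sel_0: the new MutVar# contents
 *         r = case z of (_, b) -> b    -- stg_sel_1: returned to the caller
 *
 * Everything stays unevaluated until demanded, so this primop never forces
 * (f x) itself; it only builds the thunks and swaps y into the MutVar#.
 */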
229
230 /* -----------------------------------------------------------------------------
231    Foreign Object Primitives
232    -------------------------------------------------------------------------- */
233
234 mkForeignObjzh_fast
235 {
236   /* R1 = ptr to foreign object,
237   */
238   W_ result;
239
240   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
241
242   result = Hp - SIZEOF_StgForeignObj + WDS(1);
243   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
244   StgForeignObj_data(result) = R1;
245
246   /* returns (# s#, ForeignObj# #) */
247   RET_P(result);
248 }
249
250 /* -----------------------------------------------------------------------------
251    Weak Pointer Primitives
252    -------------------------------------------------------------------------- */
253
254 STRING(stg_weak_msg,"New weak pointer at %p\n")
255
256 mkWeakzh_fast
257 {
258   /* R1 = key
259      R2 = value
260      R3 = finalizer (or NULL)
261   */
262   W_ w;
263
264   if (R3 == NULL) {
265     R3 = stg_NO_FINALIZER_closure;
266   }
267
268   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
269
270   w = Hp - SIZEOF_StgWeak + WDS(1);
271   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
272
273   StgWeak_key(w)       = R1;
274   StgWeak_value(w)     = R2;
275   StgWeak_finalizer(w) = R3;
276
277   StgWeak_link(w)       = W_[weak_ptr_list];
278   W_[weak_ptr_list]     = w;
279
280   IF_DEBUG(weak, foreign "C" fprintf(stderr,stg_weak_msg,w));
281
282   RET_P(w);
283 }
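/* The new weak pointer is simply consed onto the global weak_ptr_list; the
 * garbage collector walks that list to decide which keys are still reachable
 * and which finalizers need to be scheduled.
 */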
284
285
286 finalizzeWeakzh_fast
287 {
288   /* R1 = weak ptr
289    */
290   W_ w, f;
291
292   w = R1;
293
294   // already dead?
295   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
296       RET_NP(0,stg_NO_FINALIZER_closure);
297   }
298
299   // kill it
300 #ifdef PROFILING
301   // @LDV profiling
302   // A weak pointer is inherently used, so we do not need to call
303   // LDV_recordDead_FILL_SLOP_DYNAMIC():
304   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
305   // or, LDV_recordDead():
306   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
307   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
308   // large as weak pointers, so there is no need to fill the slop, either.
309   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
310 #endif
311
312   //
313   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
314   //
315   SET_INFO(w,stg_DEAD_WEAK_info);
316   LDV_RECORD_CREATE(w);
317
318   f = StgWeak_finalizer(w);
319   StgDeadWeak_link(w) = StgWeak_link(w);
320
321   /* return the finalizer */
322   if (f == stg_NO_FINALIZER_closure) {
323       RET_NP(0,stg_NO_FINALIZER_closure);
324   } else {
325       RET_NP(1,f);
326   }
327 }
328
329 deRefWeakzh_fast
330 {
331   /* R1 = weak ptr */
332   W_ w, code, val;
333
334   w = R1;
335   if (GET_INFO(w) == stg_WEAK_info) {
336     code = 1;
337     val = StgWeak_value(w);
338   } else {
339     code = 0;
340     val = w;
341   }
342   RET_NP(code,val);
343 }
344
345 /* -----------------------------------------------------------------------------
346    Arbitrary-precision Integer operations.
347
348    There are some assumptions in this code that mp_limb_t == W_.  This is
349    the case for all the platforms that GHC supports, currently.
350    -------------------------------------------------------------------------- */
351
352 int2Integerzh_fast
353 {
354    /* arguments: R1 = Int# */
355
356    W_ val, s, p;        /* to avoid aliasing */
357
358    val = R1;
359    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
360
361    p = Hp - SIZEOF_StgArrWords;
362    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
363    StgArrWords_words(p) = 1;
364
365    /* mpz_set_si is inlined here, makes things simpler */
366    if (%lt(val,0)) { 
367         s  = -1;
368         Hp(0) = -val;
369    } else { 
370      if (%gt(val,0)) {
371         s = 1;
372         Hp(0) = val;
373      } else {
374         s = 0;
375      }
376   }
377
378    /* returns (# size  :: Int#, 
379                  data  :: ByteArray# 
380                #)
381    */
382    RET_NP(s,p);
383 }
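/* The (s, p) pair uses the GMP-style sign-magnitude convention followed
 * throughout this file: s is the signed limb count and p is the ByteArray#
 * holding the limbs.  For instance, -5 is returned as s = -1 with a single
 * limb containing 5, and 0 is returned as s = 0 (the limb is then unused).
 */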
384
385 word2Integerzh_fast
386 {
387    /* arguments: R1 = Word# */
388
389    W_ val, s, p;        /* to avoid aliasing */
390
391    val = R1;
392
393    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
394
395    p = Hp - SIZEOF_StgArrWords;
396    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
397    StgArrWords_words(p) = 1;
398
399    if (val != 0) {
400         s = 1;
401         W_[Hp] = val;
402    } else {
403         s = 0;
404    }
405
406    /* returns (# size  :: Int#, 
407                  data  :: ByteArray# #)
408    */
409    RET_NP(s,p);
410 }
411
412
413 /*
414  * 'long long' primops for converting to/from Integers.
415  */
416
417 #ifdef SUPPORT_LONG_LONGS
418
419 int64ToIntegerzh_fast
420 {
421    /* arguments: L1 = Int64# */
422
423    L_ val;
424    W_ hi, s, neg, words_needed, p;
425
426    val = L1;
427    neg = 0;
428
429    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
430        words_needed = 2;
431    } else { 
432        // minimum is one word
433        words_needed = 1;
434    }
435
436    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
437                NO_PTRS, int64ToIntegerzh_fast );
438
439    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
440    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
441    StgArrWords_words(p) = words_needed;
442
443    if ( %lt(val,0::L_) ) {
444      neg = 1;
445      val = -val;
446    }
447
448    hi = TO_W_(val >> 32);
449
450    if ( words_needed == 2 )  { 
451       s = 2;
452       Hp(-1) = TO_W_(val);
453       Hp(0) = hi;
454    } else { 
455        if ( val != 0::L_ ) {
456            s = 1;
457            Hp(0) = TO_W_(val);
458        } else /* val==0 */  {
459            s = 0;
460        }
461    }
462    if ( neg != 0 ) {
463         s = -s;
464    }
465
466    /* returns (# size  :: Int#, 
467                  data  :: ByteArray# #)
468    */
469    RET_NP(s,p);
470 }
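/* Worked example, assuming a 32-bit target (so W_ is 32 bits): converting
 * the Int64# value 0x100000001 needs two limbs, giving s = 2 with the low
 * limb 0x1 in Hp(-1) and the high limb 0x1 in Hp(0).  A negative input such
 * as -3 fits in one limb: it is negated to 3, stored in Hp(0), and s is
 * flipped from 1 to -1.
 */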
471
472 word64ToIntegerzh_fast
473 {
474    /* arguments: L1 = Word64# */
475
476    L_ val;
477    W_ hi, s, words_needed, p;
478
479    val = L1;
480    if ( val >= 0x100000000::L_ ) {
481       words_needed = 2;
482    } else {
483       words_needed = 1;
484    }
485
486    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
487                NO_PTRS, word64ToIntegerzh_fast );
488
489    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
490    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
491    StgArrWords_words(p) = words_needed;
492
493    hi = TO_W_(val >> 32);
494    if ( val >= 0x100000000::L_ ) { 
495      s = 2;
496      Hp(-1) = TO_W_(val);
497      Hp(0)  = hi;
498    } else {
499       if ( val != 0::L_ ) {
500          s = 1;
501          Hp(0) = TO_W_(val);
502       } else /* val==0 */ {
503          s = 0;
504       }
505    }
506
507    /* returns (# size  :: Int#, 
508                  data  :: ByteArray# #)
509    */
510    RET_NP(s,p);
511 }
512
513
514 #endif /* SUPPORT_LONG_LONGS */
515
516 /* ToDo: this is shockingly inefficient */
517
518 section "bss" {
519   mp_tmp1:
520     bits8 [SIZEOF_MP_INT];
521 }
522
523 section "bss" {
524   mp_tmp2:
525     bits8 [SIZEOF_MP_INT];
526 }
527
528 section "bss" {
529   result1:
530     bits8 [SIZEOF_MP_INT];
531 }
532
533 section "bss" {
534   result2:
535     bits8 [SIZEOF_MP_INT];
536 }
537
538 #define GMP_TAKE2_RET1(name,mp_fun)                     \
539 name                                                    \
540 {                                                       \
541   CInt s1, s2;                                          \
542   W_ d1, d2;                                            \
543                                                         \
544   /* call doYouWantToGC() */                            \
545   MAYBE_GC(R2_PTR & R4_PTR, name);                      \
546                                                         \
547   s1 = W_TO_INT(R1);                                    \
548   d1 = R2;                                              \
549   s2 = W_TO_INT(R3);                                    \
550   d2 = R4;                                              \
551                                                         \
552   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));  \
553   MP_INT__mp_size(mp_tmp1)  = (s1);                     \
554   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);         \
555   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));  \
556   MP_INT__mp_size(mp_tmp2)  = (s2);                     \
557   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);         \
558                                                         \
559   foreign "C" mpz_init(result1);                        \
560                                                         \
561   /* Perform the operation */                           \
562   foreign "C" mp_fun(result1,mp_tmp1,mp_tmp2);          \
563                                                         \
564   RET_NP(TO_W_(MP_INT__mp_size(result1)),               \
565          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);   \
566 }
567
568 #define GMP_TAKE1_RET1(name,mp_fun)                             \
569 name                                                            \
570 {                                                               \
571   CInt s1;                                                      \
572   W_ d1;                                                        \
573                                                                 \
574   /* call doYouWantToGC() */                                    \
575   MAYBE_GC(R2_PTR, name);                                       \
576                                                                 \
577   d1 = R2;                                                      \
578   s1 = W_TO_INT(R1);                                            \
579                                                                 \
580   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
581   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
582   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
583                                                                 \
584   foreign "C" mpz_init(result1);                                \
585                                                                 \
586   /* Perform the operation */                                   \
587   foreign "C" mp_fun(result1,mp_tmp1);                          \
588                                                                 \
589   RET_NP(TO_W_(MP_INT__mp_size(result1)),                       \
590          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);           \
591 }
592
593 #define GMP_TAKE2_RET2(name,mp_fun)                             \
594 name                                                            \
595 {                                                               \
596   CInt s1, s2;                                                  \
597   W_ d1, d2;                                                    \
598                                                                 \
599   /* call doYouWantToGC() */                                    \
600   MAYBE_GC(R2_PTR & R4_PTR, name);                              \
601                                                                 \
602   s1 = W_TO_INT(R1);                                            \
603   d1 = R2;                                                      \
604   s2 = W_TO_INT(R3);                                            \
605   d2 = R4;                                                      \
606                                                                 \
607   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
608   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
609   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
610   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));      \
611   MP_INT__mp_size(mp_tmp2)      = (s2);                         \
612   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);             \
613                                                                 \
614   foreign "C" mpz_init(result1);                                \
615   foreign "C" mpz_init(result2);                                \
616                                                                 \
617   /* Perform the operation */                                   \
618   foreign "C" mp_fun(result1,result2,mp_tmp1,mp_tmp2);          \
619                                                                 \
620   RET_NPNP(TO_W_(MP_INT__mp_size(result1)),                     \
621            MP_INT__mp_d(result1) - SIZEOF_StgArrWords,          \
622            TO_W_(MP_INT__mp_size(result2)),                     \
623            MP_INT__mp_d(result2) - SIZEOF_StgArrWords);         \
624 }
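/* Note on the RET_NP/RET_NPNP lines above: the RTS registers its own
 * allocation functions with GMP, so the limb array of each mpz_t result
 * lives in the payload of a heap-allocated ArrWords object.  Subtracting
 * SIZEOF_StgArrWords from the _mp_d limb pointer therefore recovers the
 * ByteArray# closure itself, which is what we return to Haskell alongside
 * the signed size.
 */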
625
626 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
627 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
628 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
629 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
630 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
631 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
632 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
633 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
634 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
635 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
636 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
637
638 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
639 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
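/* For example, GMP_TAKE2_RET1(plusIntegerzh_fast, mpz_add) expands to a
 * primop that unpacks its two (size, ByteArray#) arguments into mp_tmp1 and
 * mp_tmp2, calls mpz_add(result1, mp_tmp1, mp_tmp2), and hands the result
 * back in the same (size, ByteArray#) form.
 */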
640
641 section "bss" {
642   aa:  W_; // NB. aa is really an mp_limb_t
643 }
644
645 gcdIntzh_fast
646 {
647     /* R1 = the first Int#; R2 = the second Int# */
648     W_ r; 
649
650     W_[aa] = R1;
651     r = foreign "C" mpn_gcd_1(aa, 1, R2);
652
653     R1 = r;
654     /* Result parked in R1, return via info-pointer at TOS */
655     jump %ENTRY_CODE(Sp(0));
656 }
657
658
659 gcdIntegerIntzh_fast
660 {
661     /* R1 = s1; R2 = d1; R3 = the int */
662     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
663     
664     /* Result parked in R1, return via info-pointer at TOS */
665     jump %ENTRY_CODE(Sp(0));
666 }
667
668
669 cmpIntegerIntzh_fast
670 {
671     /* R1 = s1; R2 = d1; R3 = the int */
672     W_ usize, vsize, v_digit, u_digit;
673
674     usize = R1;
675     vsize = 0;
676     v_digit = R3;
677
678     // paraphrased from mpz_cmp_si() in the GMP sources
679     if (%gt(v_digit,0)) {
680         vsize = 1;
681     } else { 
682         if (%lt(v_digit,0)) {
683             vsize = -1;
684             v_digit = -v_digit;
685         }
686     }
687
688     if (usize != vsize) {
689         R1 = usize - vsize; 
690         jump %ENTRY_CODE(Sp(0));
691     }
692
693     if (usize == 0) {
694         R1 = 0; 
695         jump %ENTRY_CODE(Sp(0));
696     }
697
698     u_digit = W_[BYTE_ARR_CTS(R2)];
699
700     if (u_digit == v_digit) {
701         R1 = 0; 
702         jump %ENTRY_CODE(Sp(0));
703     }
704
705     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
706         R1 = usize; 
707     } else {
708         R1 = -usize; 
709     }
710
711     jump %ENTRY_CODE(Sp(0));
712 }
713
714 cmpIntegerzh_fast
715 {
716     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
717     W_ usize, vsize, size, up, vp;
718     CInt cmp;
719
720     // paraphrased from mpz_cmp() in the GMP sources
721     usize = R1;
722     vsize = R3;
723
724     if (usize != vsize) {
725         R1 = usize - vsize; 
726         jump %ENTRY_CODE(Sp(0));
727     }
728
729     if (usize == 0) {
730         R1 = 0; 
731         jump %ENTRY_CODE(Sp(0));
732     }
733
734     if (%lt(usize,0)) { // NB. not <, which is unsigned
735         size = -usize;
736     } else {
737         size = usize;
738     }
739
740     up = BYTE_ARR_CTS(R2);
741     vp = BYTE_ARR_CTS(R4);
742
743     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
744
745     if (cmp == 0 :: CInt) {
746         R1 = 0; 
747         jump %ENTRY_CODE(Sp(0));
748     }
749
750     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
751         R1 = 1;
752     } else {
753         R1 = (-1); 
754     }
755     /* Result parked in R1, return via info-pointer at TOS */
756     jump %ENTRY_CODE(Sp(0));
757 }
758
759 integer2Intzh_fast
760 {
761     /* R1 = s; R2 = d */
762     W_ r, s;
763
764     s = R1;
765     if (s == 0) {
766         r = 0;
767     } else {
768         r = W_[R2 + SIZEOF_StgArrWords];
769         if (%lt(s,0)) {
770             r = -r;
771         }
772     }
773     /* Result parked in R1, return via info-pointer at TOS */
774     R1 = r;
775     jump %ENTRY_CODE(Sp(0));
776 }
777
778 integer2Wordzh_fast
779 {
780   /* R1 = s; R2 = d */
781   W_ r, s;
782
783   s = R1;
784   if (s == 0) {
785     r = 0;
786   } else {
787     r = W_[R2 + SIZEOF_StgArrWords];
788     if (%lt(s,0)) {
789         r = -r;
790     }
791   }
792   /* Result parked in R1, return via info-pointer at TOS */
793   R1 = r;
794   jump %ENTRY_CODE(Sp(0));
795 }
796
797 section "bss" {
798   exponent:  W_;
799 }
800
801 decodeFloatzh_fast
802 {
803     W_ p;
804     F_ arg;
805     
806     /* arguments: F1 = Float# */
807     arg = F1;
808     
809     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
810     
811     /* Be prepared to tell Lennart-coded __decodeFloat
812        where mantissa._mp_d can be put (it does not care about the rest) */
813     p = Hp - SIZEOF_StgArrWords;
814     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
815     StgArrWords_words(p) = 1;
816     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
817     
818     /* Perform the operation */
819     foreign "C" __decodeFloat(mp_tmp1,exponent,arg);
820     
821     /* returns: (Int# (expn), Int#, ByteArray#) */
822     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
823 }
824
825 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
826 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
827
828 decodeDoublezh_fast
829 {
830     D_ arg;
831     W_ p;
832
833     /* arguments: D1 = Double# */
834     arg = D1;
835
836     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
837     
838     /* Be prepared to tell Lennart-coded __decodeDouble
839        where mantissa.d can be put (it does not care about the rest) */
840     p = Hp - ARR_SIZE + WDS(1);
841     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
842     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
843     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
844
845     /* Perform the operation */
846     foreign "C" __decodeDouble(mp_tmp1,exponent,arg);
847     
848     /* returns: (Int# (expn), Int#, ByteArray#) */
849     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
850 }
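/* Both decode primops follow the decodeFloat/decodeDouble contract at the
 * Haskell level: they return an exponent, a limb count and a ByteArray#
 * holding the mantissa m, such that the original value is m * 2^expn (an
 * informal statement of the intended semantics, not something checked here).
 */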
851
852 /* -----------------------------------------------------------------------------
853  * Concurrency primitives
854  * -------------------------------------------------------------------------- */
855
856 forkzh_fast
857 {
858   /* args: R1 = closure to run in a new thread */
859   
860   MAYBE_GC(R1_PTR, forkzh_fast);
861
862   // create it right now, return ThreadID in R1
863   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
864                                    R1 "ptr");
865   foreign "C" scheduleThread(R1 "ptr");
866
867   // switch at the earliest opportunity
868   CInt[context_switch] = 1 :: CInt;
869   
870   RET_P(R1);
871 }
872
873 yieldzh_fast
874 {
875   jump stg_yield_noregs;
876 }
877
878 myThreadIdzh_fast
879 {
880   /* no args. */
881   RET_P(CurrentTSO);
882 }
883
884 labelThreadzh_fast
885 {
886   /* args: 
887         R1 = ThreadId#
888         R2 = Addr# */
889 #ifdef DEBUG
890   foreign "C" labelThread(R1 "ptr", R2 "ptr");
891 #endif
892   jump %ENTRY_CODE(Sp(0));
893 }
894
895 isCurrentThreadBoundzh_fast
896 {
897   /* no args */
898   W_ r;
899   r = foreign "C" isThreadBound(CurrentTSO);
900   RET_N(r);
901 }
902
903
904 /* -----------------------------------------------------------------------------
905  * TVar primitives
906  * -------------------------------------------------------------------------- */
907
908 #ifdef REG_R1
909 #define SP_OFF 0
910 #define IF_NOT_REG_R1(x) 
911 #else
912 #define SP_OFF 1
913 #define IF_NOT_REG_R1(x) x
914 #endif
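/* On unregisterised builds R1 is not pinned to a machine register, so the
 * value being returned sits on top of the stack instead.  IF_NOT_REG_R1
 * saves that word across the frame manipulation in the templates below, and
 * SP_OFF accounts for the extra stack slot when fetching the return vector.
 */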
915
916 // Catch retry frame ------------------------------------------------------------
917
918
919 #define CATCH_RETRY_FRAME_ENTRY_TEMPLATE(label,ret)                                       \
920    label                                                                                  \
921    {                                                                                      \
922       W_ r, frame, trec, outer;                                                           \
923       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )                                 \
924                                                                                           \
925       frame = Sp;                                                                         \
926       trec = StgTSO_trec(CurrentTSO);                                                     \
927       "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");                          \
928       r = foreign "C" stmCommitTransaction(trec "ptr");                                   \
929       if (r) {                                                                            \
930         /* Succeeded (either first branch or second branch) */                            \
931         StgTSO_trec(CurrentTSO) = outer;                                                  \
932         Sp = Sp + SIZEOF_StgCatchRetryFrame;                                              \
933         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)                                          \
934         jump ret;                                                                         \
935       } else {                                                                            \
936         /* Did not commit: retry */                                                       \
937         W_ new_trec;                                                                      \
938         "ptr" new_trec = foreign "C" stmStartTransaction(outer "ptr");                    \
939         StgTSO_trec(CurrentTSO) = new_trec;                                               \
940         if (StgCatchRetryFrame_running_alt_code(frame)) {                                 \
941           R1 = StgCatchRetryFrame_alt_code(frame);                                        \
942         } else {                                                                          \
943           R1 = StgCatchRetryFrame_first_code(frame);                                      \
944           StgCatchRetryFrame_first_code_trec(frame) = new_trec;                           \
945         }                                                                                 \
946         Sp_adj(-1);                                                                       \
947         jump RET_LBL(stg_ap_v);                                                           \
948       }                                                                                   \
949    }
950
951 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
952 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
953 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
954 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
955 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
956 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
957 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
958 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(stg_catch_retry_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
959
960 #if MAX_VECTORED_RTN > 8
961 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
962 #endif
963
964 #if defined(PROFILING)
965 #define CATCH_RETRY_FRAME_BITMAP 7
966 #define CATCH_RETRY_FRAME_WORDS  6
967 #else
968 #define CATCH_RETRY_FRAME_BITMAP 1
969 #define CATCH_RETRY_FRAME_WORDS  4
970 #endif
971
972 INFO_TABLE_RET(stg_catch_retry_frame,
973                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
974                CATCH_RETRY_FRAME,
975                stg_catch_retry_frame_0_ret,
976                stg_catch_retry_frame_1_ret,
977                stg_catch_retry_frame_2_ret,
978                stg_catch_retry_frame_3_ret,
979                stg_catch_retry_frame_4_ret,
980                stg_catch_retry_frame_5_ret,
981                stg_catch_retry_frame_6_ret,
982                stg_catch_retry_frame_7_ret)
983 CATCH_RETRY_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
984
985
986
987 // Atomically frame -------------------------------------------------------------
988
989 #define ATOMICALLY_FRAME_ENTRY_TEMPLATE(label,ret)                                       \
990    label                                                                                 \
991    {                                                                                     \
992       W_ frame, trec, valid;                                                             \
993       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )                                \
994                                                                                          \
995       frame = Sp;                                                                        \
996       trec = StgTSO_trec(CurrentTSO);                                                    \
997       if (StgAtomicallyFrame_waiting(frame)) {                                           \
998         /* The TSO is currently waiting: should we stop waiting? */                      \
999         valid = foreign "C" stmReWait(CurrentTSO "ptr");                                 \
1000         if (valid) {                                                                     \
1001           /* Previous attempt is still valid: no point trying again yet */               \
1002           IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)                                       \
1003           jump stg_block_noregs;                                                         \
1004         } else {                                                                         \
1005           /* Previous attempt is no longer valid: try again */                           \
1006           "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");                   \
1007           StgTSO_trec(CurrentTSO) = trec;                                                \
1008           StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */                    \
1009           R1 = StgAtomicallyFrame_code(frame);                                           \
1010           Sp_adj(-1);                                                                    \
1011           jump RET_LBL(stg_ap_v);                                                        \
1012         }                                                                                \
1013       } else {                                                                           \
1014         /* The TSO is not currently waiting: try to commit the transaction */            \
1015         valid = foreign "C" stmCommitTransaction(trec "ptr");                            \
1016         if (valid) {                                                                     \
1017           /* Transaction was valid: commit succeeded */                                  \
1018           StgTSO_trec(CurrentTSO) = NO_TREC;                                             \
1019           Sp = Sp + SIZEOF_StgAtomicallyFrame;                                           \
1020           IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)                                       \
1021           jump ret;                                                                      \
1022         } else {                                                                         \
1023           /* Transaction was not valid: try again */                                     \
1024           "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");                   \
1025           StgTSO_trec(CurrentTSO) = trec;                                                \
1026           R1 = StgAtomicallyFrame_code(frame);                                           \
1027           Sp_adj(-1);                                                                    \
1028           jump RET_LBL(stg_ap_v);                                                        \
1029         }                                                                                \
1030       }                                                                                  \
1031    }
1032
1033 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1034 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1035 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1036 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1037 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1038 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1039 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1040 ATOMICALLY_FRAME_ENTRY_TEMPLATE(stg_atomically_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1041
1042 #if MAX_VECTORED_RTN > 8
1043 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1044 #endif
1045
1046 #if defined(PROFILING)
1047 #define ATOMICALLY_FRAME_BITMAP 7
1048 #define ATOMICALLY_FRAME_WORDS  4
1049 #else
1050 #define ATOMICALLY_FRAME_BITMAP 1
1051 #define ATOMICALLY_FRAME_WORDS  2
1052 #endif
1053
1054 INFO_TABLE_RET(stg_atomically_frame,
1055                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1056                ATOMICALLY_FRAME,
1057                stg_atomically_frame_0_ret,
1058                stg_atomically_frame_1_ret,
1059                stg_atomically_frame_2_ret,
1060                stg_atomically_frame_3_ret,
1061                stg_atomically_frame_4_ret,
1062                stg_atomically_frame_5_ret,
1063                stg_atomically_frame_6_ret,
1064                stg_atomically_frame_7_ret)
1065 ATOMICALLY_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1066
1067
1068 // STM catch frame --------------------------------------------------------------
1069
1070 #define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
1071    label                                                   \
1072    {                                                       \
1073       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
1074       Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
1075       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
1076       jump ret;                                            \
1077    }
1078
1079 #ifdef REG_R1
1080 #define SP_OFF 0
1081 #else
1082 #define SP_OFF 1
1083 #endif
1084
1085 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
1086 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
1087 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
1088 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
1089 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
1090 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
1091 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
1092 CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
1093
1094 #if MAX_VECTORED_RTN > 8
1095 #error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
1096 #endif
1097
1098 #if defined(PROFILING)
1099 #define CATCH_STM_FRAME_BITMAP 3
1100 #define CATCH_STM_FRAME_WORDS  3
1101 #else
1102 #define CATCH_STM_FRAME_BITMAP 0
1103 #define CATCH_STM_FRAME_WORDS  1
1104 #endif
1105
1106 /* Catch frames are very similar to update frames, but when entering
1107  * one we just pop the frame off the stack and perform the correct
1108  * kind of return to the activation record underneath us on the stack.
1109  */
1110
1111 INFO_TABLE_RET(stg_catch_stm_frame,
1112                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1113                CATCH_STM_FRAME,
1114                stg_catch_stm_frame_0_ret,
1115                stg_catch_stm_frame_1_ret,
1116                stg_catch_stm_frame_2_ret,
1117                stg_catch_stm_frame_3_ret,
1118                stg_catch_stm_frame_4_ret,
1119                stg_catch_stm_frame_5_ret,
1120                stg_catch_stm_frame_6_ret,
1121                stg_catch_stm_frame_7_ret)
1122 CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1123
1124
1125 // Primop definition ------------------------------------------------------------
1126
1127 atomicallyzh_fast
1128 {
1129   W_ frame;
1130   W_ old_trec;
1131   W_ new_trec;
1132   
1133   /* Args: R1 = m :: STM a */
1134   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1135
1136   /* Set up the atomically frame */
1137   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1138   frame = Sp;
1139
1140   SET_HDR(frame,stg_atomically_frame_info,CCCS);
1141   StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
1142   StgAtomicallyFrame_code(frame) = R1;
1143
1144   /* Start the memory transaction */
1145   old_trec = StgTSO_trec(CurrentTSO);
1146   "ptr" new_trec = foreign "C" stmStartTransaction(old_trec "ptr");
1147   StgTSO_trec(CurrentTSO) = new_trec;
1148
1149   /* Apply R1 to the realworld token */
1150   Sp_adj(-1);
1151   jump RET_LBL(stg_ap_v);
1152 }
1153
1154
1155 catchSTMzh_fast
1156 {
1157   W_ frame;
1158   
1159   /* Args: R1 :: STM a */
1160   /* Args: R2 :: Exception -> STM a */
1161   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1162
1163   /* Set up the catch frame */
1164   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1165   frame = Sp;
1166
1167   SET_HDR(frame, stg_catch_stm_frame_info, CCCS);
1168   StgCatchSTMFrame_handler(frame) = R2;
1169
1170   /* Apply R1 to the realworld token */
1171   Sp_adj(-1);
1172   jump RET_LBL(stg_ap_v);
1173 }
1174
1175
1176 catchRetryzh_fast
1177 {
1178   W_ frame;
1179   W_ new_trec;
1180   W_ trec;
1181
1182   /* Args: R1 :: STM a */
1183   /* Args: R2 :: STM a */
1184   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1185
1186   /* Start a nested transaction within which to run the first code */
1187   trec = StgTSO_trec(CurrentTSO);
1188   "ptr" new_trec = foreign "C" stmStartTransaction(trec "ptr");
1189   StgTSO_trec(CurrentTSO) = new_trec;
1190
1191   /* Set up the catch-retry frame */
1192   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1193   frame = Sp;
1194   
1195   SET_HDR(frame, stg_catch_retry_frame_info, CCCS);
1196   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1197   StgCatchRetryFrame_first_code(frame) = R1;
1198   StgCatchRetryFrame_alt_code(frame) = R2;
1199   StgCatchRetryFrame_first_code_trec(frame) = new_trec;
1200
1201   /* Apply R1 to the realworld token */
1202   Sp_adj(-1);
1203   jump RET_LBL(stg_ap_v);  
1204 }
1205
1206
1207 retryzh_fast
1208 {
1209   W_ frame_type;
1210   W_ frame;
1211   W_ trec;
1212   W_ outer;
1213   W_ r;
1214
1215   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1216
1217   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1218 retry_pop_stack:
1219   trec = StgTSO_trec(CurrentTSO);
1220   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
1221   StgTSO_sp(CurrentTSO) = Sp;
1222   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
1223   Sp = StgTSO_sp(CurrentTSO);
1224   frame = Sp;
1225
1226   if (frame_type == CATCH_RETRY_FRAME) {
1227     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1228     ASSERT(outer != NO_TREC);
1229     if (!StgCatchRetryFrame_running_alt_code(frame)) {
1230       // Retry in the first code: try the alternative
1231       "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1232       StgTSO_trec(CurrentTSO) = trec;
1233       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1234       R1 = StgCatchRetryFrame_alt_code(frame);
1235       Sp_adj(-1);
1236       jump RET_LBL(stg_ap_v);
1237     } else {
1238       // Retry in the alternative code: propagate
1239       W_ other_trec;
1240       other_trec = StgCatchRetryFrame_first_code_trec(frame);
1241       r = foreign "C" stmMergeForWaiting(trec "ptr", other_trec "ptr");
1242       if (r) {
1243         r = foreign "C" stmCommitTransaction(trec "ptr");
1244       }
1245       if (r) {
1246         // Merge between siblings succeeded: commit it back to enclosing transaction
1247         // and then propagate the retry
1248         StgTSO_trec(CurrentTSO) = outer;
1249         Sp = Sp + SIZEOF_StgCatchRetryFrame;
1250         goto retry_pop_stack;
1251       } else {
1252         // Merge failed: we mustn't propagate the retry.  Try both paths again.
1253         "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1254         StgCatchRetryFrame_first_code_trec(frame) = trec;
1255         StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1256         StgTSO_trec(CurrentTSO) = trec;
1257         R1 = StgCatchRetryFrame_first_code(frame);
1258         Sp_adj(-1);
1259         jump RET_LBL(stg_ap_v);
1260       }
1261     }
1262   }
1263
1264   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1265   ASSERT(frame_type == ATOMICALLY_FRAME);
1266   ASSERT(outer == NO_TREC);
1267   r = foreign "C" stmWait(CurrentTSO "ptr", trec "ptr");
1268   if (r) {
1269     // Transaction was valid: stmWait put us on the TVars' queues; we now block
1270     StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
1271     Sp = frame;
1272     jump stg_block_noregs;
1273   } else {
1274     // Transaction was not valid: retry immediately
1275     "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
1276     StgTSO_trec(CurrentTSO) = trec;
1277     R1 = StgAtomicallyFrame_code(frame);
1278     Sp = frame;
1279     Sp_adj(-1);
1280     jump RET_LBL(stg_ap_v);
1281   }
1282 }
1283
1284
1285 newTVarzh_fast
1286 {
1287   W_ tv;
1288
1289   /* Args: R1 = initialisation value */
1290
1291   ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
1292   tv = Hp - SIZEOF_StgTVar + WDS(1);
1293   SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
1294   StgTVar_current_value(tv) = R1;
1295   StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
1296     
1297   RET_P(tv);
1298 }
1299
1300
1301 readTVarzh_fast
1302 {
1303   W_ trec;
1304   W_ tvar;
1305   W_ result;
1306
1307   /* Args: R1 = TVar closure */
1308
1309   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1310   trec = StgTSO_trec(CurrentTSO);
1311   tvar = R1;
1312   "ptr" result = foreign "C" stmReadTVar(trec "ptr", tvar "ptr");
1313
1314   RET_P(result);
1315 }
1316
1317
1318 writeTVarzh_fast
1319 {
1320   W_ trec;
1321   W_ tvar;
1322   W_ new_value;
1323   
1324   /* Args: R1 = TVar closure */
1325   /*       R2 = New value    */
1326
1327   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1328   trec = StgTSO_trec(CurrentTSO);
1329   tvar = R1;
1330   new_value = R2;
1331   foreign "C" stmWriteTVar(trec "ptr", tvar "ptr", new_value "ptr");
1332
1333   jump %ENTRY_CODE(Sp(0));
1334 }
1335
1336
1337 /* -----------------------------------------------------------------------------
1338  * MVar primitives
1339  *
1340  * take & putMVar work as follows.  Firstly, an important invariant:
1341  *
1342  *    If the MVar is full, then the blocking queue contains only
1343  *    threads blocked on putMVar, and if the MVar is empty then the
1344  *    blocking queue contains only threads blocked on takeMVar.
1345  *
1346  * takeMVar:
1347  *    MVar empty : then add ourselves to the blocking queue
1348  *    MVar full  : remove the value from the MVar, and
1349  *                 blocking queue empty     : return
1350  *                 blocking queue non-empty : perform the first blocked putMVar
1351  *                                            from the queue, and wake up the
1352  *                                            thread (MVar is now full again)
1353  *
1354  * putMVar is just the dual of the above algorithm.
1355  *
1356  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1357  * the stack of the thread waiting to do the putMVar.  See
1358  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1359  * the stack layout, and the PerformPut and PerformTake macros below.
1360  *
1361  * It is important that a blocked take or put is woken up with the
1362  * take/put already performed, because otherwise there would be a
1363  * small window of vulnerability where the thread could receive an
1364  * exception and never perform its take or put, and we'd end up with a
1365  * deadlock.
1366  *
1367  * -------------------------------------------------------------------------- */
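/* Concrete example of the invariant above: if thread A does takeMVar on an
 * empty MVar it is queued on that MVar; when thread B later does putMVar,
 * the code below performs A's take on A's behalf (PerformTake writes the
 * value straight into A's stack) and only then wakes A, so A can never
 * observe a half-completed operation.
 */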
1368
1369 isEmptyMVarzh_fast
1370 {
1371     /* args: R1 = MVar closure */
1372
1373     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1374         RET_N(1);
1375     } else {
1376         RET_N(0);
1377     }
1378 }
1379
1380 newMVarzh_fast
1381 {
1382     /* args: none */
1383     W_ mvar;
1384
1385     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1386   
1387     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1388     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1389     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1390     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1391     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1392     RET_P(mvar);
1393 }
1394
1395
1396 /* If R1 isn't available, pass it on the stack */
1397 #ifdef REG_R1
1398 #define PerformTake(tso, value)                         \
1399     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1400     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1401 #else
1402 #define PerformTake(tso, value)                                 \
1403     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1404     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1405 #endif
1406
1407 #define PerformPut(tso,lval)                    \
1408     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1409     lval = W_[StgTSO_sp(tso) - WDS(1)];
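/* PerformTake pokes the transferred value (plus a matching return info
 * table) directly into the blocked thread's stack, as laid out by
 * stg_block_takemvar; PerformPut conversely pops the blocked thread's
 * putMVar frame (three words) and reads out the value that thread was
 * trying to put.
 */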
1410
1411
1412 takeMVarzh_fast
1413 {
1414     W_ mvar, val, info, tso;
1415
1416     /* args: R1 = MVar closure */
1417     mvar = R1;
1418
1419     info = GET_INFO(mvar);
1420
1421     /* If the MVar is empty, put ourselves on its blocking queue,
1422      * and wait until we're woken up.
1423      */
1424     if (info == stg_EMPTY_MVAR_info) {
1425         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1426             StgMVar_head(mvar) = CurrentTSO;
1427         } else {
1428             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1429         }
1430         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1431         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1432         StgTSO_block_info(CurrentTSO)  = mvar;
1433         StgMVar_tail(mvar) = CurrentTSO;
1434         
1435         jump stg_block_takemvar;
1436   }
1437
1438   /* we got the value... */
1439   val = StgMVar_value(mvar);
1440
1441   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1442   {
1443       /* There are putMVar(s) waiting... 
1444        * wake up the first thread on the queue
1445        */
1446       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1447
1448       /* actually perform the putMVar for the thread that we just woke up */
1449       tso = StgMVar_head(mvar);
1450       PerformPut(tso,StgMVar_value(mvar));
1451
1452 #if defined(GRAN) || defined(PAR)
1453       /* ToDo: check 2nd arg (mvar) is right */
1454       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar);
1455       StgMVar_head(mvar) = tso;
1456 #else
1457       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1458       StgMVar_head(mvar) = tso;
1459 #endif
1460       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1461           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1462       }
1463       RET_P(val);
1464   } 
1465   else
1466   {
1467       /* No further putMVars, MVar is now empty */
1468       
1469       /* do this last... we might have locked the MVar in the SMP case,
1470        * and writing the info pointer will unlock it.
1471        */
1472       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1473       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1474       RET_P(val);
1475   }
1476 }
1477
1478
1479 tryTakeMVarzh_fast
1480 {
1481     W_ mvar, val, info, tso;
1482
1483     /* args: R1 = MVar closure */
1484
1485     mvar = R1;
1486
1487     info = GET_INFO(mvar);
1488
1489     if (info == stg_EMPTY_MVAR_info) {
1490         /* HACK: we need a pointer to pass back, 
1491          * so we abuse NO_FINALIZER_closure
1492          */
1493         RET_NP(0, stg_NO_FINALIZER_closure);
1494     }
1495
1496     /* we got the value... */
1497     val = StgMVar_value(mvar);
1498
1499     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1500         /* There are putMVar(s) waiting... 
1501          * wake up the first thread on the queue
1502          */
1503         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1504
1505         /* actually perform the putMVar for the thread that we just woke up */
1506         tso = StgMVar_head(mvar);
1507         PerformPut(tso,StgMVar_value(mvar));
1508
1509 #if defined(GRAN) || defined(PAR)
1510         /* ToDo: check 2nd arg (mvar) is right */
1511         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
1512         StgMVar_head(mvar) = tso;
1513 #else
1514         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1515         StgMVar_head(mvar) = tso;
1516 #endif
1517
1518         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1519             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1520         }
1521     }
1522     else 
1523     {
1524         /* No further putMVars, MVar is now empty */
1525         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1526         
1527         /* do this last... we might have locked the MVar in the SMP case,
1528          * and writing the info pointer will unlock it.
1529          */
1530         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1531     }
1532     
1533     RET_NP(1, val);
1534 }
1535
1536
1537 putMVarzh_fast
1538 {
1539     W_ mvar, info, tso;
1540
1541     /* args: R1 = MVar, R2 = value */
1542     mvar = R1;
1543
1544     info = GET_INFO(mvar);
1545
1546     if (info == stg_FULL_MVAR_info) {
1547         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1548             StgMVar_head(mvar) = CurrentTSO;
1549         } else {
1550             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1551         }
1552         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1553         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1554         StgTSO_block_info(CurrentTSO)  = mvar;
1555         StgMVar_tail(mvar) = CurrentTSO;
1556         
1557         jump stg_block_putmvar;
1558     }
1559   
1560     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1561         /* There are takeMVar(s) waiting: wake up the first one
1562          */
1563         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1564
1565         /* actually perform the takeMVar */
1566         tso = StgMVar_head(mvar);
1567         PerformTake(tso, R2);
1568       
1569 #if defined(GRAN) || defined(PAR)
1570         /* ToDo: check 2nd arg (mvar) is right */
1571         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1572         StgMVar_head(mvar) = tso;
1573 #else
1574         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1575         StgMVar_head(mvar) = tso;
1576 #endif
1577
1578         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1579             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1580         }
1581
1582         jump %ENTRY_CODE(Sp(0));
1583     }
1584     else
1585     {
1586         /* No further takes, the MVar is now full. */
1587         StgMVar_value(mvar) = R2;
1588         /* unlocks the MVar in the SMP case */
1589         SET_INFO(mvar,stg_FULL_MVAR_info);
1590         jump %ENTRY_CODE(Sp(0));
1591     }
1592     
1593     /* ToDo: yield afterward for better communication performance? */
1594 }
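
/* A minimal Haskell sketch of the blocking behaviour implemented above:
 * putMVar on a full MVar queues the caller on the MVar's TSO list, and a
 * later takeMVar wakes it up.  (putDemo is an illustrative name.)
 *
 *   import Control.Concurrent
 *
 *   putDemo :: IO ()
 *   putDemo = do
 *       m <- newMVar (0 :: Int)       -- MVar starts full
 *       _ <- forkIO (putMVar m 1)     -- blocks on the FULL_MVAR path if it runs first
 *       x <- takeMVar m               -- 0; wakes the blocked writer, if any
 *       y <- takeMVar m               -- 1, once the forked put has completed
 *       print (x, y)                  -- (0,1)
 */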
1595
1596
1597 tryPutMVarzh_fast
1598 {
1599     W_ mvar, info, tso;
1600
1601     /* args: R1 = MVar, R2 = value */
1602     mvar = R1;
1603
1604     info = GET_INFO(mvar);
1605
1606     if (info == stg_FULL_MVAR_info) {
1607         RET_N(0);
1608     }
1609   
1610     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1611         /* There are takeMVar(s) waiting: wake up the first one
1612          */
1613         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1614         
1615         /* actually perform the takeMVar */
1616         tso = StgMVar_head(mvar);
1617         PerformTake(tso, R2);
1618       
1619 #if defined(GRAN) || defined(PAR)
1620         /* ToDo: check 2nd arg (mvar) is right */
1621         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
1622         StgMVar_head(mvar) = tso;
1623 #else
1624         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
1625         StgMVar_head(mvar) = tso;
1626 #endif
1627
1628         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1629             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1630         }
1631
1632         RET_N(1);  /* tryPutMVar# succeeded */
1633     }
1634     else
1635     {
1636         /* No further takes, the MVar is now full. */
1637         StgMVar_value(mvar) = R2;
1638         /* unlocks the MVar in the SMP case */
1639         SET_INFO(mvar,stg_FULL_MVAR_info);
1640         RET_N(1);  /* tryPutMVar# succeeded */
1641     }
1642     
1643     /* ToDo: yield afterward for better communication performance? */
1644 }
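
/* At the Haskell level this primop backs tryPutMVar :: MVar a -> a -> IO Bool.
 * A rough sketch (tryPutDemo is an illustrative name):
 *
 *   import Control.Concurrent.MVar
 *
 *   tryPutDemo :: IO ()
 *   tryPutDemo = do
 *       m   <- newMVar 'a'
 *       ok1 <- tryPutMVar m 'b'      -- False: the FULL_MVAR / RET_N(0) path
 *       _   <- takeMVar m
 *       ok2 <- tryPutMVar m 'b'      -- True: MVar was empty, 'b' is stored
 *       print (ok1, ok2)             -- (False,True)
 */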
1645
1646
1647 /* -----------------------------------------------------------------------------
1648    Stable pointer primitives
1649    -------------------------------------------------------------------------  */
1650
1651 makeStableNamezh_fast
1652 {
1653     W_ index, sn_obj;
1654
1655     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1656   
1657     index = foreign "C" lookupStableName(R1 "ptr");
1658
1659     /* Is there already a StableName for this heap object?
1660      *  stable_ptr_table is an array of snEntry structs.
1661      */
1662     if ( snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry) == NULL ) {
1663         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1664         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1665         StgStableName_sn(sn_obj) = index;
1666         snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry) = sn_obj;
1667     } else {
1668         sn_obj = snEntry_sn_obj(stable_ptr_table + index*SIZEOF_snEntry);
1669     }
1670     
1671     RET_P(sn_obj);
1672 }
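
/* A rough Haskell-level sketch via System.Mem.StableName (stableNameDemo is an
 * illustrative name): repeated makeStableName on the same heap object should
 * hit the cached snEntry above and compare equal.
 *
 *   import System.Mem.StableName
 *
 *   stableNameDemo :: IO ()
 *   stableNameDemo = do
 *       let xs = [1,2,3] :: [Int]
 *       sn1 <- makeStableName xs
 *       sn2 <- makeStableName xs
 *       print (sn1 == sn2, hashStableName sn1)   -- expect True and some hash
 */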
1673
1674
1675 makeStablePtrzh_fast
1676 {
1677     /* Args: R1 = a */
1678     W_ sp;
1679     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1680     "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
1681     RET_N(sp);
1682 }
1683
1684 deRefStablePtrzh_fast
1685 {
1686     /* Args: R1 = the stable ptr */
1687     W_ r, sp;
1688     sp = R1;
1689     r = snEntry_addr(stable_ptr_table + sp*SIZEOF_snEntry);
1690     RET_P(r);
1691 }
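
/* These two primops back Foreign.StablePtr.  A minimal usage sketch
 * (stablePtrDemo is an illustrative name); the StablePtr keeps its closure
 * alive via the stable pointer table until it is freed:
 *
 *   import Foreign.StablePtr
 *
 *   stablePtrDemo :: IO ()
 *   stablePtrDemo = do
 *       sp <- newStablePtr "kept alive"   -- getStablePtr: adds a table entry
 *       s  <- deRefStablePtr sp           -- the snEntry_addr lookup above
 *       putStrLn s
 *       freeStablePtr sp                  -- releases the table entry
 */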
1692
1693 /* -----------------------------------------------------------------------------
1694    Bytecode object primitives
1695    -------------------------------------------------------------------------  */
1696
1697 newBCOzh_fast
1698 {
1699     /* R1 = instrs
1700        R2 = literals
1701        R3 = ptrs
1702        R4 = itbls
1703        R5 = arity
1704        R6 = bitmap array
1705     */
1706     W_ bco, bitmap_arr, bytes, words;
1707     
1708     bitmap_arr = R6;
1709     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1710     bytes = WDS(words);
1711
1712     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
1713
1714     bco = Hp - bytes + WDS(1);
1715     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1716     
1717     StgBCO_instrs(bco)     = R1;
1718     StgBCO_literals(bco)   = R2;
1719     StgBCO_ptrs(bco)       = R3;
1720     StgBCO_itbls(bco)      = R4;
1721     StgBCO_arity(bco)      = HALF_W_(R5);
1722     StgBCO_size(bco)       = HALF_W_(words);
1723     
1724     // Copy the arity/bitmap info into the BCO
1725     W_ i;
1726     i = 0;
1727 for:
1728     if (i < StgArrWords_words(bitmap_arr)) {
1729         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1730         i = i + 1;
1731         goto for;
1732     }
1733     
1734     RET_P(bco);
1735 }
1736
1737
1738 mkApUpd0zh_fast
1739 {
1740     // R1 = the BCO# for the AP
1741     //  
1742     W_ ap;
1743
1744     // This function is *only* used to wrap zero-arity BCOs in an
1745     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1746     // saturated and always points directly to a FUN or BCO.
1747     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1748            StgBCO_arity(R1) == HALF_W_(0));
1749
1750     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1751     TICK_ALLOC_UP_THK(0, 0);
1752     CCCS_ALLOC(SIZEOF_StgAP);
1753
1754     ap = Hp - SIZEOF_StgAP + WDS(1);
1755     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1756     
1757     StgAP_n_args(ap) = HALF_W_(0);
1758     StgAP_fun(ap) = R1;
1759     
1760     RET_P(ap);
1761 }
1762
1763 /* -----------------------------------------------------------------------------
1764    Thread I/O blocking primitives
1765    -------------------------------------------------------------------------- */
1766
1767 /* Add a thread to the end of the blocked queue. (C-- version of the C
1768  * macro in Schedule.h).
1769  */
1770 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1771     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1772     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1773       W_[blocked_queue_hd] = tso;                       \
1774     } else {                                            \
1775       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1776     }                                                   \
1777     W_[blocked_queue_tl] = tso;
1778
1779 waitReadzh_fast
1780 {
1781     /* args: R1 */
1782 #ifdef THREADED_RTS
1783     foreign "C" barf("waitRead# on threaded RTS");
1784 #endif
1785
1786     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1787     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1788     StgTSO_block_info(CurrentTSO) = R1;
1789     // No locking - we're not going to use this interface in the
1790     // threaded RTS anyway.
1791     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1792     jump stg_block_noregs;
1793 }
1794
1795 waitWritezh_fast
1796 {
1797     /* args: R1 */
1798 #ifdef THREADED_RTS
1799     foreign "C" barf("waitWrite# on threaded RTS");
1800 #endif
1801
1802     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1803     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1804     StgTSO_block_info(CurrentTSO) = R1;
1805     // No locking - we're not going to use this interface in the
1806     // threaded RTS anyway.
1807     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1808     jump stg_block_noregs;
1809 }
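
/* These primops back threadWaitRead/threadWaitWrite from Control.Concurrent
 * on the non-threaded RTS.  A minimal sketch, assuming the Fd-based
 * signatures (waitDemo is an illustrative name):
 *
 *   import Control.Concurrent (threadWaitRead, threadWaitWrite)
 *   import System.Posix.Types (Fd)
 *
 *   waitDemo :: Fd -> IO ()
 *   waitDemo fd = do
 *       threadWaitRead fd      -- block this Haskell thread until fd is readable
 *       -- ... read from fd ...
 *       threadWaitWrite fd     -- block until fd is writable
 */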
1810
1811
1812 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1813 delayzh_fast
1814 {
1815 #ifdef mingw32_TARGET_OS
1816     W_ ares;
1817     CInt reqID;
1818 #else
1819     W_ t, prev, target;
1820 #endif
1821
1822 #ifdef THREADED_RTS
1823     foreign "C" barf("delay# on threaded RTS");
1824 #endif
1825
1826     /* args: R1 (microsecond delay amount) */
1827     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1828     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
1829
1830 #ifdef mingw32_TARGET_OS
1831
1832     /* could probably allocate this on the heap instead */
1833     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1834                                             stg_delayzh_malloc_str);
1835     reqID = foreign "C" addDelayRequest(R1);
1836     StgAsyncIOResult_reqID(ares)   = reqID;
1837     StgAsyncIOResult_len(ares)     = 0;
1838     StgAsyncIOResult_errCode(ares) = 0;
1839     StgTSO_block_info(CurrentTSO)  = ares;
1840
1841     /* Having all async-blocked threads reside on the blocked_queue
1842      * simplifies matters, so change the status to BlockedOnDoProc and put
1843      * the delayed thread on the blocked_queue.
1844      */
1845     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1846     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1847     jump stg_block_async_void;
1848
1849 #else
1850
1851     W_ time;
1852     time = foreign "C" getourtimeofday();
1853     target = (R1 / (TICK_MILLISECS*1000)) + time;
1854     StgTSO_block_info(CurrentTSO) = target;
1855
1856     /* Insert the new thread in the sleeping queue. */
1857     prev = NULL;
1858     t = W_[sleeping_queue];
1859 while:
1860     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
1861         prev = t;
1862         t = StgTSO_link(t);
1863         goto while;
1864     }
1865
1866     StgTSO_link(CurrentTSO) = t;
1867     if (prev == NULL) {
1868         W_[sleeping_queue] = CurrentTSO;
1869     } else {
1870         StgTSO_link(prev) = CurrentTSO;
1871     }
1872     jump stg_block_noregs;
1873 #endif
1874 }
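
/* delay# takes its argument in microseconds (R1 above); at the library level
 * this is threadDelay.  A minimal sketch (delayDemo is an illustrative name):
 *
 *   import Control.Concurrent (threadDelay)
 *
 *   delayDemo :: IO ()
 *   delayDemo = do
 *       threadDelay 500000     -- 500000 us = 0.5 seconds
 *       putStrLn "woke up"
 */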
1875
1876
1877 #ifdef mingw32_TARGET_OS
1878 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
1879 asyncReadzh_fast
1880 {
1881     W_ ares;
1882     CInt reqID;
1883
1884 #ifdef THREADED_RTS
1885     foreign "C" barf("asyncRead# on threaded RTS");
1886 #endif
1887
1888     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1889     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1890     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1891
1892     /* could probably allocate this on the heap instead */
1893     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1894                                             stg_asyncReadzh_malloc_str);
1895     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
1896     StgAsyncIOResult_reqID(ares)   = reqID;
1897     StgAsyncIOResult_len(ares)     = 0;
1898     StgAsyncIOResult_errCode(ares) = 0;
1899     StgTSO_block_info(CurrentTSO)  = ares;
1900     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1901     jump stg_block_async;
1902 }
1903
1904 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
1905 asyncWritezh_fast
1906 {
1907     W_ ares;
1908     CInt reqID;
1909
1910 #ifdef THREADED_RTS
1911     foreign "C" barf("asyncWrite# on threaded RTS");
1912 #endif
1913
1914     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
1915     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1916     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1917
1918     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1919                                             stg_asyncWritezh_malloc_str);
1920     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");
1921
1922     StgAsyncIOResult_reqID(ares)   = reqID;
1923     StgAsyncIOResult_len(ares)     = 0;
1924     StgAsyncIOResult_errCode(ares) = 0;
1925     StgTSO_block_info(CurrentTSO)  = ares;
1926     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1927     jump stg_block_async;
1928 }
1929
1930 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
1931 asyncDoProczh_fast
1932 {
1933     W_ ares;
1934     CInt reqID;
1935
1936     /* args: R1 = proc, R2 = param */
1937     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1938     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
1939
1940     /* could probably allocate this on the heap instead */
1941     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
1942                                             stg_asyncDoProczh_malloc_str);
1943     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
1944     StgAsyncIOResult_reqID(ares)   = reqID;
1945     StgAsyncIOResult_len(ares)     = 0;
1946     StgAsyncIOResult_errCode(ares) = 0;
1947     StgTSO_block_info(CurrentTSO) = ares;
1948     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1949     jump stg_block_async;
1950 }
1951 #endif
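
/* On mingw32 these primops are reached through the Windows-only wrappers in
 * GHC.Conc (asyncRead, asyncWrite, asyncDoProc).  A rough sketch, assuming
 * asyncRead :: Int -> Int -> Int -> Ptr a -> IO (Int, Int); readDemo is an
 * illustrative name and the exact module and signature are assumptions here:
 *
 *   import GHC.Conc (asyncRead)
 *   import Foreign.Marshal.Alloc (allocaBytes)
 *
 *   readDemo :: Int -> IO ()
 *   readDemo fd =
 *       allocaBytes 4096 $ \buf -> do
 *           (len, err) <- asyncRead fd 0 4096 buf   -- fd, isSock=0, len, buf
 *           print (len, err)
 */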
1952
1953 /* -----------------------------------------------------------------------------
1954   ** temporary **
1955
1956    classes CCallable and CReturnable don't really exist, but the
1957    compiler insists on generating dictionaries containing references
1958    to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
1959    for these.  Some C compilers can't cope with zero-length static arrays,
1960    so we have to make these one element long.
1961   --------------------------------------------------------------------------- */
1962
1963 section "rodata" {
1964   GHC_ZCCCallable_static_info:   W_ 0;
1965 }
1966
1967 section "rodata" {
1968   GHC_ZCCReturnable_static_info: W_ 0;
1969 }