[project @ 2005-05-19 13:46:24 by simonmar]
[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
/* newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 * R1 = size in BYTES.  Allocates an ARR_WORDS closure in the nursery,
 * rounding the payload up to a whole number of words. */
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
     /* May GC before reading args: no pointer args to save (NO_PTRS). */
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(BaseReg "ptr",words);
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
     /* NB. the words field records the rounded-up payload size in words,
        not the byte count requested by the caller. */
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
/* newPinnedByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 * R1 = size in BYTES.  Like newByteArray#, but the object is allocated
 * with allocatePinned() so the GC will never move it, and the payload
 * is arranged to start on an 8-byte boundary. */
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
         /* one extra pad word so the payload can be realigned below */
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words);
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
/* newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #)
 * Allocates a MUT_ARR_PTRS of n pointers, every slot initialised to the
 * value in R2. */
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
     /* R2 is a heap pointer, so it must be saved/restored across a GC. */
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(BaseReg "ptr",words);
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
     /* hand-written loop: C-- has no structured for-statement */
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
/* unsafeThawArray# :: Array# a -> State# s -> (# State# s, MutableArray# s a #)
 * R1 = frozen array.  Converts it back to a mutable MUT_ARR_PTRS in place,
 * taking care to put it (back) on the mutable list if needed. */
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
130   // that it is still on the mutable list.
131
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the mut_link field is NULL if it isn't on a mut_list, and the GC
135   // maintains this invariant.
136   //
  /* Not FROZEN0 => not currently on the mutable list, so record it now. */
137   if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
138         foreign "C" recordMutableLock(R1 "ptr");
139   }
140
141   SET_INFO(R1,stg_MUT_ARR_PTRS_info);
142
143   RET_P(R1);
144 }
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
/* newMutVar# :: a -> State# s -> (# State# s, MutVar# s a #)
 * Allocates a MUT_VAR closure containing the value in R1. */
150 newMutVarzh_fast
151 {
152     W_ mv;
153     /* Args: R1 = initialisation value */
154
     /* ALLOC_PRIM does the heap check; on GC the R1 pointer is saved. */
155     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
156
157     mv = Hp - SIZEOF_StgMutVar + WDS(1);
158     SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
159     StgMutVar_var(mv) = R1;
160     
161     RET_P(mv);
162 }
163
/* atomicModifyMutVar# :: MutVar# s a -> (a -> (a,b)) -> State# s -> (# State# s, b #)
 * Builds three thunks lazily (the application (f x), its first and second
 * selections), stores sel_0 back into the MutVar and returns sel_1.
 * NOTE(review): on SMP the atomicity here comes from holding sm_mutex
 * around the read-modify-write — confirm against the SMP build. */
164 atomicModifyMutVarzh_fast
165 {
166     W_ mv, z, x, y, r;
167     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
168
169     /* If x is the current contents of the MutVar#, then 
170        We want to make the new contents point to
171
172          (sel_0 (f x))
173  
174        and the return value is
175          
176          (sel_1 (f x))
177
178         obviously we can share (f x).
179
180          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
181          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
182          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
183     */
184
/* Thunks must be at least MIN_UPD_SIZE words of payload so they can be
   overwritten with an indirection when updated. */
185 #if MIN_UPD_SIZE > 1
186 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
187 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
188 #else
189 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
190 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
191 #endif
192
193 #if MIN_UPD_SIZE > 2
194 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
195 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
196 #else
197 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
198 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
199 #endif
200
201 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
202
   /* One heap check up front covers all three thunk allocations. */
203    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
204
205 #if defined(SMP)
206     foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
207 #endif
208
209    x = StgMutVar_var(R1);
210
   /* z = thunk for (f x) */
211    TICK_ALLOC_THUNK_2();
212    CCCS_ALLOC(THUNK_2_SIZE);
213    z = Hp - THUNK_2_SIZE + WDS(1);
214    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
215    LDV_RECORD_CREATE(z);
216    StgThunk_payload(z,0) = R2;
217    StgThunk_payload(z,1) = x;
218
   /* y = thunk for (sel_0 z): the new MutVar contents */
219    TICK_ALLOC_THUNK_1();
220    CCCS_ALLOC(THUNK_1_SIZE);
221    y = z - THUNK_1_SIZE;
222    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
223    LDV_RECORD_CREATE(y);
224    StgThunk_payload(y,0) = z;
225
226    StgMutVar_var(R1) = y;
227
   /* r = thunk for (sel_1 z): the result */
228    TICK_ALLOC_THUNK_1();
229    CCCS_ALLOC(THUNK_1_SIZE);
230    r = y - THUNK_1_SIZE;
231    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
232    LDV_RECORD_CREATE(r);
233    StgThunk_payload(r,0) = z;
234
235 #if defined(SMP)
236     foreign "C" RELEASE_LOCK(sm_mutex "ptr");
237 #endif
238
239    RET_P(r);
240 }
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
/* mkForeignObj# : wraps an Addr# in a FOREIGN heap closure. */
246 mkForeignObjzh_fast
247 {
248   /* R1 = ptr to foreign object,
249   */
250   W_ result;
251
  /* R1 is an Addr#, not a heap pointer, hence NO_PTRS for the GC. */
252   ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);
253
254   result = Hp - SIZEOF_StgForeignObj + WDS(1);
255   SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
256   StgForeignObj_data(result) = R1;
257
258   /* returns (# s#, ForeignObj# #) */
259   RET_P(result);
260 }
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
/* debug message used when +RTS -Dw tracing is on */
266 STRING(stg_weak_msg,"New weak pointer at %p\n")
267
/* mkWeak# :: o -> b -> c -> State# RealWorld -> (# State# RealWorld, Weak# b #)
 * Allocates a WEAK closure and links it onto the global weak_ptr_list
 * so the GC can find it. */
268 mkWeakzh_fast
269 {
270   /* R1 = key
271      R2 = value
272      R3 = finalizer (or NULL)
273   */
274   W_ w;
275
  /* normalise a missing finalizer to the canonical NO_FINALIZER closure */
276   if (R3 == NULL) {
277     R3 = stg_NO_FINALIZER_closure;
278   }
279
280   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
281
282   w = Hp - SIZEOF_StgWeak + WDS(1);
283   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
284
285   StgWeak_key(w)       = R1;
286   StgWeak_value(w)     = R2;
287   StgWeak_finalizer(w) = R3;
288
  /* cons onto the head of the global weak pointer list */
289   StgWeak_link(w)       = W_[weak_ptr_list];
290   W_[weak_ptr_list]     = w;
291
292   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));
293
294   RET_P(w);
295 }
296
297
/* finalizeWeak# :: Weak# a -> State# RealWorld -> (# State# RealWorld, Int#, b #)
 * Kills the weak pointer (turns it into DEAD_WEAK) and returns its
 * finalizer: (# 0, no-finalizer #) if already dead or there is none,
 * otherwise (# 1, finalizer #). */
298 finalizzeWeakzh_fast
299 {
300   /* R1 = weak ptr
301    */
302   W_ w, f;
303
304   w = R1;
305
306   // already dead?
307   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
308       RET_NP(0,stg_NO_FINALIZER_closure);
309   }
310
311   // kill it
312 #ifdef PROFILING
313   // @LDV profiling
314   // A weak pointer is inherently used, so we do not need to call
315   // LDV_recordDead_FILL_SLOP_DYNAMIC():
316   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
317   // or, LDV_recordDead():
318   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
319   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
320   // large as weak pointers, so there is no need to fill the slop, either.
321   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
322 #endif
323
324   //
325   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
326   //
327   SET_INFO(w,stg_DEAD_WEAK_info);
328   LDV_RECORD_CREATE(w);
329
  /* read the finalizer BEFORE overwriting the link field: DEAD_WEAK and
     WEAK overlay the same memory */
330   f = StgWeak_finalizer(w);
331   StgDeadWeak_link(w) = StgWeak_link(w);
332
333   /* return the finalizer */
334   if (f == stg_NO_FINALIZER_closure) {
335       RET_NP(0,stg_NO_FINALIZER_closure);
336   } else {
337       RET_NP(1,f);
338   }
339 }
340
/* deRefWeak# :: Weak# a -> State# RealWorld -> (# State# RealWorld, Int#, a #)
 * Returns (# 1, value #) if the weak pointer is still alive,
 * (# 0, weak-ptr-itself #) if it has died (the second component is then
 * a dummy and must not be used by the caller). */
341 deRefWeakzh_fast
342 {
343   /* R1 = weak ptr */
344   W_ w, code, val;
345
346   w = R1;
347   if (GET_INFO(w) == stg_WEAK_info) {
348     code = 1;
349     val = StgWeak_value(w);
350   } else {
351     code = 0;
352     val = w;
353   }
354   RET_NP(code,val);
355 }
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
/* int2Integer# :: Int# -> (# Int#, ByteArray# #)
 * Converts an Int# to the GMP-style (sign/size, limb array) Integer
 * representation: size is -1, 0 or 1 and the single limb holds |val|. */
364 int2Integerzh_fast
365 {
366    /* arguments: R1 = Int# */
367
368    W_ val, s, p;        /* to avoid aliasing */
369
370    val = R1;
371    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
372
373    p = Hp - SIZEOF_StgArrWords;
374    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
375    StgArrWords_words(p) = 1;
376
377    /* mpz_set_si is inlined here, makes things simpler */
   /* %lt/%gt are SIGNED comparisons; plain </> in C-- are unsigned */
378    if (%lt(val,0)) { 
379         s  = -1;
380         Hp(0) = -val;
381    } else { 
382      if (%gt(val,0)) {
383         s = 1;
384         Hp(0) = val;
385      } else {
           /* val == 0: size 0, limb contents irrelevant */
386         s = 0;
387      }
388   }
389
390    /* returns (# size  :: Int#, 
391                  data  :: ByteArray# 
392                #)
393    */
394    RET_NP(s,p);
395 }
396
/* word2Integer# :: Word# -> (# Int#, ByteArray# #)
 * Like int2Integer#, but the argument is unsigned so the size is 0 or 1. */
397 word2Integerzh_fast
398 {
399    /* arguments: R1 = Word# */
400
401    W_ val, s, p;        /* to avoid aliasing */
402
403    val = R1;
404
405    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
406
407    p = Hp - SIZEOF_StgArrWords;
408    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
409    StgArrWords_words(p) = 1;
410
411    if (val != 0) {
412         s = 1;
413         W_[Hp] = val;
414    } else {
415         s = 0;
416    }
417
418    /* returns (# size  :: Int#, 
419                  data  :: ByteArray# #)
420    */
421    RET_NP(s,p);
422 }
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
/* int64ToInteger# :: Int64# -> (# Int#, ByteArray# #)
 * 32-bit-only primop (SUPPORT_LONG_LONGS): splits a signed 64-bit value
 * into one or two word-sized limbs plus a sign carried in the size. */
431 int64ToIntegerzh_fast
432 {
433    /* arguments: L1 = Int64# */
434
435    L_ val;
436    W_ hi, s, neg, words_needed, p;
437
438    val = L1;
439    neg = 0;
440
   /* two limbs needed iff the value doesn't fit in one 32-bit word */
441    if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
442        words_needed = 2;
443    } else { 
444        // minimum is one word
445        words_needed = 1;
446    }
447
448    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
449                NO_PTRS, int64ToIntegerzh_fast );
450
451    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
452    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
453    StgArrWords_words(p) = words_needed;
454
   /* store the magnitude; the sign is re-applied to s at the end */
455    if ( %lt(val,0::L_) ) {
456      neg = 1;
457      val = -val;
458    }
459
460    hi = TO_W_(val >> 32);
461
462    if ( words_needed == 2 )  { 
463       s = 2;
464       Hp(-1) = TO_W_(val);
465       Hp(0) = hi;
466    } else { 
467        if ( val != 0::L_ ) {
468            s = 1;
469            Hp(0) = TO_W_(val);
470        } else /* val==0 */  {
471            s = 0;
472        }
473    }
474    if ( neg != 0 ) {
475         s = -s;
476    }
477
478    /* returns (# size  :: Int#, 
479                  data  :: ByteArray# #)
480    */
481    RET_NP(s,p);
482 }
483
/* word64ToInteger# :: Word64# -> (# Int#, ByteArray# #)
 * Unsigned counterpart of int64ToInteger#: size is 0, 1 or 2. */
484 word64ToIntegerzh_fast
485 {
486    /* arguments: L1 = Word64# */
487
488    L_ val;
489    W_ hi, s, words_needed, p;
490
491    val = L1;
492    if ( val >= 0x100000000::L_ ) {
493       words_needed = 2;
494    } else {
495       words_needed = 1;
496    }
497
498    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
499                NO_PTRS, word64ToIntegerzh_fast );
500
501    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
502    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
503    StgArrWords_words(p) = words_needed;
504
505    hi = TO_W_(val >> 32);
506    if ( val >= 0x100000000::L_ ) { 
507      s = 2;
508      Hp(-1) = TO_W_(val);
509      Hp(0)  = hi;
510    } else {
511       if ( val != 0::L_ ) {
512         s = 1;
513         Hp(0) = TO_W_(val);
514      } else /* val==0 */  {
515       s = 0;
516      }
517   }
518
519    /* returns (# size  :: Int#, 
520                  data  :: ByteArray# #)
521    */
522    RET_NP(s,p);
523 }
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
528 /* ToDo: this is shockingly inefficient */
529
/* Static scratch mpz_t structures used by the GMP primops below.
 * mp_tmp1/mp_tmp2 hold the (non-owning) views of the argument ByteArrays;
 * result1/result2 receive the results from GMP.
 * NOTE(review): these are global, so the GMP primops are not reentrant —
 * presumably safe because Haskell threads don't interleave inside a
 * primop; confirm for the SMP build. */
530 section "bss" {
531   mp_tmp1:
532     bits8 [SIZEOF_MP_INT];
533 }
534
535 section "bss" {
536   mp_tmp2:
537     bits8 [SIZEOF_MP_INT];
538 }
539
540 section "bss" {
541   result1:
542     bits8 [SIZEOF_MP_INT];
543 }
544
545 section "bss" {
546   result2:
547     bits8 [SIZEOF_MP_INT];
548 }
549
/* GMP_TAKE2_RET1(name, mp_fun): defines a primop `name` taking two
 * Integers as (R1=size1, R2=data1, R3=size2, R4=data2), applying the
 * 2-argument GMP function `mp_fun`, and returning one (size, data) pair.
 * The mpz_t temporaries alias the argument ByteArrays (no copy); the
 * returned data pointer is adjusted back to the enclosing ArrWords.
 * (Comments inside the macro body would need `\` continuations, so the
 * documentation lives here.) */
550 #define GMP_TAKE2_RET1(name,mp_fun)                     \
551 name                                                    \
552 {                                                       \
553   CInt s1, s2;                                          \
554   W_ d1, d2;                                            \
555                                                         \
556   /* call doYouWantToGC() */                            \
557   MAYBE_GC(R2_PTR & R4_PTR, name);                      \
558                                                         \
559   s1 = W_TO_INT(R1);                                    \
560   d1 = R2;                                              \
561   s2 = W_TO_INT(R3);                                    \
562   d2 = R4;                                              \
563                                                         \
564   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));  \
565   MP_INT__mp_size(mp_tmp1)  = (s1);                     \
566   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);         \
567   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));  \
568   MP_INT__mp_size(mp_tmp2)  = (s2);                     \
569   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);         \
570                                                         \
571   foreign "C" mpz_init(result1);                        \
572                                                         \
573   /* Perform the operation */                           \
574   foreign "C" mp_fun(result1,mp_tmp1,mp_tmp2);          \
575                                                         \
576   RET_NP(TO_W_(MP_INT__mp_size(result1)),               \
577          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);   \
578 }
579
/* GMP_TAKE1_RET1(name, mp_fun): one-Integer-argument variant of
 * GMP_TAKE2_RET1 above.  Args: R1 = size, R2 = data (ByteArray#);
 * returns one (size, data) pair from the unary GMP function `mp_fun`. */
580 #define GMP_TAKE1_RET1(name,mp_fun)                             \
581 name                                                            \
582 {                                                               \
583   CInt s1;                                                      \
584   W_ d1;                                                        \
585                                                                 \
586   /* call doYouWantToGC() */                                    \
587   MAYBE_GC(R2_PTR, name);                                       \
588                                                                 \
589   d1 = R2;                                                      \
590   s1 = W_TO_INT(R1);                                            \
591                                                                 \
592   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
593   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
594   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
595                                                                 \
596   foreign "C" mpz_init(result1);                                \
597                                                                 \
598   /* Perform the operation */                                   \
599   foreign "C" mp_fun(result1,mp_tmp1);                          \
600                                                                 \
601   RET_NP(TO_W_(MP_INT__mp_size(result1)),                       \
602          MP_INT__mp_d(result1) - SIZEOF_StgArrWords);           \
603 }
604
/* GMP_TAKE2_RET2(name, mp_fun): two-Integer-argument, two-result variant
 * (e.g. quotRem/divMod).  `mp_fun` writes into result1 and result2; the
 * primop returns two (size, data) pairs via RET_NPNP. */
605 #define GMP_TAKE2_RET2(name,mp_fun)                             \
606 name                                                            \
607 {                                                               \
608   CInt s1, s2;                                                  \
609   W_ d1, d2;                                                    \
610                                                                 \
611   /* call doYouWantToGC() */                                    \
612   MAYBE_GC(R2_PTR & R4_PTR, name);                              \
613                                                                 \
614   s1 = W_TO_INT(R1);                                            \
615   d1 = R2;                                                      \
616   s2 = W_TO_INT(R3);                                            \
617   d2 = R4;                                                      \
618                                                                 \
619   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
620   MP_INT__mp_size(mp_tmp1)      = (s1);                         \
621   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
622   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));      \
623   MP_INT__mp_size(mp_tmp2)      = (s2);                         \
624   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);             \
625                                                                 \
626   foreign "C" mpz_init(result1);                                \
627   foreign "C" mpz_init(result2);                                \
628                                                                 \
629   /* Perform the operation */                                   \
630   foreign "C" mp_fun(result1,result2,mp_tmp1,mp_tmp2);          \
631                                                                 \
632   RET_NPNP(TO_W_(MP_INT__mp_size(result1)),                     \
633            MP_INT__mp_d(result1) - SIZEOF_StgArrWords,          \
634            TO_W_(MP_INT__mp_size(result2)),                     \
635            MP_INT__mp_d(result2) - SIZEOF_StgArrWords);         \
636 }
637
/* Instantiate the Integer arithmetic primops from the macros above.
 * Each line defines one out-of-line primop backed by the named GMP
 * function (mpz_tdiv_* = truncate-towards-zero, mpz_fdiv_* = floor). */
638 GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
639 GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
640 GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
641 GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
642 GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
643 GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
644 GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
645 GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
646 GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
647 GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
648 GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
649
650 GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
651 GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
652
/* one-limb static scratch buffer for gcdIntzh_fast below */
653 section "bss" {
654   aa:  W_; // NB. aa is really an mp_limb_t
655 }
656
/* gcdInt# :: Int# -> Int# -> Int#
 * R1, R2 = the two ints.  Copies R1 into the static limb buffer and lets
 * GMP's mpn_gcd_1 do the work. */
657 gcdIntzh_fast
658 {
659     /* R1 = the first Int#; R2 = the second Int# */
660     W_ r; 
661
662     W_[aa] = R1;
663     r = foreign "C" mpn_gcd_1(aa, 1, R2);
664
665     R1 = r;
666     /* Result parked in R1, return via info-pointer at TOS */
667     jump %ENTRY_CODE(Sp(0));
668 }
669
670
/* gcdIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 * GCD of an Integer (size R1, limbs R2) and an Int# (R3), computed
 * directly over the Integer's limbs with mpn_gcd_1. */
671 gcdIntegerIntzh_fast
672 {
673     /* R1 = s1; R2 = d1; R3 = the int */
674     R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
675     
676     /* Result parked in R1, return via info-pointer at TOS */
677     jump %ENTRY_CODE(Sp(0));
678 }
679
680
/* cmpIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 * Compares an Integer (size R1, limbs R2) against an Int# (R3),
 * returning <0, 0 or >0 in R1.  Because |size| <= 1 covers every value an
 * Int# can take, only the first limb ever needs inspecting. */
681 cmpIntegerIntzh_fast
682 {
683     /* R1 = s1; R2 = d1; R3 = the int */
684     W_ usize, vsize, v_digit, u_digit;
685
686     usize = R1;
687     vsize = 0;
688     v_digit = R3;
689
690     // paraphrased from mpz_cmp_si() in the GMP sources
691     if (%gt(v_digit,0)) {
692         vsize = 1;
693     } else { 
694         if (%lt(v_digit,0)) {
695             vsize = -1;
696             v_digit = -v_digit;
697         }
698     }
699
    /* different signs/sizes: the size comparison alone decides */
700     if (usize != vsize) {
701         R1 = usize - vsize; 
702         jump %ENTRY_CODE(Sp(0));
703     }
704
705     if (usize == 0) {
706         R1 = 0; 
707         jump %ENTRY_CODE(Sp(0));
708     }
709
710     u_digit = W_[BYTE_ARR_CTS(R2)];
711
712     if (u_digit == v_digit) {
713         R1 = 0; 
714         jump %ENTRY_CODE(Sp(0));
715     }
716
717     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        /* bigger magnitude: result takes the common sign (usize = +/-1) */
718         R1 = usize; 
719     } else {
720         R1 = -usize; 
721     }
722
723     jump %ENTRY_CODE(Sp(0));
724 }
725
/* cmpInteger# :: Int# -> ByteArray# -> Int# -> ByteArray# -> Int#
 * Three-way comparison of two Integers; returns <0, 0 or >0 in R1.
 * Sizes decide first; equal nonzero sizes fall through to a limb-by-limb
 * mpn_cmp, with the result flipped for negative numbers. */
726 cmpIntegerzh_fast
727 {
728     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
729     W_ usize, vsize, size, up, vp;
730     CInt cmp;
731
732     // paraphrased from mpz_cmp() in the GMP sources
733     usize = R1;
734     vsize = R3;
735
736     if (usize != vsize) {
737         R1 = usize - vsize; 
738         jump %ENTRY_CODE(Sp(0));
739     }
740
741     if (usize == 0) {
742         R1 = 0; 
743         jump %ENTRY_CODE(Sp(0));
744     }
745
746     if (%lt(usize,0)) { // NB. not <, which is unsigned
747         size = -usize;
748     } else {
749         size = usize;
750     }
751
752     up = BYTE_ARR_CTS(R2);
753     vp = BYTE_ARR_CTS(R4);
754
755     cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);
756
757     if (cmp == 0 :: CInt) {
758         R1 = 0; 
759         jump %ENTRY_CODE(Sp(0));
760     }
761
    /* same sign on both comparisons => first argument is greater */
762     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
763         R1 = 1;
764     } else {
765         R1 = (-1); 
766     }
767     /* Result parked in R1, return via info-pointer at TOS */
768     jump %ENTRY_CODE(Sp(0));
769 }
770
/* integer2Int# :: Int# -> ByteArray# -> Int#
 * Truncates an Integer to an Int#: takes the first limb and negates it
 * if the size is negative.  Values outside Int# range are truncated. */
771 integer2Intzh_fast
772 {
773     /* R1 = s; R2 = d */
774     W_ r, s;
775
776     s = R1;
777     if (s == 0) {
778         r = 0;
779     } else {
780         r = W_[R2 + SIZEOF_StgArrWords];
781         if (%lt(s,0)) {
782             r = -r;
783         }
784     }
785     /* Result parked in R1, return via info-pointer at TOS */
786     R1 = r;
787     jump %ENTRY_CODE(Sp(0));
788 }
789
/* integer2Word# :: Int# -> ByteArray# -> Word#
 * Same truncation as integer2Int#; the bit pattern is identical, only
 * the Haskell-level result type differs. */
790 integer2Wordzh_fast
791 {
792   /* R1 = s; R2 = d */
793   W_ r, s;
794
795   s = R1;
796   if (s == 0) {
797     r = 0;
798   } else {
799     r = W_[R2 + SIZEOF_StgArrWords];
800     if (%lt(s,0)) {
801         r = -r;
802     }
803   }
804   /* Result parked in R1, return via info-pointer at TOS */
805   R1 = r;
806   jump %ENTRY_CODE(Sp(0));
807 }
808
/* static scratch word: __decodeFloat/__decodeDouble write the exponent here */
809 section "bss" {
810   exponent:  W_;
811 }
812
/* decodeFloat# :: Float# -> (# Int#, Int#, ByteArray# #)
 * F1 = the Float#.  Returns (exponent, mantissa sign/size, mantissa limbs)
 * via the RTS helper __decodeFloat, which fills mp_tmp1 and `exponent`.
 * Fix: the opening `{` of the function body was missing (cf. every other
 * primop in this file); restored it. */
813 decodeFloatzh_fast
814 {
815     W_ p;
816     F_ arg;
817     
818     /* arguments: F1 = Float# */
819     arg = F1;
820     
821     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
822     
823     /* Be prepared to tell Lennart-coded __decodeFloat
824        where mantissa._mp_d can be put (it does not care about the rest) */
825     p = Hp - SIZEOF_StgArrWords;
826     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
827     StgArrWords_words(p) = 1;
828     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
829     
830     /* Perform the operation */
831     foreign "C" __decodeFloat(mp_tmp1,exponent,arg);
832     
833     /* returns: (Int# (expn), Int#, ByteArray#) */
834     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
835 }
836
/* sizes for the mantissa ByteArray allocated by decodeDouble# */
837 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
838 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
839
/* decodeDouble# :: Double# -> (# Int#, Int#, ByteArray# #)
 * D1 = the Double#.  Returns (exponent, mantissa sign/size, mantissa limbs)
 * via the RTS helper __decodeDouble.
 * Fix: the opening `{` of the function body was missing (cf. every other
 * primop in this file); restored it. */
840 decodeDoublezh_fast
841 {
842     D_ arg;
843     W_ p;
844
845     /* arguments: D1 = Double# */
846     arg = D1;
847
848     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
849     
850     /* Be prepared to tell Lennart-coded __decodeDouble
851        where mantissa.d can be put (it does not care about the rest) */
852     p = Hp - ARR_SIZE + WDS(1);
853     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
854     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
855     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
856
857     /* Perform the operation */
858     foreign "C" __decodeDouble(mp_tmp1,exponent,arg);
859     
860     /* returns: (Int# (expn), Int#, ByteArray#) */
861     RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
862 }
863
864 /* -----------------------------------------------------------------------------
865  * Concurrency primitives
866  * -------------------------------------------------------------------------- */
867
/* fork# :: a -> State# RealWorld -> (# State# RealWorld, ThreadId# #)
 * Creates a new IO thread running the closure in R1, schedules it, and
 * requests a context switch so the child runs promptly. */
868 forkzh_fast
869 {
870   /* args: R1 = closure to spark */
871   
872   MAYBE_GC(R1_PTR, forkzh_fast);
873
874   // create it right now, return ThreadID in R1
875   "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
876                                    R1 "ptr");
877   foreign "C" scheduleThread(R1 "ptr");
878
879   // switch at the earliest opportunity
880   CInt[context_switch] = 1 :: CInt;
881   
882   RET_P(R1);
883 }
884
/* yield# : give up the current time slice; stg_yield_noregs re-enters the
 * scheduler with no live registers to save. */
885 yieldzh_fast
886 {
887   jump stg_yield_noregs;
888 }
889
/* myThreadId# : the current TSO pointer doubles as the ThreadId#. */
890 myThreadIdzh_fast
891 {
892   /* no args. */
893   RET_P(CurrentTSO);
894 }
895
/* labelThread# : attach a debug label to a thread.
 * No-op unless the RTS was built with DEBUG. */
896 labelThreadzh_fast
897 {
898   /* args: 
899         R1 = ThreadId#
900         R2 = Addr# */
901 #ifdef DEBUG
902   foreign "C" labelThread(R1 "ptr", R2 "ptr");
903 #endif
904   jump %ENTRY_CODE(Sp(0));
905 }
906
/* isCurrentThreadBound# :: State# RealWorld -> (# State# RealWorld, Int# #)
 * Asks the RTS whether the current thread is bound to an OS thread. */
907 isCurrentThreadBoundzh_fast
908 {
909   /* no args */
910   W_ r;
911   r = foreign "C" isThreadBound(CurrentTSO);
912   RET_N(r);
913 }
914
915
916 /* -----------------------------------------------------------------------------
917  * TVar primitives
918  * -------------------------------------------------------------------------- */
919
/* Stack offset of the return address depends on whether R1 is pinned to a
 * real machine register: in unregisterised builds the return value sits
 * on the stack above the frame. */
920 #ifdef REG_R1
921 #define SP_OFF 0
922 #define IF_NOT_REG_R1(x) 
923 #else
924 #define SP_OFF 1
925 #define IF_NOT_REG_R1(x) x
926 #endif
927
928 // Catch retry frame ------------------------------------------------------------
929
/* A catch-retry frame must only be returned to via the direct entry below;
 * the vectored-return slots all abort. */
930 #define CATCH_RETRY_FRAME_ERROR(label) \
931   label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
932
933 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
934 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
935 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
936 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
937 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
938 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
939 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
940 CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
941
942 #if MAX_VECTORED_RTN > 8
943 #error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
944 #endif
945
/* frame layout: extra words/bitmap bits under PROFILING for the CCS fields */
946 #if defined(PROFILING)
947 #define CATCH_RETRY_FRAME_BITMAP 7
948 #define CATCH_RETRY_FRAME_WORDS  6
949 #else
950 #define CATCH_RETRY_FRAME_BITMAP 1
951 #define CATCH_RETRY_FRAME_WORDS  4
952 #endif
952 #endif
953
/* Return entry for an STM catchRetry# frame.  On return we try to commit
 * the nested transaction: on success the frame is popped and we return to
 * the frame above; on failure a fresh nested transaction is started and
 * the appropriate branch (first or alternative code) is re-run. */
954 INFO_TABLE_RET(stg_catch_retry_frame,
955                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
956                CATCH_RETRY_FRAME,
957                stg_catch_retry_frame_0_ret,
958                stg_catch_retry_frame_1_ret,
959                stg_catch_retry_frame_2_ret,
960                stg_catch_retry_frame_3_ret,
961                stg_catch_retry_frame_4_ret,
962                stg_catch_retry_frame_5_ret,
963                stg_catch_retry_frame_6_ret,
964                stg_catch_retry_frame_7_ret)
965 {
966    W_ r, frame, trec, outer;
   /* unregisterised: the return value is on the stack, save it first */
967    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
968
969    frame = Sp;
970    trec = StgTSO_trec(CurrentTSO);
971    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
972    r = foreign "C" stmCommitTransaction(trec "ptr");
973    if (r) {
974      /* Succeeded (either first branch or second branch) */
975      StgTSO_trec(CurrentTSO) = outer;
976      Sp = Sp + SIZEOF_StgCatchRetryFrame;
977      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
978      jump %ENTRY_CODE(Sp(SP_OFF));
979    } else {
980      /* Did not commit: retry */
981      W_ new_trec;
982      "ptr" new_trec = foreign "C" stmStartTransaction(outer "ptr");
983      StgTSO_trec(CurrentTSO) = new_trec;
984      if (StgCatchRetryFrame_running_alt_code(frame)) {
985        R1 = StgCatchRetryFrame_alt_code(frame);
986      } else {
987        R1 = StgCatchRetryFrame_first_code(frame);
988        StgCatchRetryFrame_first_code_trec(frame) = new_trec;
989      }
     /* apply the chosen branch closure to the state token */
990      Sp_adj(-1);
991      jump RET_LBL(stg_ap_v);
992    }
993 }
994
995
996 // Atomically frame -------------------------------------------------------------
997
998
/* As with catch-retry frames: vectored returns into an atomically frame
 * are invalid and abort the RTS. */
999 #define ATOMICALLY_FRAME_ERROR(label) \
1000   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1001
1002 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1003 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1004 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1005 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1006 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1007 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1008 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1010
1011 #if MAX_VECTORED_RTN > 8
1012 #error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
1013 #endif
1014
/* frame layout: extra words/bitmap bits under PROFILING */
1015 #if defined(PROFILING)
1016 #define ATOMICALLY_FRAME_BITMAP 7
1017 #define ATOMICALLY_FRAME_WORDS  4
1018 #else
1019 #define ATOMICALLY_FRAME_BITMAP 1
1020 #define ATOMICALLY_FRAME_WORDS  2
1021 #endif
1022
1023
/* Return entry for the atomically frame.  Reached when the code of an
 * 'atomically' block finishes, or when a waiting TSO is woken up.
 * Either commits the transaction (and returns to the frame underneath),
 * re-blocks, or restarts the transaction from scratch.
 */
INFO_TABLE_RET(stg_atomically_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
               ATOMICALLY_FRAME,
               stg_atomically_frame_0_ret,
               stg_atomically_frame_1_ret,
               stg_atomically_frame_2_ret,
               stg_atomically_frame_3_ret,
               stg_atomically_frame_4_ret,
               stg_atomically_frame_5_ret,
               stg_atomically_frame_6_ret,
               stg_atomically_frame_7_ret)
{
   W_ frame, trec, valid;
   /* Unregisterised builds pass the return value on the stack, not in R1. */
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   if (StgAtomicallyFrame_waiting(frame)) {
     /* The TSO is currently waiting: should we stop waiting? */
     valid = foreign "C" stmReWait(CurrentTSO "ptr");
     if (valid) {
       /* Previous attempt is still valid: no point trying again yet */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
       jump stg_block_noregs;
     } else {
       /* Previous attempt is no longer valid: try again */
       "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
       StgTSO_trec(CurrentTSO) = trec;
       StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
       /* Re-run the STM computation stored in the frame. */
       R1 = StgAtomicallyFrame_code(frame);
       Sp_adj(-1);
       jump RET_LBL(stg_ap_v);
     }
   } else {
     /* The TSO is not currently waiting: try to commit the transaction */
     valid = foreign "C" stmCommitTransaction(trec "ptr");
     if (valid) {
       /* Transaction was valid: commit succeeded */
       StgTSO_trec(CurrentTSO) = NO_TREC;
       /* Pop the frame and return to whatever is underneath. */
       Sp = Sp + SIZEOF_StgAtomicallyFrame;
       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
       jump %ENTRY_CODE(Sp(SP_OFF));
     } else {
       /* Transaction was not valid: try again */
       "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
       StgTSO_trec(CurrentTSO) = trec;
       R1 = StgAtomicallyFrame_code(frame);
       Sp_adj(-1);
       jump RET_LBL(stg_ap_v);
     }
   }
}
1078
1079
1080 // STM catch frame --------------------------------------------------------------
1081
/* Return-entry template for the CATCH_STM frame: simply pop the frame
 * off the stack and continue with the given return code; the handler
 * stored in the frame is only used by the exception-raising machinery.
 */
#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
   label                                                   \
   {                                                       \
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
      Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
      jump ret;                                            \
   }

/* Offset from Sp to the frame's info pointer: in the unregisterised
 * case one word of return value sits on top of the frame. */
#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif

CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))

/* Eight vectored-return entries are instantiated above. */
#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
#endif

/* Size/bitmap of an StgCatchSTMFrame for the GC; the profiling figures
 * presumably include the profiling header — TODO confirm against the
 * frame's declaration in the RTS headers. */
#if defined(PROFILING)
#define CATCH_STM_FRAME_BITMAP 3
#define CATCH_STM_FRAME_WORDS  3
#else
#define CATCH_STM_FRAME_BITMAP 0
#define CATCH_STM_FRAME_WORDS  1
#endif
1117
/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 * The direct-entry body is generated from the same template as the
 * vectored entries, using the generic entry code of the frame below.
 */

INFO_TABLE_RET(stg_catch_stm_frame,
               CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
               CATCH_STM_FRAME,
               stg_catch_stm_frame_0_ret,
               stg_catch_stm_frame_1_ret,
               stg_catch_stm_frame_2_ret,
               stg_catch_stm_frame_3_ret,
               stg_catch_stm_frame_4_ret,
               stg_catch_stm_frame_5_ret,
               stg_catch_stm_frame_6_ret,
               stg_catch_stm_frame_7_ret)
CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1135
1136
1137 // Primop definition ------------------------------------------------------------
1138
/* atomically# : push an atomically frame, start a new transaction, and
 * run the STM computation in R1 inside it.  The frame's return code
 * (stg_atomically_frame) decides whether to commit or retry.
 */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  /* Args: R1 = m :: STM a */
  /* Need room for the frame plus one word for the stg_ap_v return address. */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
  StgAtomicallyFrame_code(frame) = R1;

  /* Start the memory transaction */
  old_trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(old_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  Sp_adj(-1);
  jump RET_LBL(stg_ap_v);
}
1165
1166
/* catchSTM# : push a CATCH_STM frame recording the handler R2, then run
 * the STM computation R1.  On normal return the frame is simply popped;
 * the handler is invoked only by the STM exception machinery.
 */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;

  /* Apply R1 to the realworld token */
  Sp_adj(-1);
  jump RET_LBL(stg_ap_v);
}
1186
1187
/* catchRetry# (orElse) : run R1 in a nested transaction; if it calls
 * retry#, the frame pushed here arranges for R2 to be tried instead
 * (see retryzh_fast and stg_catch_retry_frame).
 */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;
  /* Remember the first branch's trec so a retry in the alternative can
   * later merge the two read sets for waiting. */
  StgCatchRetryFrame_first_code_trec(frame) = new_trec;

  /* Apply R1 to the realworld token */
  Sp_adj(-1);
  jump RET_LBL(stg_ap_v);  
}
1217
1218
/* retry# : abandon the current transaction attempt.  Walks the stack
 * (via findRetryFrameHelper) to the nearest CATCH_RETRY_FRAME or
 * ATOMICALLY_FRAME: a catch-retry frame switches to (or restarts) the
 * alternative branch; the atomically frame puts the TSO to sleep on the
 * TVars it has read.
 */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  trec = StgTSO_trec(CurrentTSO);
  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
  /* findRetryFrameHelper works on the TSO's saved stack pointer, so
   * flush Sp out and reload it afterwards. */
  StgTSO_sp(CurrentTSO) = Sp;
  frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    if (!StgCatchRetryFrame_running_alt_code(frame)) {
      // Retry in the first code: try the alternative
      "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      Sp_adj(-1);
      jump RET_LBL(stg_ap_v);
    } else {
      // Retry in the alternative code: propagate
      W_ other_trec;
      other_trec = StgCatchRetryFrame_first_code_trec(frame);
      /* Combine the read sets of both branches so the enclosing wait
       * covers every TVar either branch looked at. */
      r = foreign "C" stmMergeForWaiting(trec "ptr", other_trec "ptr");
      if (r) {
        r = foreign "C" stmCommitTransaction(trec "ptr");
      }
      if (r) {
        // Merge between siblings succeeded: commit it back to enclosing transaction
        // and then propagate the retry
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchRetryFrame;
        goto retry_pop_stack;
      } else {
        // Merge failed: we mustn't propagate the retry.  Try both paths again.
        "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
        StgCatchRetryFrame_first_code_trec(frame) = trec;
        StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
        StgTSO_trec(CurrentTSO) = trec;
        R1 = StgCatchRetryFrame_first_code(frame);
        Sp_adj(-1);
        jump RET_LBL(stg_ap_v);
      }
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  ASSERT(outer == NO_TREC);
  r = foreign "C" stmWait(CurrentTSO "ptr", trec "ptr");
  if (r) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    // Transaction was not valid: retry immediately
    "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    Sp_adj(-1);
    jump RET_LBL(stg_ap_v);
  }
}
1299
1300
/* newTVar# : allocate a fresh TVar whose current value is R1 and whose
 * wait queue is empty.
 */
newTVarzh_fast
{
  W_ tv;

  /* Args: R1 = initialisation value */

  ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
  /* Hp points at the last word allocated; step back to the object start. */
  tv = Hp - SIZEOF_StgTVar + WDS(1);
  SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
  StgTVar_current_value(tv) = R1;
  StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
    
  RET_P(tv);
}
1315
1316
/* readTVar# : read a TVar inside the current transaction; the actual
 * bookkeeping (read-set recording) is done by stmReadTVar in the C RTS.
 */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  "ptr" result = foreign "C" stmReadTVar(trec "ptr", tvar "ptr");

  RET_P(result);
}
1332
1333
/* writeTVar# : record a write to a TVar in the current transaction via
 * stmWriteTVar; the TVar itself is only updated at commit time.
 */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(trec "ptr", tvar "ptr", new_value "ptr");

  /* No result: return unit to the continuation on the stack. */
  jump %ENTRY_CODE(Sp(0));
}
1351
1352
1353 /* -----------------------------------------------------------------------------
1354  * MVar primitives
1355  *
1356  * take & putMVar work as follows.  Firstly, an important invariant:
1357  *
1358  *    If the MVar is full, then the blocking queue contains only
1359  *    threads blocked on putMVar, and if the MVar is empty then the
1360  *    blocking queue contains only threads blocked on takeMVar.
1361  *
 * takeMVar:
1363  *    MVar empty : then add ourselves to the blocking queue
1364  *    MVar full  : remove the value from the MVar, and
1365  *                 blocking queue empty     : return
1366  *                 blocking queue non-empty : perform the first blocked putMVar
1367  *                                            from the queue, and wake up the
1368  *                                            thread (MVar is now full again)
1369  *
1370  * putMVar is just the dual of the above algorithm.
1371  *
1372  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1373  * the stack of the thread waiting to do the putMVar.  See
1374  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1375  * the stack layout, and the PerformPut and PerformTake macros below.
1376  *
1377  * It is important that a blocked take or put is woken up with the
1378  * take/put already performed, because otherwise there would be a
1379  * small window of vulnerability where the thread could receive an
1380  * exception and never perform its take or put, and we'd end up with a
1381  * deadlock.
1382  *
1383  * -------------------------------------------------------------------------- */
1384
/* isEmptyMVar# : an MVar's fullness is encoded in its info pointer,
 * so a single header comparison answers the question.
 */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */
    W_ hdr;

    hdr = GET_INFO(R1);
    if (hdr != stg_EMPTY_MVAR_info) {
        RET_N(0);
    } else {
        RET_N(1);
    }
}
1395
/* newMVar# : allocate an empty MVar — empty head/tail queues and no
 * value, with the EMPTY_MVAR info pointer marking its state.
 */
newMVarzh_fast
{
    /* args: none */
    W_ mv;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    /* Hp points at the last allocated word; rewind to the object start. */
    mv = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mv,stg_EMPTY_MVAR_info,W_[CCCS]);
    StgMVar_value(mv) = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mv)  = stg_END_TSO_QUEUE_closure;
    StgMVar_head(mv)  = stg_END_TSO_QUEUE_closure;
    RET_P(mv);
}
1410
1411
/* If R1 isn't available, pass it on the stack */
/* PerformTake: complete a blocked takeMVar on another TSO's behalf by
 * writing the taken value and a suitable return info table onto that
 * TSO's stack (layout per stg_block_takemvar in HeapStackCheck.c). */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut: complete a blocked putMVar by popping the 3-word
 * stg_block_putmvar frame from the blocked TSO's stack and extracting
 * the value it was trying to put into 'lval'. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1426
/* takeMVar# : if the MVar is empty, block on its queue; otherwise take
 * the value and, if a putMVar is waiting, perform that put on the
 * blocked thread's behalf and wake it (see the invariant in the section
 * comment above).
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(SMP)
    "ptr" info = foreign "C" lockClosure(mvar "ptr");
#else
    info = GET_INFO(mvar);
#endif

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (info == stg_EMPTY_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

        jump stg_block_takemvar;
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
    {
        /* There are putMVar(s) waiting...
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        /* "ptr" hints added to match the identical calls in
         * tryTakeMVarzh_fast and putMVarzh_fast. */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
#endif
        RET_P(val);
    }
    else
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;

#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
#else
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif

        RET_P(val);
    }
}
1503
1504
/* tryTakeMVar# : non-blocking take.  Returns (0, dummy) when the MVar
 * is empty, (1, value) when full — performing a pending putMVar on a
 * blocked thread's behalf just like takeMVarzh_fast.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(SMP)
    "ptr" info = foreign "C" lockClosure(mvar "ptr");
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_EMPTY_MVAR_info) {
#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
#else
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
    }
    
    RET_NP(1, val);
}
1572
1573
/* putMVar# : dual of takeMVarzh_fast.  Blocks if the MVar is full;
 * otherwise either hands the value directly to the first blocked
 * takeMVar (keeping the MVar empty) or stores it (making the MVar full).
 */
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(SMP)
    "ptr" info = foreign "C" lockClosure(mvar "ptr");
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
        /* Full: append ourselves to the blocking queue and block. */
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1644
1645
/* tryPutMVar# : non-blocking put.  Returns 0 if the MVar is already
 * full; otherwise behaves like putMVarzh_fast and returns 1.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(SMP)
    "ptr" info = foreign "C" lockClosure(mvar "ptr");
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
#endif
        RET_N(0);
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(SMP)
        foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info);
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1709
1710
1711 /* -----------------------------------------------------------------------------
1712    Stable pointer primitives
1713    -------------------------------------------------------------------------  */
1714
/* makeStableName# : return the unique StableName object for the closure
 * in R1, creating one if this is the first request for that closure.
 * Heap space is reserved up front (ALLOC_PRIM) so no GC can intervene
 * between the table lookup and the table update below.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    index = foreign "C" lookupStableName(R1 "ptr");

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* No: build a fresh StgStableName and cache it in the table. */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        /* Yes: reuse the cached object (the allocated space goes unused). */
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1737
1738
/* makeStablePtr# : register the closure in R1 with the stable-pointer
 * table and return its stable pointer (an unboxed table reference).
 */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ stable;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    "ptr" stable = foreign "C" getStablePtr(R1 "ptr");
    RET_N(stable);
}
1747
/* deRefStablePtr# : index the stable-pointer table with the stable
 * pointer in R1 and return the closure stored there.
 */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ closure;
    closure = snEntry_addr(W_[stable_ptr_table] + R1*SIZEOF_snEntry);
    RET_P(closure);
}
1756
1757 /* -----------------------------------------------------------------------------
1758    Bytecode object primitives
1759    -------------------------------------------------------------------------  */
1760
/* newBCO# : allocate a bytecode object.  The BCO's size includes the
 * arity/bitmap payload, which is copied out of the bitmap array (R6).
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = itbls
       R5 = arity
       R6 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R6;
    /* Total size = fixed BCO header/fields + the bitmap array's payload. */
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_itbls(bco)      = R4;
    StgBCO_arity(bco)      = HALF_W_(R5);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    // (C-- has no loop syntax, so this is an explicit label + goto.)
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1800
1801
/* mkApUpd0# : wrap a zero-arity BCO in an updatable AP thunk. */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    /* Zero arguments: the AP holds only the function (the BCO itself). */
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1826
1827 /* -----------------------------------------------------------------------------
1828    Thread I/O blocking primitives
1829    -------------------------------------------------------------------------- */
1830
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).  The tso must not already be on a queue
 * (asserted via its link field); head and tail are updated together.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
1842
/* waitRead# : block the current thread until the descriptor in R1 is
 * ready for reading (non-threaded RTS only; the threaded RTS uses a
 * different mechanism and aborts here).
 */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
}
1858
/* waitWrite# : block the current thread until the descriptor in R1 is
 * ready for writing (non-threaded RTS only; mirror of waitReadzh_fast).
 */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
}
1874
1875
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# : block the current thread for (at least) R1 microseconds.
 * On mingw32 this goes through the async I/O manager; elsewhere the
 * thread is inserted, sorted by wake-up time, into the sleeping queue.
 * Not available on the threaded RTS.
 */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS");
#endif

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    reqID = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Convert the delay to a wake-up time in RTS ticks. */
    W_ time;
    time = foreign "C" getourtimeofday();
    target = (R1 / (TICK_MILLISECS*1000)) + time;
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
}
1939
1940
1941 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# (mingw32 only) : queue an asynchronous read request with
 * the Windows I/O manager and block until it completes.
 */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS");
#endif

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str);
    /* 0 = write flag FALSE, i.e. this is a read request. */
    reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
1967
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
asyncWritezh_fast
{
    /* Issue an asynchronous write request via the Win32 IO manager and
     * block the current thread until it completes.
     * args: R1 = fd, R2 = isSock, R3 = len, R4 = buf
     */
    W_ iores;
    CInt rid;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    /* result cell for the IO manager to fill in; malloc'd
     * (could probably be allocated on the heap instead) */
    "ptr" iores = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                             stg_asyncWritezh_malloc_str);
    rid = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");

    StgAsyncIOResult_errCode(iores) = 0;
    StgAsyncIOResult_len(iores)     = 0;
    StgAsyncIOResult_reqID(iores)   = rid;
    StgTSO_block_info(CurrentTSO)   = iores;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
1993
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
asyncDoProczh_fast
{
    /* Run a procedure asynchronously via the Win32 IO manager and
     * block the current thread until it completes.
     * args: R1 = proc, R2 = param
     */
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    /* Consistent with asyncReadzh_fast/asyncWritezh_fast: this primop
     * relies on the non-threaded blocked_queue mechanism, which the
     * threaded RTS does not service, so fail loudly rather than hang. */
    foreign "C" barf("asyncDoProc# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* result cell for the IO manager to fill in; malloc'd
     * (could probably allocate this on the heap instead) */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str);
    reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
2015 #endif
2016
2017 /* -----------------------------------------------------------------------------
2018   ** temporary **
2019
2020    classes CCallable and CReturnable don't really exist, but the
2021    compiler insists on generating dictionaries containing references
   to GHC_ZCCCallable_static_info etc., so we provide dummy symbols
2023    for these.  Some C compilers can't cope with zero-length static arrays,
2024    so we have to make these one element long.
2025   --------------------------------------------------------------------------- */
2026
section "rodata" {
  /* one-word dummy symbol; see the CCallable/CReturnable note above */
  GHC_ZCCCallable_static_info:   W_ 0;
}
2030
section "rodata" {
  /* one-word dummy symbol; see the CCallable/CReturnable note above */
  GHC_ZCCReturnable_static_info: W_ 0;
}