[project @ 2005-04-27 14:25:17 by simonmar]
[ghc-hetmet.git] / ghc / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
/* newByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 *
 * R1 = requested size in *bytes*.  Returns the new MutableByteArray#
 * closure via RET_P.  The byte count is rounded up to whole words
 * (see the comment above).
 */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);   // no pointer args to protect across GC
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    // total size = ArrWords header + payload
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    "ptr" p = foreign "C" allocate(words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
58
/* newPinnedByteArray# :: Int# -> State# s -> (# State# s, MutableByteArray# s #)
 *
 * Like newByteArray#, but the array is allocated with allocatePinned()
 * so the GC will never move it, and the payload is 8-byte aligned.
 * R1 = requested size in bytes; result returned via RET_P.
 */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        // header is an odd number of words: pad by one word
        words = words + 1;
    }

    "ptr" p = foreign "C" allocatePinned(words);
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
90
/* newArray# :: Int# -> a -> State# s -> (# State# s, MutableArray# s a #)
 *
 * Allocates a MUT_ARR_PTRS of R1 elements, each initialised to R2.
 */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);   // R2 is a heap pointer: protect it over GC

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    "ptr" arr = foreign "C" allocate(words);
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    // loop until p reaches the end of the object (header + n words)
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
118
/* unsafeThawArray# :: Array# a -> State# s -> (# State# s, MutableArray# s a #)
 *
 * Converts a frozen array back to a mutable one in place, taking care
 * to keep the old-generation mutable list consistent (see below).
 */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  // 
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0 to indicate
  // that it is still on the mutable list.

  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the mut_link field is NULL if it isn't on a mut_list, and the GC
  // maintains this invariant.
  //
  if (%INFO_TYPE(%GET_STD_INFO(R1)) != HALF_W_(MUT_ARR_PTRS_FROZEN0)) {
        // not already on the mutable list: record it now
        foreign "C" recordMutableLock(R1 "ptr");
  }

  SET_INFO(R1,stg_MUT_ARR_PTRS_info);

  RET_P(R1);
}
145
146 /* -----------------------------------------------------------------------------
147    MutVar primitives
148    -------------------------------------------------------------------------- */
149
/* newMutVar# :: a -> State# s -> (# State# s, MutVar# s a #)
 *
 * Allocates a fresh MUT_VAR closure holding R1.
 */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    // ALLOC_PRIM bumps Hp; the new closure starts just past the old Hp
    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;
    
    RET_P(mv);
}
163
/* atomicModifyMutVar# :: MutVar# s a -> (a -> (a,b)) -> State# s -> (# State# s, b #)
 *
 * Lazily applies f to the contents of the MutVar#: builds the thunks
 * below without evaluating anything, stores the new contents, and
 * returns the selector thunk for the result.  Under SMP the whole
 * read-modify-write is protected by sm_mutex.
 */
atomicModifyMutVarzh_fast
{
    W_ mv, z, x, y, r;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then 
       We want to make the new contents point to

         (sel_0 (f x))
 
       and the return value is
         
         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

// Thunk sizes must respect MIN_UPD_SIZE (minimum updatable closure size)
#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

// total heap needed for all three thunks, requested up front
#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

#if defined(SMP)
    foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
#endif

   x = StgMutVar_var(R1);

   // z = [stg_ap_2 f x] : the application thunk (f x)
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = R2;
   StgThunk_payload(z,1) = x;

   // y = [stg_sel_0 z] : first component, becomes the new contents
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   StgMutVar_var(R1) = y;

   // r = [stg_sel_1 z] : second component, the value we return
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

#if defined(SMP)
    foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif

   RET_P(r);
}
241
242 /* -----------------------------------------------------------------------------
243    Foreign Object Primitives
244    -------------------------------------------------------------------------- */
245
/* mkForeignObj# : wraps a raw address in a FOREIGN closure.
 */
mkForeignObjzh_fast
{
  /* R1 = ptr to foreign object,
  */
  W_ result;

  ALLOC_PRIM( SIZEOF_StgForeignObj, NO_PTRS, mkForeignObjzh_fast);

  result = Hp - SIZEOF_StgForeignObj + WDS(1);
  SET_HDR(result,stg_FOREIGN_info,W_[CCCS]);
  StgForeignObj_data(result) = R1;

  /* returns (# s#, ForeignObj# #) */
  RET_P(result);
}
261
262 /* -----------------------------------------------------------------------------
263    Weak Pointer Primitives
264    -------------------------------------------------------------------------- */
265
// debug message used by mkWeakzh_fast under IF_DEBUG(weak,...)
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* mkWeak# :: k -> v -> f -> State# RealWorld -> (# State# RealWorld, Weak# v #)
 *
 * Allocates a WEAK closure and links it onto the global weak_ptr_list.
 * A NULL finalizer is normalised to stg_NO_FINALIZER_closure.
 */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  // push onto the front of the global weak pointer list
  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w));

  RET_P(w);
}
296
297
/* finalizeWeak# :: Weak# a -> State# RealWorld
 *                -> (# State# RealWorld, Int#, finalizer #)
 *
 * Kills the weak pointer (turning it into DEAD_WEAK) and returns its
 * finalizer: (# 0, no-finalizer #) if already dead or no finalizer was
 * registered, (# 1, f #) otherwise.
 */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  // read the finalizer before the fields are reused by StgDeadWeak
  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
340
/* deRefWeak# :: Weak# a -> State# RealWorld -> (# State# RealWorld, Int#, a #)
 *
 * Returns (# 1, value #) if the weak pointer is still alive,
 * (# 0, w #) if it has been finalized (the second component is then
 * the weak pointer itself and must not be used by the caller).
 */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  RET_NP(code,val);
}
356
357 /* -----------------------------------------------------------------------------
358    Arbitrary-precision Integer operations.
359
360    There are some assumptions in this code that mp_limb_t == W_.  This is
361    the case for all the platforms that GHC supports, currently.
362    -------------------------------------------------------------------------- */
363
/* int2Integer# :: Int# -> (# Int#, ByteArray# #)
 *
 * Converts an Int# to the (sign/size, limb array) representation used
 * for Integer.  The sign of the returned size encodes the sign of the
 * value; the single limb holds the magnitude.
 */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) { 
        s  = -1;
        Hp(0) = -val;        // store magnitude, sign carried in s
   } else { 
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;               // zero: size 0, limb left uninitialised
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# 
               #)
   */
   RET_NP(s,p);
}
396
/* word2Integer# :: Word# -> (# Int#, ByteArray# #)
 *
 * Converts a Word# to Integer representation; the result is always
 * non-negative, so size is 0 or 1.
 */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
423
424
425 /*
426  * 'long long' primops for converting to/from Integers.
427  */
428
429 #ifdef SUPPORT_LONG_LONGS
430
/* int64ToInteger# :: Int64# -> (# Int#, ByteArray# #)
 *
 * Converts a 64-bit signed value to Integer representation on a
 * 32-bit-word platform: one or two limbs, with the sign carried in
 * the returned size.
 */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   // two limbs iff the value doesn't fit in 32 bits
   if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) )  { 
       words_needed = 2;
   } else { 
       // minimum is one word
       words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   // work with the magnitude; remember the sign for later
   if ( %lt(val,0::L_) ) {
     neg = 1;
     val = -val;
   }

   hi = TO_W_(val >> 32);

   if ( words_needed == 2 )  { 
      s = 2;
      Hp(-1) = TO_W_(val);   // low limb
      Hp(0) = hi;            // high limb
   } else { 
       if ( val != 0::L_ ) {
           s = 1;
           Hp(0) = TO_W_(val);
       } else /* val==0 */  {
           s = 0;
       }
   }
   // negative size encodes a negative Integer
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
483
/* word64ToInteger# :: Word64# -> (# Int#, ByteArray# #)
 *
 * Converts a 64-bit unsigned value to Integer representation:
 * always non-negative, so size is 0, 1 or 2.
 */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, s, words_needed, p;

   val = L1;
   // two limbs iff the value doesn't fit in 32 bits
   if ( val >= 0x100000000::L_ ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   hi = TO_W_(val >> 32);
   if ( val >= 0x100000000::L_ ) { 
     s = 2;
     Hp(-1) = TO_W_(val);    // low limb
     Hp(0)  = hi;            // high limb
   } else {
      if ( val != 0::L_ ) {
        s = 1;
        Hp(0) = TO_W_(val);
     } else /* val==0 */  {
      s = 0;
     }
  }

   /* returns (# size  :: Int#, 
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
524
525
526 #endif /* SUPPORT_LONG_LONGS */
527
/* ToDo: this is shockingly inefficient */

/* Static scratch mpz_t structures used to marshal Integer arguments
 * to/from GMP.  NOTE(review): these are global (non-reentrant) buffers —
 * presumably safe because the callers hold sm_mutex or run single-threaded;
 * confirm before using from multiple capabilities.
 */
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  result2:
    bits8 [SIZEOF_MP_INT];
}

/* Define a primop taking two Integers (size in R1/R3, limbs in R2/R4)
 * and returning one Integer, by marshalling to mpz_t and calling mp_fun.
 * The returned ByteArray# pointer is recovered from the mpz_t's _mp_d
 * by subtracting the ArrWords header size.
 */
#define GMP_TAKE2_RET1(name,mp_fun)                     \
name                                                    \
{                                                       \
  CInt s1, s2;                                          \
  W_ d1, d2;                                            \
                                                        \
  /* call doYouWantToGC() */                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                      \
                                                        \
  s1 = W_TO_INT(R1);                                    \
  d1 = R2;                                              \
  s2 = W_TO_INT(R3);                                    \
  d2 = R4;                                              \
                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));  \
  MP_INT__mp_size(mp_tmp1)  = (s1);                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));  \
  MP_INT__mp_size(mp_tmp2)  = (s2);                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);         \
                                                        \
  foreign "C" mpz_init(result1);                        \
                                                        \
  /* Perform the operation */                           \
  foreign "C" mp_fun(result1,mp_tmp1,mp_tmp2);          \
                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(result1)),               \
         MP_INT__mp_d(result1) - SIZEOF_StgArrWords);   \
}

/* As GMP_TAKE2_RET1, but for unary GMP functions (one Integer in). */
#define GMP_TAKE1_RET1(name,mp_fun)                             \
name                                                            \
{                                                               \
  CInt s1;                                                      \
  W_ d1;                                                        \
                                                                \
  /* call doYouWantToGC() */                                    \
  MAYBE_GC(R2_PTR, name);                                       \
                                                                \
  d1 = R2;                                                      \
  s1 = W_TO_INT(R1);                                            \
                                                                \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                         \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
                                                                \
  foreign "C" mpz_init(result1);                                \
                                                                \
  /* Perform the operation */                                   \
  foreign "C" mp_fun(result1,mp_tmp1);                          \
                                                                \
  RET_NP(TO_W_(MP_INT__mp_size(result1)),                       \
         MP_INT__mp_d(result1) - SIZEOF_StgArrWords);           \
}

/* As GMP_TAKE2_RET1, but for GMP functions producing two results
 * (e.g. quotient and remainder); returns two Integers.
 */
#define GMP_TAKE2_RET2(name,mp_fun)                             \
name                                                            \
{                                                               \
  CInt s1, s2;                                                  \
  W_ d1, d2;                                                    \
                                                                \
  /* call doYouWantToGC() */                                    \
  MAYBE_GC(R2_PTR & R4_PTR, name);                              \
                                                                \
  s1 = W_TO_INT(R1);                                            \
  d1 = R2;                                                      \
  s2 = W_TO_INT(R3);                                            \
  d2 = R4;                                                      \
                                                                \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                         \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);             \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                         \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);             \
                                                                \
  foreign "C" mpz_init(result1);                                \
  foreign "C" mpz_init(result2);                                \
                                                                \
  /* Perform the operation */                                   \
  foreign "C" mp_fun(result1,result2,mp_tmp1,mp_tmp2);          \
                                                                \
  RET_NPNP(TO_W_(MP_INT__mp_size(result1)),                     \
           MP_INT__mp_d(result1) - SIZEOF_StgArrWords,          \
           TO_W_(MP_INT__mp_size(result2)),                     \
           MP_INT__mp_d(result2) - SIZEOF_StgArrWords);         \
}

// Instantiate the Integer arithmetic primops over the GMP functions.
GMP_TAKE2_RET1(plusIntegerzh_fast,     mpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    mpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    mpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      mpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     mpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      mpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      mpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       mpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      mpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  mpz_fdiv_qr)
652
// one-limb scratch buffer for mpn_gcd_1 below
section "bss" {
  aa:  W_; // NB. aa is really an mp_limb_t
}

/* gcdInt# :: Int# -> Int# -> Int#
 *
 * GCD of two machine ints via GMP's mpn_gcd_1.
 */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r; 

    W_[aa] = R1;
    r = foreign "C" mpn_gcd_1(aa, 1, R2);

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
669
670
/* gcdIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 *
 * GCD of an Integer (size s1 / limbs d1) and a machine int,
 * via GMP's mpn_gcd_1 over the limb array.
 */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3);
    
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
679
680
/* cmpIntegerInt# :: Int# -> ByteArray# -> Int# -> Int#
 *
 * Compares an Integer against a machine int, returning <0, 0 or >0.
 * Paraphrased from mpz_cmp_si() in the GMP sources.
 */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from mpz_cmp_si() in the GMP sources
    // compute the "size" (signed limb count) of the int argument
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else { 
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    // different signs or limb counts decide immediately
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // same sign, one limb each: compare magnitudes
    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize; 
    } else {
        R1 = -usize; 
    }

    jump %ENTRY_CODE(Sp(0));
}
725
/* cmpInteger# :: Int# -> ByteArray# -> Int# -> ByteArray# -> Int#
 *
 * Compares two Integers, returning <0, 0 or >0.
 * Paraphrased from mpz_cmp() in the GMP sources.
 */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from mpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    // different signs or limb counts decide immediately
    if (usize != vsize) {
        R1 = usize - vsize; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    // magnitude comparison over the limb arrays
    cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size);

    if (cmp == 0 :: CInt) {
        R1 = 0; 
        jump %ENTRY_CODE(Sp(0));
    }

    // flip the magnitude comparison if both operands are negative
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1); 
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
770
/* integer2Int# :: Int# -> ByteArray# -> Int#
 *
 * Truncates an Integer to an Int#: the low limb, negated if the
 * Integer's size (sign) is negative.
 */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];   // low limb
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}
789
/* integer2Word# :: Int# -> ByteArray# -> Word#
 *
 * Truncates an Integer to a Word#; same scheme as integer2Int#.
 */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;
  } else {
    r = W_[R2 + SIZEOF_StgArrWords];   // low limb
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
808
// scratch word receiving the exponent from __decodeFloat/__decodeDouble
section "bss" {
  exponent:  W_;
}

/* decodeFloat# :: Float# -> (# Int#, Int#, ByteArray# #)
 *
 * Decodes a Float# into (exponent, mantissa-size, mantissa limbs)
 * via the C helper __decodeFloat, which fills mp_tmp1 and exponent.
 *
 * NOTE(review): the opening brace of this function was missing in the
 * checked-in text (the closing brace was present); restored here.
 */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    
    /* arguments: F1 = Float# */
    arg = F1;
    
    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
    
    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1,exponent,arg);
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
836
// mantissa of a Double occupies a full double's worth of bytes
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* decodeDouble# :: Double# -> (# Int#, Int#, ByteArray# #)
 *
 * Decodes a Double# into (exponent, mantissa-size, mantissa limbs)
 * via the C helper __decodeDouble.
 *
 * NOTE(review): the opening brace of this function was missing in the
 * checked-in text (the closing brace was present); restored here.
 */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
    
    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1,exponent,arg);
    
    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[exponent], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
863
864 /* -----------------------------------------------------------------------------
865  * Concurrency primitives
866  * -------------------------------------------------------------------------- */
867
/* fork# :: IO () -> State# RealWorld -> (# State# RealWorld, ThreadId# #)
 *
 * Creates and schedules a new IO thread running the closure in R1,
 * then requests a context switch so it can start promptly.
 */
forkzh_fast
{
  /* args: R1 = closure to spark */
  
  MAYBE_GC(R1_PTR, forkzh_fast);

  // create it right now, return ThreadID in R1
  "ptr" R1 = foreign "C" createIOThread( RtsFlags_GcFlags_initialStkSize(RtsFlags), 
                                   R1 "ptr");
  foreign "C" scheduleThread(R1 "ptr");

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;
  
  RET_P(R1);
}
884
/* yield# : yield the current thread; defers to the generic
 * no-registers blocking entry point.
 */
yieldzh_fast
{
  jump stg_yield_noregs;
}
889
/* myThreadId# : returns the current TSO as a ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
895
/* labelThread# : attaches a debug label to a thread.
 * A no-op unless the RTS was built with DEBUG.
 */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr");
#endif
  jump %ENTRY_CODE(Sp(0));
}
906
/* isCurrentThreadBound# : 1 if the current thread is bound to an OS
 * thread (per isThreadBound in the RTS), 0 otherwise.
 */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  r = foreign "C" isThreadBound(CurrentTSO);
  RET_N(r);
}
914
915
916 /* -----------------------------------------------------------------------------
917  * TVar primitives
918  * -------------------------------------------------------------------------- */
919
/* When R1 is not a real machine register, the "R1" value travels on the
 * stack instead: SP_OFF accounts for that extra word, and
 * IF_NOT_REG_R1(x) emits x only in the unregisterised case.
 */
#ifdef REG_R1
#define SP_OFF 0
#define IF_NOT_REG_R1(x) 
#else
#define SP_OFF 1
#define IF_NOT_REG_R1(x) x
#endif

// Catch retry frame ------------------------------------------------------------

/* Vectored-return stubs for the catch-retry frame: these entry points
 * must never actually be entered, so they abort the RTS if reached.
 */
#define CATCH_RETRY_FRAME_ERROR(label) \
  label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }

CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
#endif

// frame layout differs under profiling (extra CCS words)
#if defined(PROFILING)
#define CATCH_RETRY_FRAME_BITMAP 7
#define CATCH_RETRY_FRAME_WORDS  6
#else
#define CATCH_RETRY_FRAME_BITMAP 1
#define CATCH_RETRY_FRAME_WORDS  4
#endif
953
/* Return frame for catchRetry#: entered when the code protected by the
 * frame returns.  Tries to commit the nested transaction; on success,
 * pops the frame and returns; on failure, restarts whichever branch
 * (first or alternative) the frame records as running.
 */
INFO_TABLE_RET(stg_catch_retry_frame,
               CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
               CATCH_RETRY_FRAME,
               stg_catch_retry_frame_0_ret,
               stg_catch_retry_frame_1_ret,
               stg_catch_retry_frame_2_ret,
               stg_catch_retry_frame_3_ret,
               stg_catch_retry_frame_4_ret,
               stg_catch_retry_frame_5_ret,
               stg_catch_retry_frame_6_ret,
               stg_catch_retry_frame_7_ret)
{
   W_ r, frame, trec, outer;
   // unregisterised: the return value sits on the stack, not in R1
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
   r = foreign "C" stmCommitTransaction(trec "ptr");
   if (r) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: retry */
     W_ new_trec;
     "ptr" new_trec = foreign "C" stmStartTransaction(outer "ptr");
     StgTSO_trec(CurrentTSO) = new_trec;
     // re-run the branch this frame was executing
     if (StgCatchRetryFrame_running_alt_code(frame)) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
       StgCatchRetryFrame_first_code_trec(frame) = new_trec;
     }
     Sp_adj(-1);
     jump RET_LBL(stg_ap_v);
   }
}
994
995
996 // Atomically frame -------------------------------------------------------------
997
998
999 #define ATOMICALLY_FRAME_ERROR(label) \
1000   label { foreign "C" barf("atomically_frame incorrectly entered!"); }
1001
1002 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
1003 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
1004 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
1005 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
1006 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
1007 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
1008 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
1009 ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
1010
/* Only 8 error vectors are defined above; fail loudly if the RTS is ever
 * configured with more. */
#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
#endif

/* Stack-frame size (words) and pointer bitmap for StgAtomicallyFrame.
 * NOTE(review): the PROFILING variant is two words larger — presumably for
 * the profiling header fields; confirm against the StgAtomicallyFrame
 * struct definition if these are ever changed. */
#if defined(PROFILING)
#define ATOMICALLY_FRAME_BITMAP 7
#define ATOMICALLY_FRAME_WORDS  4
#else
#define ATOMICALLY_FRAME_BITMAP 1
#define ATOMICALLY_FRAME_WORDS  2
#endif
1022
1023
/* Return frame pushed by atomicallyzh_fast.  Entered when the transaction
 * body returns, or when a waiting TSO is re-scheduled.  Either commits the
 * current transaction and returns to the frame underneath, or starts a
 * fresh transaction and re-runs the stored code.
 */
INFO_TABLE_RET(stg_atomically_frame,
               ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
               ATOMICALLY_FRAME,
               stg_atomically_frame_0_ret,
               stg_atomically_frame_1_ret,
               stg_atomically_frame_2_ret,
               stg_atomically_frame_3_ret,
               stg_atomically_frame_4_ret,
               stg_atomically_frame_5_ret,
               stg_atomically_frame_6_ret,
               stg_atomically_frame_7_ret)
{
   W_ frame, trec, valid;
   /* Unregisterised build: the result lives on the stack, not in R1;
    * save it and pop it before looking at the frame. */
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   if (StgAtomicallyFrame_waiting(frame)) {
     /* The TSO is currently waiting: should we stop waiting? */
     valid = foreign "C" stmReWait(CurrentTSO "ptr");
     if (valid) {
       /* Previous attempt is still valid: no point trying again yet */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
       jump stg_block_noregs;
     } else {
       /* Previous attempt is no longer valid: try again */
       "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
       StgTSO_trec(CurrentTSO) = trec;
       StgAtomicallyFrame_waiting(frame) = 0 :: CInt; /* false; */
       R1 = StgAtomicallyFrame_code(frame);
       Sp_adj(-1);
       jump RET_LBL(stg_ap_v);
     }
   } else {
     /* The TSO is not currently waiting: try to commit the transaction */
     valid = foreign "C" stmCommitTransaction(trec "ptr");
     if (valid) {
       /* Transaction was valid: commit succeeded.  Pop the frame and
        * return the result to whatever is underneath. */
       StgTSO_trec(CurrentTSO) = NO_TREC;
       Sp = Sp + SIZEOF_StgAtomicallyFrame;
       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
       jump %ENTRY_CODE(Sp(SP_OFF));
     } else {
       /* Transaction was not valid: try again from scratch */
       "ptr" trec = foreign "C" stmStartTransaction(NO_TREC "ptr");
       StgTSO_trec(CurrentTSO) = trec;
       R1 = StgAtomicallyFrame_code(frame);
       Sp_adj(-1);
       jump RET_LBL(stg_ap_v);
     }
   }
}
1078
1079
1080 // STM catch frame --------------------------------------------------------------
1081
/* Entry code for a catch-STM frame: simply discard the frame and perform
 * the return "ret" to the activation record underneath, preserving the
 * stacked result in the unregisterised case.
 */
#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret)          \
   label                                                   \
   {                                                       \
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )  \
      Sp = Sp + SIZEOF_StgCatchSTMFrame;                   \
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)             \
      jump ret;                                            \
   }

/* Offset from Sp of the frame's return/info word: in the unregisterised
 * build the result sits at Sp(0), so the frame word is one slot further. */
#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif
1096
/* One vectored-return entry per possible return vector. */
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))

#if MAX_VECTORED_RTN > 8
#error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
#endif

/* Stack-frame size (words) and pointer bitmap for StgCatchSTMFrame.
 * NOTE(review): PROFILING adds two words — presumably the profiling
 * header; confirm against the struct definition before changing. */
#if defined(PROFILING)
#define CATCH_STM_FRAME_BITMAP 3
#define CATCH_STM_FRAME_WORDS  3
#else
#define CATCH_STM_FRAME_BITMAP 0
#define CATCH_STM_FRAME_WORDS  1
#endif
1117
/* Catch frames are very similar to update frames, but when entering
 * one we just pop the frame off the stack and perform the correct
 * kind of return to the activation record underneath us on the stack.
 */

INFO_TABLE_RET(stg_catch_stm_frame,
               CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
               CATCH_STM_FRAME,
               stg_catch_stm_frame_0_ret,
               stg_catch_stm_frame_1_ret,
               stg_catch_stm_frame_2_ret,
               stg_catch_stm_frame_3_ret,
               stg_catch_stm_frame_4_ret,
               stg_catch_stm_frame_5_ret,
               stg_catch_stm_frame_6_ret,
               stg_catch_stm_frame_7_ret)
/* Direct (non-vectored) entry: return via the ENTRY_CODE of the frame
 * word found at Sp(SP_OFF) after popping. */
CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
1135
1136
1137 // Primop definition ------------------------------------------------------------
1138
/* atomically# : pushes an StgAtomicallyFrame, starts a new transaction
 * nested inside the TSO's current one, and runs the STM action in R1.
 * Commit/retry is handled by stg_atomically_frame when the action returns.
 */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_waiting(frame) = 0 :: CInt; // False
  StgAtomicallyFrame_code(frame) = R1;

  /* Start the memory transaction */
  old_trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(old_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  Sp_adj(-1);
  jump RET_LBL(stg_ap_v);
}
1165
1166
/* catchSTM# : pushes an StgCatchSTMFrame recording the handler R2, then
 * runs the STM action in R1.  The frame is popped transparently on normal
 * return by stg_catch_stm_frame.
 */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;

  /* Apply R1 to the realworld token */
  Sp_adj(-1);
  jump RET_LBL(stg_ap_v);
}
1186
1187
/* catchRetry# (orElse) : starts a nested transaction for the first
 * alternative R1, pushes an StgCatchRetryFrame remembering both
 * alternatives, and runs R1.  retryzh_fast switches to R2 if R1 retries.
 */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  "ptr" new_trec = foreign "C" stmStartTransaction(trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;
  StgCatchRetryFrame_first_code_trec(frame) = new_trec;

  /* Apply R1 to the realworld token */
  Sp_adj(-1);
  jump RET_LBL(stg_ap_v);  
}
1217
1218
/* retry# : walks the stack (via findRetryFrameHelper) to the nearest
 * CATCH_RETRY_FRAME or ATOMICALLY_FRAME.  At a catch-retry frame it
 * switches to the other alternative or propagates the retry outward;
 * at the atomically frame it blocks the TSO on the TVars it has read.
 */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  trec = StgTSO_trec(CurrentTSO);
  "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr");
  // findRetryFrameHelper works on the TSO's saved Sp, so sync it first
  StgTSO_sp(CurrentTSO) = Sp;
  frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr");
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    if (!StgCatchRetryFrame_running_alt_code(frame)) {
      // Retry in the first code: try the alternative
      "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      Sp_adj(-1);
      jump RET_LBL(stg_ap_v);
    } else {
      // Retry in the alternative code: propagate
      W_ other_trec;
      other_trec = StgCatchRetryFrame_first_code_trec(frame);
      r = foreign "C" stmMergeForWaiting(trec "ptr", other_trec "ptr");
      if (r) {
        r = foreign "C" stmCommitTransaction(trec "ptr");
      }
      if (r) {
        // Merge between siblings succeeded: commit it back to enclosing transaction
        // and then propagate the retry
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchRetryFrame;
        goto retry_pop_stack;
      } else {
        // Merge failed: we mustn't propagate the retry.  Try both paths again.
        "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
        StgCatchRetryFrame_first_code_trec(frame) = trec;
        StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
        StgTSO_trec(CurrentTSO) = trec;
        R1 = StgCatchRetryFrame_first_code(frame);
        Sp_adj(-1);
        jump RET_LBL(stg_ap_v);
      }
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  ASSERT(outer == NO_TREC);
  r = foreign "C" stmWait(CurrentTSO "ptr", trec "ptr");
  if (r) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgAtomicallyFrame_waiting(frame) = 1 :: CInt; // true
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    // Transaction was not valid: retry immediately
    "ptr" trec = foreign "C" stmStartTransaction(outer "ptr");
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    Sp_adj(-1);
    jump RET_LBL(stg_ap_v);
  }
}
1299
1300
/* newTVar# : allocates a fresh StgTVar holding the value in R1, with an
 * empty wait queue, and returns it.
 */
newTVarzh_fast
{
  W_ tv;

  /* Args: R1 = initialisation value */

  ALLOC_PRIM( SIZEOF_StgTVar, R1_PTR, newTVarzh_fast);
  tv = Hp - SIZEOF_StgTVar + WDS(1);
  SET_HDR(tv,stg_TVAR_info,W_[CCCS]);
  StgTVar_current_value(tv) = R1;
  StgTVar_first_wait_queue_entry(tv) = stg_END_STM_WAIT_QUEUE_closure;
    
  RET_P(tv);
}
1315
1316
/* readTVar# : reads a TVar within the current transaction via stmReadTVar
 * and returns the value seen by this transaction.
 */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  "ptr" result = foreign "C" stmReadTVar(trec "ptr", tvar "ptr");

  RET_P(result);
}
1332
1333
/* writeTVar# : records a write to a TVar in the current transaction via
 * stmWriteTVar; returns void.
 */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(trec "ptr", tvar "ptr", new_value "ptr");

  jump %ENTRY_CODE(Sp(0));
}
1351
1352
1353 /* -----------------------------------------------------------------------------
1354  * MVar primitives
1355  *
1356  * take & putMVar work as follows.  Firstly, an important invariant:
1357  *
1358  *    If the MVar is full, then the blocking queue contains only
1359  *    threads blocked on putMVar, and if the MVar is empty then the
1360  *    blocking queue contains only threads blocked on takeMVar.
1361  *
 * takeMVar:
1363  *    MVar empty : then add ourselves to the blocking queue
1364  *    MVar full  : remove the value from the MVar, and
1365  *                 blocking queue empty     : return
1366  *                 blocking queue non-empty : perform the first blocked putMVar
1367  *                                            from the queue, and wake up the
1368  *                                            thread (MVar is now full again)
1369  *
1370  * putMVar is just the dual of the above algorithm.
1371  *
1372  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1373  * the stack of the thread waiting to do the putMVar.  See
1374  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1375  * the stack layout, and the PerformPut and PerformTake macros below.
1376  *
1377  * It is important that a blocked take or put is woken up with the
1378  * take/put already performed, because otherwise there would be a
1379  * small window of vulnerability where the thread could receive an
1380  * exception and never perform its take or put, and we'd end up with a
1381  * deadlock.
1382  *
1383  * -------------------------------------------------------------------------- */
1384
/* isEmptyMVar# : returns 1 iff the MVar's info pointer marks it empty. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */
    W_ info;
    info = GET_INFO(R1);
    if (info != stg_EMPTY_MVAR_info) {
        RET_N(0);
    }
    RET_N(1);
}
1395
/* newMVar# : allocates an empty StgMVar (no value, empty blocking queue)
 * and returns it.
 */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1410
1411
/* If R1 isn't available, pass it on the stack */
/* PerformTake: complete a blocked takeMVar on another TSO's behalf by
 * writing the taken value and a return frame onto that TSO's stack
 * (layout set up by stg_block_takemvar — see HeapStackCheck.c). */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut: complete a blocked putMVar on another TSO's behalf by
 * popping its 3-word putMVar stack frame and reading the stored value
 * into lval (layout set up by stg_block_putmvar). */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1426
1427
/* takeMVar# : takes the value from a full MVar, or blocks the current TSO
 * on the MVar's queue if it is empty.  If putMVar-blocked threads are
 * queued, the first one's put is performed on its behalf and it is woken.
 *
 * FIX: the GRAN/PAR call to unblockOne was missing the "ptr" argument
 * annotations carried by the identical calls in tryTakeMVarzh_fast and
 * putMVarzh_fast; made the foreign-call argument kinds consistent.
 */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

#if defined(SMP)
    foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
#endif

    /* args: R1 = MVar closure */
    mvar = R1;

    info = GET_INFO(mvar);

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (info == stg_EMPTY_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;

#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif

        jump stg_block_takemvar;
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
    {
        /* There are putMVar(s) waiting...
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif

        RET_P(val);
    }
    else
    {
        /* No further putMVars, MVar is now empty */

        /* do this last... we might have locked the MVar in the SMP case,
         * and writing the info pointer will unlock it.
         */
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;

#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif

        RET_P(val);
    }
}
1511
1512
/* tryTakeMVar# : non-blocking takeMVar.  Returns (0, NO_FINALIZER_closure)
 * if the MVar is empty, otherwise (1, value), waking a blocked putter
 * exactly as takeMVarzh_fast does.
 */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

#if defined(SMP)
    foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
#endif

    /* args: R1 = MVar closure */

    mvar = R1;

    info = GET_INFO(mvar);

    if (info == stg_EMPTY_MVAR_info) {
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
#if defined(SMP)
       foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
    }
    
#if defined(SMP)
    foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif

    RET_NP(1, val);
}
1576
1577
/* putMVar# : stores R2 into an empty MVar, or blocks the current TSO if
 * the MVar is full.  If takeMVar-blocked threads are queued, the first
 * one's take is performed on its behalf and it is woken.
 */
putMVarzh_fast
{
    W_ mvar, info, tso;

#if defined(SMP)
    foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
#endif

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

    info = GET_INFO(mvar);

    if (info == stg_FULL_MVAR_info) {
        /* MVar full: enqueue ourselves and block */
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;
        /* unlocks the MVar in the SMP case */
        SET_INFO(mvar,stg_FULL_MVAR_info);

#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1650
1651
/* tryPutMVar# : non-blocking putMVar.  Returns 0 if the MVar is already
 * full, otherwise stores R2 (waking a blocked taker if any) and returns 1.
 */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

#if defined(SMP)
    foreign "C" ACQUIRE_LOCK(sm_mutex "ptr");
#endif

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

    info = GET_INFO(mvar);

    if (info == stg_FULL_MVAR_info) {
#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        RET_N(0);
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr",mvar "ptr");
        StgMVar_head(mvar) = tso;
#else
        "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr");
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;
        /* unlocks the MVar in the SMP case */
        SET_INFO(mvar,stg_FULL_MVAR_info);
#if defined(SMP)
        foreign "C" RELEASE_LOCK(sm_mutex "ptr");
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1713
1714
1715 /* -----------------------------------------------------------------------------
1716    Stable pointer primitives
1717    -------------------------------------------------------------------------  */
1718
/* makeStableName# : returns the StableName object for the closure in R1,
 * allocating a new one only if the stable-name table entry has none yet.
 */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    /* Allocate up front: lookupStableName must not be followed by a GC
     * point before the table entry is filled in. */
    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    index = foreign "C" lookupStableName(R1 "ptr");

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1741
1742
/* makeStablePtr# : registers R1 in the stable-pointer table via
 * getStablePtr and returns the stable pointer (a non-heap value,
 * hence RET_N).
 */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    "ptr" sp = foreign "C" getStablePtr(R1 "ptr");
    RET_N(sp);
}
1751
/* deRefStablePtr# : looks the stable pointer up in stable_ptr_table and
 * returns the closure it refers to.
 */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr (index into stable_ptr_table) */
    W_ entry, result;
    entry = W_[stable_ptr_table] + R1*SIZEOF_snEntry;
    result = snEntry_addr(entry);
    RET_P(result);
}
1760
1761 /* -----------------------------------------------------------------------------
1762    Bytecode object primitives
1763    -------------------------------------------------------------------------  */
1764
/* newBCO# : allocates a byte-code object from its component parts and
 * copies the arity/bitmap words from the R6 array into its tail.
 */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = itbls
       R5 = arity
       R6 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    /* BCO size = fixed header/fields plus the inlined bitmap words */
    bitmap_arr = R6;
    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    /* R5 (arity) is a non-pointer, so it is excluded from the liveness mask */
    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    StgBCO_itbls(bco)      = R4;
    StgBCO_arity(bco)      = HALF_W_(R5);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1804
1805
/* mkApUpd0# : wraps a zero-arity BCO in an updatable AP thunk with no
 * arguments, and returns the thunk.
 */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1830
1831 /* -----------------------------------------------------------------------------
1832    Thread I/O blocking primitives
1833    -------------------------------------------------------------------------- */
1834
1835 /* Add a thread to the end of the blocked queue. (C-- version of the C
1836  * macro in Schedule.h).
1837  */
/* NOTE: blocked_queue_tl is read only when the queue is non-empty (head
 * check below), and is rewritten on every append to stay consistent. */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
1846
/* waitRead# : blocks the current TSO until the file descriptor in R1 is
 * readable, by marking it BlockedOnRead and appending it to the blocked
 * queue.  Unsupported (barf) on the threaded RTS.
 */
waitReadzh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitRead# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
}
1862
/* waitWrite# : blocks the current TSO until the file descriptor in R1 is
 * writable; the dual of waitReadzh_fast.  Unsupported (barf) on the
 * threaded RTS.
 */
waitWritezh_fast
{
    /* args: R1 */
#ifdef THREADED_RTS
    foreign "C" barf("waitWrite# on threaded RTS");
#endif

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
}
1878
1879
STRING(stg_delayzh_malloc_str, "delayzh_fast")
/* delay# : blocks the current TSO for (at least) R1 microseconds.  On
 * mingw32 this goes through the async-I/O request machinery; elsewhere
 * the TSO is inserted into the sleeping_queue, kept sorted by wake-up
 * time (StgTSO_block_info).  Unsupported (barf) on the threaded RTS.
 */
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    foreign "C" barf("delay# on threaded RTS");
#endif

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    reqID = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Compute the absolute wake-up time in RTS ticks */
    W_ time;
    time = foreign "C" getourtimeofday();
    target = (R1 / (TICK_MILLISECS*1000)) + time;
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue. */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
}
1943
1944
1945 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
/* asyncRead# (mingw32 only) : issues an asynchronous read request via
 * addIORequest, records the request state in a malloc'd StgAsyncIOResult
 * hung off the TSO, and blocks the TSO until the request completes.
 */
asyncReadzh_fast
{
    W_ ares;
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS");
#endif

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str);
    reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr");
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
1971
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")

/* asyncWrite# primop: block the current thread until an asynchronous
 * write on the given fd completes (Windows-only).  Identical in shape
 * to asyncReadzh_fast above, except the request is flagged as a write.
 */
asyncWritezh_fast
{
    W_ ares;       /* pointer to the malloc'd StgAsyncIOResult */
    CInt reqID;    /* request handle returned by the I/O manager */

#ifdef THREADED_RTS
    /* This blocking protocol is only used by the non-threaded RTS. */
    foreign "C" barf("asyncWrite# on threaded RTS");
#endif

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str);
    /* 1/TRUE selects a write request (cf. 0/FALSE in asyncReadzh_fast). */
    reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr");

    /* Record the pending request so the scheduler can match the
     * completion back to this thread; len/errCode are filled in later.
     */
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
1997
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")

/* asyncDoProc# primop: block the current thread until the given proc
 * (R1) has been run asynchronously with parameter R2 by the RTS I/O
 * manager (Windows-only).  Mirrors asyncReadzh_fast/asyncWritezh_fast.
 */
asyncDoProczh_fast
{
    W_ ares;       /* pointer to the malloc'd StgAsyncIOResult */
    CInt reqID;    /* request handle returned by the I/O manager */

#ifdef THREADED_RTS
    /* Consistency fix: asyncReadzh_fast/asyncWritezh_fast barf here too.
     * The blocked_queue protocol below is only valid on the
     * non-threaded RTS, so fail loudly rather than blocking wrongly. */
    foreign "C" barf("asyncDoProc# on threaded RTS");
#endif

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str);
    reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr");
    /* Record the pending request so the scheduler can match the
     * completion back to this thread; len/errCode are filled in later.
     */
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
}
2019 #endif
2020
2021 /* -----------------------------------------------------------------------------
2022   ** temporary **
2023
2024    classes CCallable and CReturnable don't really exist, but the
2025    compiler insists on generating dictionaries containing references
2026    to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
2027    for these.  Some C compilers can't cope with zero-length static arrays,
2028    so we have to make these one element long.
2029   --------------------------------------------------------------------------- */
2030
/* One-word dummy static info tables for the phantom CCallable /
 * CReturnable dictionaries — see the explanatory comment above.
 * They only need to exist as link-time symbols; the contents (a
 * single zero word) are never inspected.
 */
section "rodata" {
  GHC_ZCCCallable_static_info:   W_ 0;
}

section "rodata" {
  GHC_ZCCReturnable_static_info: W_ 0;
}